# custom_kernels.py

import numpy as np
from scipy.spatial.distance import squareform
from sklearn.gaussian_process.kernels import Hyperparameter, Kernel, StationaryKernelMixin


def _convert_to_double(X):
    return np.ascontiguousarray(X, dtype=np.double)


class Gibbs(Kernel):
    """Gibbs kernel

    k(x, x') = sqrt( 2*l(x)*l(x') / (l(x)**2 + l(x')**2) )
               * exp( -(x - x')**2 / (l(x)**2 + l(x')**2) )

    where l(x) = b*x + c  (b = l_slope, c = l0).

    This is the RBF kernel where the length scale is allowed to vary by some
    function, in this case a linear function.
    """

    def __init__(self, l0=1.0, l0_bounds=(1e-6, 1e5), l_slope=1.0, l_slope_bounds=(1e-6, 1e5)):
        self.l0 = l0
        self.l0_bounds = l0_bounds
        self.l_slope = l_slope
        self.l_slope_bounds = l_slope_bounds

    @property
    def hyperparameter_l0(self):
        return Hyperparameter("l0", "numeric", self.l0_bounds)

    @property
    def hyperparameter_l_slope(self):
        return Hyperparameter("l_slope", "numeric", self.l_slope_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return K(X, Y)"""
        def l(x):  # length-scale function l(x) = l_slope*x + l0
            return self.l0 + self.l_slope * x

        X = np.atleast_2d(X)
        s = X.shape
        if len(s) != 2:
            raise ValueError('A 2-dimensional array must be passed.')
        if Y is None:
            m, n = s
            K = np.zeros((m * (m - 1)) // 2, dtype=np.double)
            X = _convert_to_double(X)
            # Shift the x-range to make the hyperparameters a bit more intuitive.
            X = X - X[0]
            t = 0
            for i in range(0, m - 1):
                for j in range(i + 1, m):
                    xi_xj = X[i] - X[j]
                    li = l(X[i])  # b*x + c  (b = l_slope, c = l0)
                    lj = l(X[j])
                    li2_lj2 = np.dot(li, li) + np.dot(lj, lj)  # l(x)^2 + l(x')^2
                    coeff = np.sqrt(2 * li * lj / li2_lj2)
                    K[t] = coeff * np.exp(-(xi_xj * xi_xj) / li2_lj2)
                    t += 1
            K = squareform(K)
            np.fill_diagonal(K, 1)
            if eval_gradient:
                # Approximate the gradient numerically.
                def f(theta):
                    return self.clone_with_theta(theta)(X, Y)
                return K, _approx_fprime(self.theta, f, 1e-10)
            else:
                return K
        else:
            mx, nx = s
            my, ny = Y.shape
            K = np.zeros((mx, my), dtype=np.double)
            X = _convert_to_double(X)
            Y = _convert_to_double(Y)
            # Shift both x-ranges to make the hyperparameters a bit more intuitive.
            X = X - X[0]
            Y = Y - Y[0]
            for i in range(0, mx):
                for j in range(0, my):
                    xi_yj = X[i] - Y[j]
                    li = l(X[i])  # b*x + c  (b = l_slope, c = l0)
                    lj = l(Y[j])
                    li2_lj2 = li * li + lj * lj  # l(x)^2 + l(x')^2
                    coeff = np.sqrt(2 * li * lj / li2_lj2)
                    K[i, j] = coeff * np.exp(-(xi_yj * xi_yj) / li2_lj2)
            if eval_gradient:
                # Approximate the gradient numerically.
                def f(theta):
                    return self.clone_with_theta(theta)(X, Y)
                return K, _approx_fprime(self.theta, f, 1e-10)
            else:
                return K

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        return False

    def __repr__(self):
        return "{0}(l0={1:.3g}, l_slope={2:.3g})".format(self.__class__.__name__, self.l0, self.l_slope)
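

# A minimal usage sketch, not part of the original module: the data and
# hyperparameter values below are illustrative assumptions. It shows the Gibbs
# kernel plugged into sklearn's GaussianProcessRegressor, which only needs the
# Kernel interface implemented above (__call__, diag, is_stationary).
def _example_gibbs_usage():
    from sklearn.gaussian_process import GaussianProcessRegressor

    rng = np.random.RandomState(0)
    X = np.linspace(0.0, 10.0, 30).reshape(-1, 1)   # single 1-d feature
    y = np.sin(X).ravel() + 0.1 * rng.randn(30)     # noisy toy targets

    # optimizer=None keeps the hyperparameters fixed; with the default optimizer
    # they would be fitted using the numerically approximated gradient below.
    gpr = GaussianProcessRegressor(kernel=Gibbs(l0=1.0, l_slope=0.1),
                                   alpha=1e-2, optimizer=None)
    gpr.fit(X, y)
    return gpr.predict(X[:5])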


class FallExp(Kernel):
    """Falling exponential kernel

    k(x, x') = exp( (d - (x + x')) / (2*a) )
    """

    def __init__(self, d=1.0, d_bounds=(1e-5, 1e5), a=1.0, a_bounds=(1e-5, 1e5)):
        self.d = d
        self.d_bounds = d_bounds
        self.a = a
        self.a_bounds = a_bounds

    @property
    def hyperparameter_d(self):
        return Hyperparameter("d", "numeric", self.d_bounds)

    @property
    def hyperparameter_a(self):
        return Hyperparameter("a", "numeric", self.a_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return K(X, Y)"""
        X = np.atleast_2d(X)
        s = X.shape
        if len(s) != 2:
            raise ValueError('A 2-dimensional array must be passed.')
        if Y is None:
            m, n = s
            K = np.zeros((m * (m - 1)) // 2, dtype=np.double)
            X = _convert_to_double(X)
            t = 0
            for i in range(0, m - 1):
                for j in range(i + 1, m):
                    xi_xj = X[i] + X[j]
                    K[t] = np.exp((self.d - xi_xj) / (2 * self.a))
                    t += 1
            K = squareform(K)
            np.fill_diagonal(K, 1)
            if eval_gradient:
                # Approximate the gradient numerically.
                def f(theta):
                    return self.clone_with_theta(theta)(X, Y)
                return K, _approx_fprime(self.theta, f, 1e-10)
            else:
                return K
        else:
            mx, nx = s
            my, ny = Y.shape
            K = np.zeros((mx, my), dtype=np.double)
            X = _convert_to_double(X)
            Y = _convert_to_double(Y)
            for i in range(0, mx):
                for j in range(0, my):
                    xi_yj = X[i] + Y[j]
                    K[i, j] = np.exp((self.d - xi_yj) / (2 * self.a))
            if eval_gradient:
                # Approximate the gradient numerically.
                def f(theta):
                    return self.clone_with_theta(theta)(X, Y)
                return K, _approx_fprime(self.theta, f, 1e-10)
            else:
                return K

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        return False

    def __repr__(self):
        return "{0}(d={1:.3g}, a={2:.3g})".format(self.__class__.__name__, self.d, self.a)
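

# A minimal composition sketch, not part of the original module (the data and
# hyperparameter values are assumptions). Because FallExp subclasses sklearn's
# Kernel, it composes with the other kernels here through the * and + operators.
def _example_fallexp_usage():
    X = np.linspace(0.0, 5.0, 20).reshape(-1, 1)

    kernel = FallExp(d=2.0, a=1.5) * Gibbs(l0=1.0, l_slope=0.05)
    K = kernel(X)    # 20 x 20 Gram matrix of the product kernel
    return K.shape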


class LinearNoiseKernel(StationaryKernelMixin, Kernel):
    """White kernel with a linearly varying noise level.

    The main use-case of this kernel is as part of a sum-kernel where it
    explains the noise-component of the signal. Tuning its parameters
    corresponds to estimating the noise level.

    k(x_1, x_2) = noise_level(x_1) if x_1 == x_2 else 0

    where noise_level(x) = noise_level - b*(x - 99.0).

    Parameters
    ----------
    noise_level : float
        Noise level at the reference point x = 99.0
    b : float
        Slope with which the noise level decreases as x increases
    """

    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5), b=1.0, b_bounds=(1e-5, 1e5)):
        self.noise_level = noise_level
        self.noise_level_bounds = noise_level_bounds
        self.b = b
        self.b_bounds = b_bounds

    @property
    def hyperparameter_noise_level(self):
        return Hyperparameter("noise_level", "numeric", self.noise_level_bounds)

    @property
    def hyperparameter_b(self):
        return Hyperparameter("b", "numeric", self.b_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array, shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)
        Y : array, shape (n_samples_Y, n_features), (optional, default=None)
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.
        eval_gradient : bool (optional, default=False)
            Determines whether the gradient with respect to the kernel
            hyperparameters is determined. Only supported when Y is None.

        Returns
        -------
        K : array, shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)
        K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
            The gradient of the kernel k(X, X) with respect to the
            hyperparameters of the kernel. Only returned when eval_gradient
            is True.
        """
        X = np.atleast_2d(X)
        if Y is not None and eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        if Y is None:
            K = np.zeros((X.shape[0], X.shape[0]))
            for i in range(0, X.shape[0]):
                # Noise level falls off linearly from the reference point x = 99.0;
                # negative values are set to zero.
                K[i, i] = self.noise_level - self.b * (X[i][0] - 99.0)
                if K[i, i] < 0:
                    K[i, i] = 0.0
            if eval_gradient:
                # Approximate the gradient numerically.
                def f(theta):
                    return self.clone_with_theta(theta)(X, Y)
                return K, _approx_fprime(self.theta, f, 1e-10)
            else:
                return K
        else:
            return np.zeros((X.shape[0], Y.shape[0]))

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array, shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Returns
        -------
        K_diag : array, shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        K_diag = np.ones(X.shape[0])
        for i in range(0, X.shape[0]):
            K_diag[i] = self.noise_level - self.b * (X[i][0] - 99.0)
        return K_diag

    def __repr__(self):
        return "{0}(noise_level={1}, b={2})".format(self.__class__.__name__, self.noise_level, self.b)
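

# A minimal sketch, not part of the original module; the x-range and parameter
# values are assumptions chosen around the hard-coded reference point x = 99.0
# used in __call__ and diag above. The kernel is intended to be added to a
# signal kernel so that the modelled noise falls off linearly with x.
def _example_linear_noise_usage():
    X = np.linspace(99.0, 109.0, 25).reshape(-1, 1)

    kernel = Gibbs(l0=1.0, l_slope=0.1) + LinearNoiseKernel(noise_level=0.5, b=0.04)
    K = kernel(X)    # the noise term contributes only to the diagonal
    return np.diag(K)[:5]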


# Adapted from scipy/optimize/optimize.py for functions with 2-d output.
def _approx_fprime(xk, f, epsilon, args=()):
    f0 = f(*((xk,) + args))
    grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), float)
    ei = np.zeros((len(xk),), float)
    for k in range(len(xk)):
        ei[k] = 1.0
        d = epsilon * ei
        grad[:, :, k] = (f(*((xk + d,) + args)) - f0) / d[k]
        ei[k] = 0.0
    return grad
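

# A quick sanity-check sketch, not part of the original module. It compares the
# finite-difference helper above against sklearn's analytic gradient for a
# standard RBF kernel (RBF is imported only for this comparison).
def _example_gradient_check():
    from sklearn.gaussian_process.kernels import RBF

    X = np.linspace(0.0, 1.0, 10).reshape(-1, 1)
    rbf = RBF(length_scale=0.7)

    K, K_grad = rbf(X, eval_gradient=True)    # analytic gradient w.r.t. log(length_scale)
    K_grad_num = _approx_fprime(rbf.theta, lambda t: rbf.clone_with_theta(t)(X), 1e-8)
    return np.max(np.abs(K_grad - K_grad_num))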