Decomp.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. import numpy as np
  2. import numexpr as ne
  3. import Tools.CacheMgr as Cache
  4. from Tools.PrintMgr import *
  5. from Tools.Base import ParametricObject
  6. from scipy.optimize import minimize
  7. from scipy.linalg import solve
  8. from numpy.linalg import slogdet, multi_dot
  9. from scipy.linalg import cho_factor, cho_solve
  10. from math import pi, e, sqrt
  11. from scipy.special import erfinv, gammaln, gammainc, gammaincc
  12. from scipy.stats import norm
  13. import time
  14. ###
  15. ## Conduct a signal scan on a DataSet and store the results.
  16. ###
  17. class SignalScan(ParametricObject):
  18. # p-values for [-2, -1, 0, 1, 2] sigma levels
  19. _siglevels = [0.022750, 0.158655, 0.5, 0.841345, 0.977250]
  20. # Get the background and signal variance contributions
  21. @Cache.Element("{self.Factory.CacheDir}", "var", "{self.Factory}", "{self.DataSet}", "{self.SigName}.npy")
  22. def _varCache(self, DataMom, SigMoms, P, Rs):
  23. n = self.DataSet.N
  24. P1 = P[-1]
  25. Mb = DataMom.copy()
  26. Mb[n:] = Rs[:-1].dot( SigMoms[:-1, n:] )
  27. Ms = SigMoms[-1]
  28. Mp = np.zeros_like(Mb)
  29. Mp[n:] = P1
  30. Mx = self.Factory.MxTensor(Mb, Ms, Mp)
  31. return P1.dot( Mx[n:,n:] ).T
  32. # return the first three central moments assuming a signal contribution 't'.
  33. def _cmom(self, t):
  34. mu1 = t
  35. mu2 = self.B2 + mu1*self.S2 - mu1**2
  36. mu3 = self.B3 + mu1*self.S3 - 3*mu1*mu2 - mu1**3
  37. mu3 = np.maximum(mu2, mu3)
  38. return mu1, mu2, mu3
  39. # return the CPD parameters
  40. def _CPDpar(self, mu1, mu2, mu3, N):
  41. dN = N - mu1 * self.DataSet.Nint
  42. a = (mu2**3 / mu3**2) * self.DataSet.Nint
  43. b = mu2 / mu3
  44. k = a + b*dN + 0.5
  45. return a, k
  46. def CPDpdf(self, mu1, mu2, mu3, N):
  47. a, k = self._CPDpar(mu1, mu2, mu3, N)
  48. return np.exp( (k-1)*np.log(a) - a - gammaln(k) )
  49. def CPDcdf(self, mu1, mu2, mu3, N):
  50. a, k = self._CPDpar(mu1, mu2, mu3, N)
  51. return np.nan_to_num(gammaincc(k, a))
  52. def CPDcdf1m(self, mu1, mu2, mu3, N):
  53. a, k = self._CPDpar(mu1, mu2, mu3, N)
  54. return np.nan_to_num(gammainc(k, a))
  55. # Return the upper limit for x (x in events)
  56. def UL(self, x):
  57. mu1, mu2, mu3 = self._cmom(self.t)
  58. if self.Lumi > 0:
  59. mu2 += self.LumiUnc * self.t**2
  60. pdf = self.CPDpdf(mu1, mu2, mu3, x)
  61. c = pdf.cumsum()
  62. idx = np.searchsorted(c/c[-1], self.Confidence)
  63. ul = self.t[idx] * self.DataSet.Nint
  64. return ul / self.Lumi if self.Lumi > 0 else ul
  65. # Return the moments equivalent to (-2, -1, 0, +1, +2)-sigma levels
  66. def _levels(self, Mu2, Mu3, npt=int(1e5), levels=(-2, -1, 0, 1, 2)):
  67. var = Mu2 * self.DataSet.Nint
  68. r = 4*sqrt(var)
  69. t = np.linspace( -r, r, npt + 1)
  70. cdf = self.CPDcdf(0, Mu2, Mu3, t)
  71. idx = np.searchsorted(cdf, norm.cdf(levels) ) - 1
  72. return t[idx]
  73. # Implementation of iterator.
  74. def next(self):
  75. self.idx += 1
  76. if self.idx >= len(self.Names):
  77. raise StopIteration
  78. Data = self.DataSet
  79. DataMom = getattr(Data, Data.attr)
  80. self.SigName = self.Names[self.idx]
  81. SigMoms, P = Data.NormSignalEstimators(self.SigName) # Raw signals, estimators
  82. Rs = P.dot(DataMom[Data.N:]) # Estimated signal moments
  83. # Get the background and signal variance contributions.
  84. P1 = P[-1]
  85. PB, PS, P2 = self._varCache(DataMom, SigMoms, P, Rs)
  86. # Second central moments for b-part, s-part and b-only
  87. self.B2 = P1.dot( PB )
  88. self.S2 = P1.dot( PS )
  89. Mu2 = multi_dot((P1, self.MxC[Data.N:,Data.N:], P1))
  90. # Third central moments for b-part, s-part and b-only
  91. self.B3 = P2.dot( PB )
  92. self.S3 = P2.dot( PS )
  93. Mu3 = multi_dot((P1, self.MxC[Data.N:,Data.N:], P2))
  94. Mu3 = max(Mu2, Mu3)
  95. # Store the output values and calculate limits
  96. i = self.idx
  97. self.var = Mu2 * Data.Nint
  98. pts = self._levels(Mu2, Mu3)
  99. self.Mass [i] = Data[self.SigName].Mass
  100. self.Yield [i] = Rs[-1] * Data.Nint
  101. self.Unc [i] = sqrt(self.var)
  102. self.ObsLim[i] = self.UL( self.Yield[i] )
  103. self.ExpLim[i] = [ self.UL( v ) for v in pts ]
  104. self.PValue[i] = self.CPDcdf1m(0, Mu2, Mu3, self.Yield[i])
  105. self.Sig [i] = norm.isf(self.PValue[i])
  106. return self.SigName, self.Yield[i], self.Unc[i], self.ObsLim[i], self.ExpLim[i]
  107. # Initialize covariances for shared signals. Transposes copies
  108. # are stored in memory, because this optimizes memory access
  109. # and greatly speeds up the sums in _setShortCov.
  110. def __iter__(self):
  111. sl = self.DataSet.GetActive()
  112. SigMoms = sum( self.DataSet[s].Moment * self.DataSet[s].Sig for s in sl )
  113. Data = self.DataSet
  114. n = self.DataSet.N
  115. DataMom = getattr(Data, Data.attr)
  116. Mom = DataMom.copy()
  117. if len(self.DataSet.GetActive()) > 0:
  118. Mom[n:] = SigMoms[n:]
  119. self.MxC = self.Factory.CovMatrix(Mom)
  120. return self
  121. def __init__(self, Factory, DataSet, *arg, **kwargs):
  122. ParametricObject.__init__(self)
  123. self.Factory = Factory
  124. self.DataSet = DataSet
  125. self.Lumi = float(kwargs.get("Lumi", 0.0))
  126. self.LumiUnc = float(kwargs.get("LumiUnc", 0.0))
  127. self.Nmax = float(kwargs.get("Nmax", 1e5))
  128. self.Npt = float(kwargs.get("Npt", 1e6))
  129. self.Confidence = float(kwargs.get("ConfidenceLevel", 0.95))
  130. self.Names = arg
  131. self.Mass = np.zeros( (len(arg), ) )
  132. self.Yield = np.zeros( (len(arg), ) )
  133. self.Unc = np.zeros( (len(arg), ) )
  134. self.ExpLim = np.zeros( (len(arg), 5 ) )
  135. self.ObsLim = np.zeros( (len(arg), ) )
  136. self.PValue = np.zeros( (len(arg), ) )
  137. self.Sig = np.zeros( (len(arg), ) )
  138. # Initial idx and test points
  139. self.idx = -1
  140. self.t = np.linspace(0, self.Nmax, self.Npt) / self.DataSet.Nint
  141. ###
  142. ## Optimize hyperparameters on a specified DataSet.
  143. ###
  144. class Optimizer(ParametricObject):
  145. # Transform the dataset to the specified hyperparameters
  146. def UpdateXfrm(self, reduced=True, **kwargs):
  147. self.update( kwargs )
  148. self.PriorGen.update(self.ParamVal)
  149. self.Prior.FromIter(self.PriorGen)
  150. Act = self.DataSet.GetActive() if reduced else self.DataSet.Signals
  151. inm = [ self.DataSet[name].Mom for name in Act ]
  152. outm = self.Xfrm(self.DataSet.Mom, *inm, **self.ParamVal)
  153. Mom, sigs = outm[0], outm[1:]
  154. for name, m in zip(Act, sigs):
  155. self.DataSet[name].MomX[len(m):] = 0
  156. self.DataSet[name].MomX[:len(m)] = m
  157. self.DataSet.MomX[:] = Mom
  158. self.DataSet.Full.Set (Mom=Mom)
  159. # Scan for the optimal number of moments.
  160. @Cache.Element("{self.Factory.CacheDir}", "LLH", "{self.Factory}", "{self}", "{self.DataSet}.json")
  161. def ScanN(self, reduced=True, **kwargs):
  162. L = np.full((self.Factory["Ncheck"],), np.inf)
  163. D = self.DataSet
  164. a = kwargs.get("attr", "MomX")
  165. h = D.Full.Mom[1] * sqrt(2)
  166. self.UpdateXfrm(reduced=reduced)
  167. for j in range(2, self.Factory["Ncheck"]):
  168. try:
  169. D.SetN(j, attr=a)
  170. dof = (j**2 + j) / 2
  171. Raw = D.TestS.LogP(D.Full)
  172. Pen = float(j) * np.log(D.Nint/(dof*h*e)) / 2
  173. pdot()
  174. except (np.linalg.linalg.LinAlgError, ValueError):
  175. pdot(pchar="x")
  176. continue
  177. L[j] = Raw + Pen
  178. j = np.nanargmin(L)
  179. return j, L[j]
  180. # Optimize the hyperparameters
  181. @pstage("Optimizing Hyperparameters")
  182. def FitW(self, initial_simplex=None):
  183. ini = [ self.Factory[p] for p in self.Factory._fitparam ]
  184. res = minimize(self.ObjFunc, ini, method='Nelder-Mead', options={'xatol': 1e-2, 'initial_simplex' : initial_simplex})
  185. par = dict(zip(self.Factory._fitparam, res.x))
  186. # Now set to the best parameters
  187. self.update( par )
  188. self.UpdateXfrm()
  189. N, L = self.ScanN()
  190. return N, L, par
  191. # Scan through a grid of hyperparameters
  192. @pstage("Scanning Hyperparameters")
  193. def ScanW(self, Ax, Lx, Ad, Ld):
  194. D = self.DataSet
  195. LLH = []
  196. P = []
  197. for ax, lx, ad, ld in zip(Ax.ravel(), Lx.ravel(), Ad.ravel(), Ld.ravel()):
  198. if ad != self.Factory["Alpha"] or ld != self.Factory["Lambda"] or not hasattr(D, "Full"):
  199. self.Factory.update( {'Alpha': ad, 'Lambda': ld} )
  200. D.Decompose(xonly=True)
  201. LLH.append( self.ObjFunc((ax, lx)) )
  202. P.append ( { 'Alpha' : ax, 'Lambda' : lx } )
  203. LLH = np.asarray(LLH)
  204. return LLH.reshape(Ax.shape), P[ np.nanargmin(LLH) ]
  205. def ObjFunc(self, arg):
  206. prst()
  207. pstr(str_nl % "Nint" + "%.2f" % self.DataSet.Nint)
  208. pstr(str_nl % "Neff" + "%.2f" % self.DataSet.Neff)
  209. pstr(str_nl % "Signals" + " ".join(self.DataSet.GetActive()))
  210. pstr(str(self))
  211. pini("MOMENT SCAN")
  212. self.update( dict( zip(self.Factory._fitparam, arg) ) )
  213. self.Xfrm.update(self.Factory.ParamVal)
  214. j, L = self.ScanN()
  215. pstr(str_nl % "Nmom" + "%2d" % j)
  216. pstr(str_nl % "LLH" + "%.2f" % L)
  217. pstr(str_nl % "Nfev" + "%d / %d" % (self.Nfev, self.Nfex))
  218. self.Nfev += 1
  219. return L
  220. def __init__(self, Factory, DataSet, **kwargs):
  221. self.DataSet = DataSet
  222. self.Factory = Factory
  223. self.Nfev = 0
  224. self.Nfex = 0
  225. # Copy parameters from the Factory object.
  226. self._param = Factory._fitparam
  227. self._fitparam = Factory._fitparam
  228. ParametricObject.__init__(self, **Factory.ParamVal)
  229. self.Prior = TruncatedSeries(self.Factory, np.zeros((Factory["Nbasis"],)), DataSet.Neff/DataSet.Nint, Nmax=2 )
  230. self.PriorGen = Factory.Pri()
  231. self.Xfrm = self.Factory.Xfrm()
  232. ###
  233. ## An object to hold data to decompose along with signal objects.
  234. ###
  235. class DataSet(ParametricObject):
  236. def AddParametricSignal(self, name, func, **kwargs):
  237. if name in self:
  238. return
  239. self[name] = ParametricSignal(self.Factory, name, func, **kwargs)
  240. self.Signals.append(name)
  241. def DelSignal(self, name):
  242. try:
  243. list.remove(name)
  244. except ValueError:
  245. pass
  246. try:
  247. del self[name]
  248. except ValueError:
  249. pass
  250. def GetActive(self, *args): return [ n for n in self.Signals if self[n].Active or self[n].name in args ]
  251. # Decompose the dataset and active signals
  252. def Decompose(self, reduced=True, xonly=False, cksize=2**20):
  253. pini("Data Moments")
  254. N = self.Factory["Nxfrm"]
  255. Nb = self.Factory["Nxfrm"] if xonly else self.Factory["Nbasis"]
  256. self.Mom = np.zeros((self.Factory["Nbasis"],))
  257. self.MomX = np.zeros((self.Factory["Nxfrm"],))
  258. self.Mom[:Nb] = self.Factory.CachedDecompose(self.x, self.w, str(self.uid), cksize=cksize, Nbasis=Nb)
  259. self.Full = TruncatedSeries(self.Factory, self.Mom, self.Neff, Nmax=N )
  260. self.TestS = TruncatedSeries(self.Factory, self.Mom, self.Neff, Nmax=N )
  261. self.TestB = TruncatedSeries(self.Factory, self.Mom, self.Neff, Nmax=N )
  262. pend()
  263. Act = self.GetActive() if reduced else self.Signals
  264. for name in Act:
  265. pini("%s Moments" % name)
  266. self[name].Decompose(cksize=cksize, Nbasis=Nb)
  267. pend()
  268. # Solve for the raw signal estimators. Use Cholesky decomposition,
  269. # as it is much faster than the alternative solvers.
  270. @pstage("Preparing Signal Estimators")
  271. def PrepSignalEstimators(self, reduced=True, verbose=False):
  272. D = getattr(self, self.attr).copy()
  273. n, N = self.N, D.size
  274. Act = self.GetActive() if reduced else self.Signals
  275. D[n:] = 0
  276. LCov = self.Factory.CovMatrix(D)[n:N,n:N]
  277. reg = np.diag(LCov).mean() / sqrt(self.Nint)
  278. Ch = cho_factor( LCov + reg*np.eye(N-n))
  279. if verbose: pini("Solving")
  280. for name in Act:
  281. sig = self[name]
  282. sig.Sig = getattr(sig, self.attr) # set the moments to use.
  283. sig.Res = sig.Sig[n:] # sig res
  284. sig.Est = cho_solve(Ch, sig.Res.T)
  285. if verbose: pdot()
  286. if verbose: pend()
  287. # Solve for the normalized signal estimators
  288. def NormSignalEstimators(self, *extrasignals):
  289. sl = self.GetActive(*extrasignals)
  290. Sig = np.array([ self[s].Sig for s in sl ]) # raw signals
  291. Res = np.array([ self[s].Res for s in sl ]) # sig residuals
  292. Est = np.array([ self[s].Est for s in sl ]) # sig raw estimators
  293. P = solve ( Est.dot(Res.T), Est) # normalized estimators
  294. return Sig, P
  295. # Extract the active signals
  296. def ExtractSignals(self, Data, *extrasignals):
  297. N = self.N
  298. Sig, P = self.NormSignalEstimators(self, *extrasignals)
  299. R = P.dot(Data[self.N:])
  300. self.FullSig = R.dot(Sig)
  301. self.P = P
  302. for i, name in enumerate( self.GetActive(*extrasignals) ):
  303. self[name].Moment = R[i]
  304. self[name].Yield = R[i] * self.Nint
  305. # Return covariance matrix (in events)
  306. def Covariance(self):
  307. sigs = self.GetActive()
  308. N = self.N
  309. if len(sigs) == 0:
  310. self.Cov = [[]]
  311. self.Unc = []
  312. self.Corr = [[]]
  313. else:
  314. Mf = self.Factory.CovMatrix( getattr(self, self.attr) )
  315. self.Cov = self.Nint * multi_dot (( self.P, Mf[N:,N:], self.P.T ))
  316. self.Unc = np.sqrt(np.diag(self.Cov))
  317. self.Corr = self.Cov / np.outer(self.Unc, self.Unc)
  318. for n, name in enumerate(sigs):
  319. self[name].Unc = self.Unc[n]
  320. # Set the number of moments
  321. def SetN(self, N, attr="MomX"):
  322. self.N = N
  323. Mom = getattr(self, attr)
  324. isSig = np.arange(Mom.size) >= N
  325. dMom = Mom * (~isSig)
  326. self.attr = attr
  327. if len(self.GetActive()) > 0:
  328. self.PrepSignalEstimators(verbose=False)
  329. self.ExtractSignals(Mom)
  330. else:
  331. self.FullSig = np.zeros_like(dMom)
  332. self.TestB.Set(Mom=dMom - self.FullSig * (~isSig), Nmax=N)
  333. self.TestS.Set(Mom=dMom + self.FullSig * ( isSig) )
  334. def __init__(self, x, Factory, **kwargs):
  335. w = kwargs.get('w', np.ones(x.shape[-1]))
  336. sel = (w != 0) * (x > Factory["x0"] )
  337. self.w = np.compress(sel, w, axis=-1)
  338. self.x = np.compress(sel, x, axis=-1)
  339. # Record some vital stats
  340. self.uid = self.x.dot(self.w) # Use the weighted sum as a datset identifier.
  341. self.Nint = self.w.sum()
  342. self.Neff = self.Nint**2 / np.dot(self.w, self.w)
  343. self.w /= self.Nint
  344. self.Factory = Factory
  345. self.Signals = []
  346. ParametricObject.__init__(self, **Factory.ParamVal)
  347. # Format as a list of floats joined by '_'. The
  348. # str(float()) construction ensures that numpy
  349. # singletons are p.copy()rinted consistently
  350. def __format__(self, fmt):
  351. id_str = [ str(self.uid) ] + self.GetActive()
  352. return "_".join( id_str )
  353. ###
  354. ## A parametric signal model.
  355. ###
  356. class ParametricSignal(object):
  357. def Decompose(self, cksize=2**20, Nbasis=0, **kwargs):
  358. Mom = self.CachedDecompose(cksize, Nbasis=Nbasis, **kwargs)
  359. self.Mom[:len(Mom)] = Mom
  360. self.Mom[len(Mom):] = 0
  361. self.MomX[:] = 0
  362. # Decompose the signal sample data.
  363. @Cache.Element("{self.Factory.CacheDir}", "Decompositions", "{self.Factory}", "{self.name}.npy")
  364. def CachedDecompose(self, cksize=2**20, **kwargs):
  365. Nb = kwargs.pop("Nbasis", 0)
  366. Mom = np.zeros((self.Factory["Nbasis"],))
  367. sumW = 0.0
  368. sumW2 = 0.0
  369. Neff = 0
  370. cksize = min(cksize, self.Npt)
  371. Fn = self.Factory.Fn(np.zeros((cksize,)), w=np.zeros((cksize,)),
  372. Nbasis=Nb if Nb > 0 else self.Factory["Nbasis"])
  373. while Neff < self.Npt:
  374. Fn['x'] = np.random.normal(loc=self.mu, scale=self.sigma, size=cksize)
  375. Fn['w'] = self.func(Fn['x']) / self._gauss(Fn['x'])
  376. k = np.nonzero(Fn['x'] <= self.Factory["x0"])
  377. Fn['w'][k] = 0
  378. Fn['x'][k] = 2*self.Factory["x0"]
  379. sumW += Fn['w'].sum()
  380. sumW2 += np.dot(Fn['w'], Fn['w'])
  381. Neff = int(round( (sumW*sumW)/sumW2 ))
  382. for D in Fn: Mom[D.N] += D.Moment()
  383. pdot()
  384. return Mom / sumW
  385. # Gaussian PDF
  386. def _gauss(self, x):
  387. u, s = self.mu, self.sigma
  388. return np.exp( -0.5*( (x-u)/s )**2 ) / sqrt(2*pi*s*s)
  389. # Set a signal model and generate
  390. def __init__(self, Factory, name, func, mu, sigma, **kwargs):
  391. self.Factory = Factory
  392. self.name = name
  393. self.func = func
  394. self.mu = mu
  395. self.sigma = sigma
  396. self.Mass = float(kwargs.get("Mass", self.mu))
  397. self.Npt = int( kwargs.get("NumPoints", 2**22))
  398. self.Active = bool( kwargs.get("Active", False))
  399. self.Moment = 0
  400. self.Yield = 0
  401. self.Mom = np.zeros((self.Factory["Nbasis"],))
  402. self.MomX = np.zeros((self.Factory["Nxfrm"],))
  403. ###
  404. ## A truncated orthonormal series
  405. ###
  406. class TruncatedSeries(object):
  407. # Evaluate series
  408. def __call__(self, x, trunc=False):
  409. Mom = self.MomAct if trunc else self.MomU
  410. Val = np.zeros_like(x)
  411. w = np.ones_like(x)
  412. for D in self.Factory.Fn(x, w):
  413. if D.N >= self.Nmin:
  414. Val += D.Values() * Mom[D.N]
  415. return Val
  416. # Get the common index range between self and other
  417. def _ci(self, othr):
  418. return ( max(self.Nmin, othr.Nmin),
  419. min(self.Nmax, othr.Nmax))
  420. # Get the entropy of this TruncatedSeries.
  421. def Entropy(self):
  422. j = kwargs.get('j', self.Nmin)
  423. k = kwargs.get('k', self.Nmax)
  424. k = min(k, self.Ncov/2)
  425. Cov = self.Cov[j:k,j:k]
  426. return slogdet(2*pi*e*Cov)[1] / 2
  427. # Dkl(othr||self) --> prior.KL(posterior). If specified, scale the
  428. # statistical precision of 'self' by Scale
  429. def KL(self, othr):
  430. j, k = self._ci(othr)
  431. k = min(k, self.Ncov/2, othr.Ncov/2)
  432. delta = self.MomAct[j:k] - othr.MomAct[j:k]
  433. ChSelf = cho_factor(self.Cov[j:k,j:k])
  434. h = cho_solve(ChSelf, delta)
  435. r = cho_solve(ChSelf, othr.Cov[j:k,j:k])
  436. return (np.trace(r) + delta.dot(h) - slogdet(r)[1] - k + j) / 2
  437. #Log-likelihood of othr with respect to self.
  438. def Chi2(self, othr):
  439. j, k = self._ci(othr)
  440. k = min(k, self.Ncov/2)
  441. delta = self.MomAct[j:k] - othr.MomAct[j:k]
  442. Ch = cho_factor(self.Cov[j:k,j:k])
  443. h = cho_solve(Ch, delta)
  444. return delta.dot(h) / 2
  445. # Negative log-likelihood of othr with respect to self.
  446. def LogP(self, othr):
  447. j, k = self._ci(othr)
  448. k = min(k, self.Ncov/2)
  449. delta = self.MomAct[j:k] - othr.MomAct[j:k]
  450. Ch = cho_factor(self.Cov[j:k,j:k])
  451. h = cho_solve(Ch, delta)
  452. l = 2*np.log(np.diag(Ch[0])).sum()
  453. return ( (k-j)*np.log(2*pi) + l + delta.dot(h)) / 2
  454. # Set the number of active moments.
  455. def Set(self, **kwargs):
  456. self.Nmin = kwargs.get('Nmin', self.Nmin)
  457. self.Nmax = kwargs.get('Nmax', self.Nmax)
  458. self.Mom = kwargs.get('Mom', self.Mom).copy()
  459. Ncov = self.Mom.size
  460. # Truncate or pad with zeros as necessary. Keep a copy of the original.
  461. self.MomU = self.Mom.copy()
  462. self.Mom.resize( (self.Factory["Nbasis"],), )
  463. R = np.arange(self.Mom.size, dtype=np.int)
  464. self.MomAct = self.Mom * (self.Nmin <= R) * (R < self.Nmax)
  465. # Build covariance matrix
  466. N = self.Cov.shape[0]
  467. self.Mx = self.Factory.MomMx( self.Mom, self.Nmin, self.Nmax, out=self.Mx)
  468. self.Cov = (self.Mx - np.outer(self.MomAct[:N], self.MomAct[:N])) / self.StatPre
  469. self.Ncov = Ncov
  470. # Set the moments from an iterator
  471. def FromIter(self, iter):
  472. Mom = np.asarray( [ D.Moment() for D in iter ] )
  473. self.Set(Mom=Mom)
  474. # Initialize by taking the decomposition of the dataset `basis'.
  475. # Store all moments. The `active' moments are [self.Nmin, self.Nmax)
  476. def __init__(self, Factory, Moments, StatPre=1, **kwargs):
  477. self.Factory = Factory
  478. self.StatPre = StatPre
  479. self.Nmin = kwargs.get("Nmin", 1)
  480. self.Nmax = kwargs.get("Nmax", Factory["Nbasis"])
  481. Nbasis = Factory["Nbasis"]
  482. Nxfrm = Factory["Nxfrm"]
  483. self.Mx = np.zeros( (Nxfrm, Nxfrm) )
  484. self.Cov = np.zeros( (Nxfrm, Nxfrm) ) # Covariance (weighted)
  485. self.MomAct = np.zeros( (Nbasis,) ) # Weighted, truncated moments
  486. self.MomU = Moments.copy()
  487. self.Mom = Moments.copy()
  488. self.Set()