fd_scan.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. #!/usr/bin/env python
  2. import os, sys, signal, argparse, ConfigParser
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. import Plots.Plots as Plots
  6. from Tools.Decomp import DataSet, Optimizer, SignalScan
  7. from Tools.OrthExp import ExpDecompFactory
  8. from Tools.ConfigIter import ConfigIter
  9. from Tools.PrintMgr import *
  10. from Plots.Plots import *
  11. from matplotlib.backends.backend_pdf import PdfPages
  12. # Load a specified variable from base dir
  13. def loadVar(varName, DSList, Dec):
  14. Path = [ os.path.join(ArgC.base, "Data", n, varName + ".npy") for n in DSList ]
  15. Ar = [ np.load(p, mmap_mode = 'r')[::int(d)] for p, d in zip(Path, Dec) ]
  16. return np.concatenate(Ar)
  17. # Test a condition on on cached variables
  18. def testVar(cond, DSList, Dec):
  19. Keep = []
  20. for n, d in zip(DSList, Dec):
  21. Path = os.path.join ( ArgC.base, "Data", n )
  22. Names = [ os.path.splitext(k)[0] for k in os.listdir ( Path ) ]
  23. Full = { k : os.path.join(Path, k + ".npy") for k in Names }
  24. Vars = { k : np.load(Full[k], mmap_mode = 'r')[::int(d)] for k in Names }
  25. Keep.append(eval(cond, Vars))
  26. return np.concatenate(Keep)
  27. # Evaluate a signal function string.
  28. def _fmtSignalFunc(x, **kwargs):
  29. try:
  30. return eval(x.format(**kwargs))
  31. except AttributeError:
  32. return x
  33. def _addSignalFunc(D, func, name, M, **kw):
  34. Active = kw.get("Active", (M is None))
  35. arg = { k : _fmtSignalFunc(v, Mass=M, **kw) for k, v in c.items() }
  36. ffunc = _fmtSignalFunc(func, **arg)
  37. D.AddParametricSignal(name, ffunc, Active=Active, **arg)
  38. ########################################
  39. ########################################
  40. # Stop stack trace on ctrl-c
  41. def nice_quit(signal, frame):
  42. sys.exit(0)
  43. signal.signal(signal.SIGINT, nice_quit)
  44. # Lower default NP print precision.
  45. np.set_printoptions(precision=4)
  46. ######
  47. # Parse command line parameters and config files
  48. ######
  49. ArgP = argparse.ArgumentParser(description='=== Functional Decomposition Fitter ===')
  50. ArgP.add_argument('--base', type=str, default=".", help="FD base directory.")
  51. ArgP.add_argument('--show', action="store_true", help="Display plots interactively.")
  52. ArgC = ArgP.parse_args()
  53. Config = ConfigParser.ConfigParser()
  54. Config.optionxform = str
  55. Config.read( os.path.join(ArgC.base, "base.conf") )
  56. PlotStyle = Config.get("General", "PlotStyle")
  57. try: plt.style.use( PlotStyle )
  58. except IOError: plt.style.use( os.path.join(ArgC.base, PlotStyle) )
  59. # Reduce numpy default print precision to make logs a bit neater.
  60. np.set_printoptions(precision=2)
  61. ######
  62. # Initialize the factory and signal models.
  63. ######
  64. # Get hyperparameter scan range
  65. ranges = { p : r.rstrip("j") for p, r in Config.items("HyperParameterScan") }
  66. rcomp = [ ranges[p].split(':')for p in ExpDecompFactory._fitparam ]
  67. rlist_xfrm = [] # The hyperparameter scan range for transformations
  68. rlist_dcmp = [] # The scan range for direct decompositions
  69. for param, r in zip(ExpDecompFactory._fitparam, rcomp) :
  70. start = min( float(r[0]), float(r[1]))
  71. stop = max( float(r[0]), float(r[1]))
  72. step_x = float(r[2])
  73. step_d = float(r[3]) if len(r) > 3 else 1
  74. dcmp = np.linspace (start, stop, step_d)
  75. xidx = np.linspace ( 0, step_d-1, step_x)
  76. xfrm_r = np.linspace (start, stop, step_x)
  77. if param == 'Alpha':
  78. dcmp_idx = np.round(xidx-0.5, 0).astype(int)
  79. elif param == 'Lambda':
  80. dcmp_idx = np.round(xidx+0.5, 0).astype(int)
  81. dcmp_idx = np.clip(dcmp_idx, 0, dcmp.size-1)
  82. dcmp_r = dcmp[ dcmp_idx ]
  83. rlist_xfrm.append( xfrm_r )
  84. rlist_dcmp.append( dcmp_r )
  85. Ax, Lx = np.meshgrid(*rlist_xfrm)
  86. Ad, Ld = np.meshgrid(*rlist_dcmp)
  87. # Create Factory configuration
  88. fConf = { k: eval(v) for k, v in Config.items('ExpDecompFactory') }
  89. for p, r in zip(ExpDecompFactory._fitparam, rlist_dcmp):
  90. fConf[p] = r[0]
  91. fConf['CacheDir'] = os.path.join(ArgC.base, "Cache")
  92. # Read input variables.
  93. SetList = Config.items("InputFiles")
  94. General = { p: r for p, r in Config.items("General") }
  95. varName = General.get("Variable")
  96. wgtName = General.get("Weight")
  97. Scale = General.get("Scale")
  98. Lumi = General.get("Lumi", 0.0)
  99. LumiUnc = General.get("LumiUnc", 0.0)
  100. XSecUnits = General.get("XSecUnits") if Lumi > 0 else "Events"
  101. NumOpt = General.get("NumOptSteps", 1)
  102. print
  103. print
  104. print "Input files:", " ".join( [n for n, d in SetList] )
  105. x = loadVar(varName, *zip(*SetList))
  106. w = loadVar(wgtName, *zip(*SetList)) * float(Scale)
  107. cutflow = [ ("Initial", w.sum()) ]
  108. for name, cond in Config.items("Cuts"):
  109. w *= testVar(cond, *zip(*SetList))
  110. cutflow.append((name, w.sum()))
  111. # Create objects
  112. Factory = ExpDecompFactory( **fConf )
  113. Tr = Factory.Xfrm()
  114. D = DataSet(x, Factory, w=w)
  115. FOpt = Optimizer(Factory, D)
  116. Names = {}
  117. Scans = {}
  118. # Initialize signal peaks
  119. for c, name in ConfigIter(Config, "ParametricSignal", "ParametricSignalDefault"):
  120. func = "lambda x:" + c.pop("func")
  121. Scan = c.pop("Scan", None)
  122. if Scan is None:
  123. _addSignalFunc(D, func, name, None, **c)
  124. else:
  125. Names[name] = [ name + "%.3f" % M for M in Scan ]
  126. for M, fname in zip( Scan, Names[name] ):
  127. _addSignalFunc(D, func, fname, M, **c)
  128. print
  129. ######
  130. # Decompose
  131. ######
  132. LLH, PBest = FOpt.ScanW(Ax, Lx, Ad, Ld)
  133. dA = Ax[0,1] - Ax[0,0]
  134. dL = Lx[1,0] - Lx[0,0]
  135. for _ in range(1):
  136. Ab = PBest['Alpha']
  137. Lb = PBest['Lambda']
  138. ini = np.asarray( [ (Ab-dA, Lb-dL), (Ab-dA, Lb+dL), (Ab+dA, Lb) ] )
  139. Factory.update( PBest )
  140. D.Decompose(xonly=True)
  141. NBest, LBest, PBest = FOpt.FitW( initial_simplex = ini)
  142. ### Re-decompose with the full expansion and extract signals.
  143. Factory.update( PBest)
  144. D.Decompose(reduced=False)
  145. FOpt.UpdateXfrm(**PBest)
  146. ### attr="Mom": use the full expansion from here on out
  147. D.SetN(N=NBest, attr="Mom")
  148. D.Covariance()
  149. if len(D.Signals) > 0:
  150. D.PrepSignalEstimators(reduced=False, verbose=True)
  151. ### Calculate yields, uncertainties and CLs
  152. fmt = {
  153. "wht": "\x1b[37m %8.1f \x1b[0m",
  154. "grn": "\x1b[32m %8.1f \x1b[0m",
  155. "yel": "\x1b[33m %8.1f \x1b[0m",
  156. "red": "\x1b[31m %8.1f \x1b[0m",
  157. }
  158. lfmt = [ "red", "yel", "grn", "wht", "grn", "yel", "red" ]
  159. print
  160. print
  161. print "=====> YIELD AND LIMITS <====="
  162. print
  163. print "%-16s: %8s +- %8s (%5s) [ %9s ] [ %9s %9s %9s %9s %9s ]" % (
  164. "Signal", "Yield", "Unc", "Sig.", "Obs. CL95",
  165. "-2sigma", "-1sigma", "Exp. CL95", "+1sigma", "+2sigma")
  166. for scan_name, sig_names in Names.items():
  167. Scans[scan_name] = SignalScan(Factory, D, *sig_names, Lumi=Lumi, LumiUnc=LumiUnc)
  168. t1 = time.time()
  169. for name, yld, unc, obs, exp in Scans[scan_name]:
  170. t2 = time.time()
  171. sig = yld / unc
  172. isig = 3 + np.clip(int(sig) + (1 if sig > 0 else -1), -3, 3)
  173. print "%-16s: %8.1f +- % 8.1f (% 4.2f)" % (name, yld, unc, sig),
  174. print "[", fmt[lfmt[ isig ]] % obs, "] [",
  175. for l, e in zip(lfmt[1:-1], exp):
  176. print fmt[l] % e,
  177. print "] (%4.2fs)" % (t2-t1)
  178. t1 = time.time()
  179. ######
  180. # Output fit results
  181. ######
  182. Nxfrm = Factory["Nxfrm"]
  183. print
  184. print
  185. print "=====> CUTFLOW <====="
  186. for c in cutflow:
  187. print "% 12s: %.2f" % c
  188. print
  189. print
  190. print "=====> SIGNAL RESULTS <====="
  191. for name in D.GetActive():
  192. s = D[name]
  193. print "% 12s: %.2f +- %.2f" % (name, s.Yield, s.Unc)
  194. print
  195. print
  196. print "=====> COVARIANCE < ====="
  197. print D.Cov
  198. print
  199. print
  200. print "=====> CORRELATION < ====="
  201. print D.Corr
  202. print
  203. print
  204. print "=====> RAW MOMENTS <====="
  205. for n in range(33):
  206. print "% 3d: %+.3e " % (n, D.Mom[n]),
  207. if n % 4 == 3: print
  208. print
  209. print
  210. print "=====> BACKGROUND COEFFICIENTS <====="
  211. for n in range(D.N):
  212. print "% 3d: %+.3e " % (n, D.TestB.Mom[n]),
  213. if n % 4 == 3: print
  214. print
  215. print
  216. ######
  217. # Plotting
  218. ######
  219. print
  220. print
  221. print "=====> PLOTTING < ====="
  222. def op(*x):
  223. return os.path.join(ArgC.base, "Output", *x)
  224. try:
  225. os.mkdir(op())
  226. except OSError:
  227. pass
  228. pdf = PdfPages(op('all.pdf'))
  229. ini = fConf["Lambda"], fConf["Alpha"]
  230. fin = Factory["Lambda"], Factory["Alpha"]
  231. Plots.cutflow (cutflow, pdf=pdf, fname=op('cutflow.pdf'))
  232. if LLH.size > 3:
  233. Plots.scan (Lx, Ax, LLH, LBest, Ld, Ad, fin, pdf=pdf, fname=op('hyperparameter_scan_400.pdf'))
  234. Plots.scan (Lx, Ax, LLH, LBest, Ld, Ad, fin, pdf=pdf, fname=op('hyperparameter_scan_25.pdf'), maxZ=25)
  235. Plots.summary_table(D, pdf=pdf, fname=op('signal_summary.pdf'))
  236. for p, file in ConfigIter(Config, "Plot", "PlotDefault"):
  237. Pull = p.pop("DrawPull", True)
  238. try:
  239. Type = p.pop("Type")
  240. except KeyError:
  241. print "ERROR: Must specify 'Type' key in config for %s. Valid types are:" % file
  242. print " Fit (requires keys: 'Bins')"
  243. print " Scan (requires keys: 'Scans')"
  244. continue
  245. if Type == "Fit":
  246. h, res = Plots.fit (D, pdf=pdf, fname=op(file), **p)
  247. if Pull:
  248. Plots.pull (h, res, pdf=pdf, fname=op("pull-" + file) )
  249. elif Type == "Scan":
  250. Plots.mass_scan (Scans, pdf=pdf, fname=op(file), Units=XSecUnits, **p)
  251. elif Type == "Estimators":
  252. Plots.estimators (D, pdf=pdf, fname=op(file), **p)
  253. elif Type == "Moments":
  254. Plots.moments (D, pdf=pdf, fname=op(file), **p)
  255. pdf.close()