asimov_fit.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. import os
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from scipy.optimize import curve_fit
  5. from atlas_fit_function import atlas_invMass_mumu
  6. data_path = 'DATA/generated_histograms'
  7. histo_name = 'hist_range_110-160_nbin-25_Asimov'
  8. labels = ['Background', 'Signal', 'Data']
  9. save_figs = False
  10. #############################################################################################################################
  11. # Load data
  12. pldict = {}
  13. for label in labels:
  14. fileName = os.path.join(data_path, histo_name + label + '.npz')
  15. with np.load(fileName, 'rb') as data:
  16. bin_centers = data['bin_centers']
  17. bin_edges = data['bin_edges']
  18. bin_values = data['bin_values']
  19. bin_errors = data['bin_errors']
  20. pldict[label] = [bin_centers, bin_edges, bin_values, bin_errors]
  21. xs = np.linspace(bin_edges[0], bin_edges[-1], 301)
  22. width = (np.max(xs) - np.min(xs)) / len(bin_centers)
  23. #############################################################################################################################
  24. # Helper fit functions
  25. def BackgroundFit(x, a, b, c, d, e):
  26. '''Some random function as a weight to the atlas_fit_function'''
  27. return atlas_invMass_mumu(np.exp(a * x) + b * x**3 + c * x**2 + d * x + e, x)
  28. def CrystalBall(x, A, aL, aR, nL, nR, mCB, sCB):
  29. '''Double-Sided Crystal Ball Function, see e.g. arXiv:2009.04363v2'''
  30. np.seterr(all="ignore") # Turn off some annoying warnings
  31. return np.piecewise(x, [(x - mCB) / sCB <= -aL,
  32. (x - mCB) / sCB >= aR],
  33. [lambda x: A * (nL / np.abs(aL))**nL * np.exp(-aL**2 / 2) * (nL / np.abs(aL) - np.abs(aL) - (x - mCB) / sCB)**(-nL),
  34. lambda x: A * (nR / np.abs(aR))**nR * np.exp(-aR**2 / 2) * (nR / np.abs(aR) - np.abs(aR) + (x - mCB) / sCB)**(-nR),
  35. lambda x: A * np.exp(-(x - mCB)**2 / (2 * sCB**2))
  36. ])
  37. ###########################################################################################################################
  38. # Fit background from data
  39. bin_centers = pldict['Data'][0]
  40. bin_edges = pldict['Data'][1]
  41. bin_values = pldict['Data'][2]
  42. bin_errors = pldict['Data'][3]
  43. xerrs = 0.5 * (bin_edges[1:] - bin_edges[:-1])
  44. # Blind signal region
  45. signal_region = (117, 135)
  46. mask = (bin_centers < signal_region[0]) | (bin_centers > signal_region[1])
  47. bin_centers_masked = bin_centers[mask]
  48. bin_values_masked = bin_values[mask]
  49. bin_errors_masked = bin_errors[mask]
  50. xerrs_masked = xerrs[mask]
  51. popt_masked, pcov = curve_fit(BackgroundFit, bin_centers_masked, bin_values_masked, sigma=bin_errors_masked, p0=[0.23, -3.5e10, 1.2e13, -1.5e15, 6e16])
  52. f, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(16, 9), gridspec_kw={'height_ratios': [3, 1]})
  53. f.suptitle('Fitting background from data', fontsize=22)
  54. ax1.errorbar(bin_centers, bin_values, bin_errors, xerrs, marker='o', markersize=5, color='k', ecolor='k', ls='', label='Data')
  55. ax1.plot(xs, BackgroundFit(xs, *popt_masked), 'r-', label='Blinded Data fit')
  56. ax1.axvspan(signal_region[0], signal_region[1], color='gainsboro', lw=2, ls='--', ec='k')
  57. ax1.set_ylabel('Number of events', fontsize=20)
  58. ax1.text(123, 2 * 10**5, "Blinded area", size=20)
  59. ax1.tick_params(axis='both', which='major', labelsize=20)
  60. ax1.legend(fontsize=20)
  61. # ax1.set_yscale('log')
  62. ax2.bar(bin_centers_masked, bin_values_masked / BackgroundFit(bin_centers_masked, *popt_masked) - 1, width=width, color='k')
  63. ax2.axhline(0, color='k', ls='--', alpha=0.7)
  64. ax2.set_xlabel(r'$m_{\mu \mu}$', fontsize=20)
  65. ax2.set_ylabel('(Data-Pred.)/Pred.', fontsize=20)
  66. ax2.set_xticks(bin_edges[::2])
  67. ax2.tick_params(axis='both', which='major', labelsize=20)
  68. ax2.grid(True)
  69. f.tight_layout()
  70. if save_figs:
  71. plt.savefig('DataBkgFit.pdf')
  72. #############################################################################################################################
  73. # Fit simiulated signal with the crystal ball function
  74. bin_centers = pldict['Signal'][0]
  75. bin_edges = pldict['Signal'][1]
  76. bin_values = pldict['Signal'][2]
  77. bin_errors = pldict['Signal'][3]
  78. popt_CB, pcov = curve_fit(CrystalBall, bin_centers, bin_values, sigma=bin_errors, p0=[2.6 * 10**4., 1.5, 1.5, 5., 5., 124.6, 2.5])
  79. f, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(16, 12), gridspec_kw={'height_ratios': [3, 1]})
  80. f.suptitle('Fitting inflated simulated signal', fontsize=22)
  81. ax1.scatter(bin_centers, bin_values, color='k', label='Simulated 100x signal')
  82. ax1.plot(xs, CrystalBall(xs, *popt_CB), color='r', label='Fitted signal')
  83. ax1.axvline(popt_CB[-2], color='k', ls='--', alpha=0.3)
  84. ax1.annotate(r'$m_{{Higgs}}^{{fit}}= {:.2f}$ GeV'.format(popt_CB[-2]) + '\n' + r'$N_{{Higgs}}^{{exp}} = {:d}$'.format(int(popt_CB[0])), (popt_CB[-2], 10000), xytext=(148, 2000), fontsize=20, arrowprops=dict(facecolor='black', shrink=0.05), size=20, bbox=dict(facecolor='w', edgecolor='gray', boxstyle='round,pad=0.5'))
  85. string = 'CB parameters after fit:\n'
  86. for par, pop in zip(['A', r'$\alpha_L$', r'$\alpha_R$', r'$n_L$', r'$n_R$', r'$\overline{m}_{CB}$', r'$\sigma_{CB}$'], popt_CB):
  87. string += par + f': {pop:.3f}\n'
  88. ax1.text(148, 10**4, string[:-1], size=20, bbox=dict(facecolor='w', edgecolor='gray', boxstyle='round,pad=0.5'))
  89. ax1.legend(loc='upper right', fontsize=20)
  90. ax1.set_ylabel('Number of events', fontsize=20)
  91. ax1.set_xticks(bin_edges[::2])
  92. ax1.tick_params(axis='both', which='major', labelsize=20)
  93. ax1.grid(True)
  94. ax2.bar(bin_centers, bin_values / CrystalBall(bin_centers, *popt_CB) - 1, width=width, color='k')
  95. ax2.axhline(0, color='k', ls='--', alpha=0.7)
  96. ax2.set_xlabel(r'$m_{\mu \mu}$', fontsize=20)
  97. ax2.set_ylabel('(Data-Pred.)/Pred.', fontsize=20)
  98. ax2.set_xticks(bin_edges[::2])
  99. ax2.tick_params(axis='both', which='major', labelsize=20)
  100. ax2.grid(True)
  101. f.tight_layout()
  102. if save_figs:
  103. plt.savefig('AsimovSimSignalFit.pdf')
  104. #############################################################################################################################
  105. # Extract and fit our signal
  106. # Signal = Data - Fitted Background
  107. bin_values = pldict['Data'][2]
  108. extracted_signal = bin_values - BackgroundFit(bin_centers, *popt_masked)
  109. bin_centers = pldict['Signal'][0]
  110. bin_edges = pldict['Signal'][1]
  111. bin_values = pldict['Signal'][2]
  112. bin_errors = pldict['Signal'][3]
  113. def scale_signal(x, scale, popt_CB=popt_CB):
  114. return scale * CrystalBall(x, *popt_CB)
  115. # Fit extracted signal with the crystal ball function
  116. popt_final, pcov = curve_fit(scale_signal, bin_centers, extracted_signal, sigma=bin_errors, p0=[1.])
  117. NHiggs = int(popt_CB[0] * popt_final[0])
  118. f, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(16, 12), gridspec_kw={'height_ratios': [3, 1]})
  119. f.suptitle('Fitting extracted signal', fontsize=22)
  120. ax1.plot(xs, scale_signal(xs, *popt_final), color='r', label='Fitted signal')
  121. ax1.scatter(bin_centers, extracted_signal, color='k', label='Extracted signal')
  122. ax1.axvline(popt_CB[-2], color='k', ls='--', alpha=0.3)
  123. ax1.text(110, 15000, r'$\alpha_{{scale}} = {:.3f}$'.format(*popt_final) + '\n' + r'$N_{{Higgs}} = {:d}$'.format(NHiggs), size=25, bbox=dict(facecolor='w', edgecolor='gray', boxstyle='round,pad=0.5'))
  124. ax1.legend(loc='upper right', fontsize=20)
  125. ax1.set_ylabel('Number of events', fontsize=20)
  126. ax1.set_xticks(bin_edges[::2])
  127. ax1.tick_params(axis='both', which='major', labelsize=20)
  128. ax1.set_xlim((108, 140)) # Plot only relevant part of the signal
  129. ax1.grid(True)
  130. ax2.bar(bin_centers, extracted_signal / scale_signal(bin_centers, *popt_final) - 1, width=width, color='k')
  131. ax2.axhline(0, color='k', ls='--', alpha=0.7)
  132. ax2.set_xlabel(r'$m_{\mu \mu}$', fontsize=20)
  133. ax2.set_ylabel('(Data-Pred.)/Pred.', fontsize=20)
  134. ax2.set_xticks(bin_edges[::2])
  135. ax2.tick_params(axis='both', which='major', labelsize=20)
  136. ax2.set_xlim((108, 140)) # Plot only relevant part of the signal
  137. ax2.set_ylim((-2, 2))
  138. ax2.grid(True)
  139. f.tight_layout()
  140. if save_figs:
  141. plt.savefig('AsimovSignalFit.pdf')