final_fit.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. import numpy as np
  2. from scipy.optimize import curve_fit
  3. import matplotlib.pyplot as plt
  4. from atlas_fit_function import atlas_invMass_mumu
  5. #############################################################################################################################
  6. # User defined
  7. labels = ['Background', 'Signal', 'Data']
  8. pldict = {}
  9. for label in labels:
  10. with np.load('DATA/original_histograms/mass_mm_higgs_' + label + '.npz', 'rb') as data:
  11. bin_centers = data['bin_centers']
  12. bin_edges = data['bin_edges']
  13. bin_values = data['bin_values']
  14. bin_errors = data['bin_errors']
  15. pldict[label] = [bin_centers, bin_edges, bin_values, bin_errors]
  16. xs = np.linspace(bin_edges[0], bin_edges[-1], 301)
  17. width = (np.max(xs) - np.min(xs)) / len(bin_centers)
  18. chebyshev = False
  19. save = False
  20. #############################################################################################################################
  21. # Fit simulated background
  22. bin_centers = pldict['Background'][0]
  23. bin_edges = pldict['Background'][1]
  24. bin_values = pldict['Background'][2]
  25. bin_errors = pldict['Background'][3]
  26. xerrs = 0.5 * (bin_edges[1:] - bin_edges[:-1])
  27. def Background(x, a, b, c, d, e, f, g, h, func=atlas_invMass_mumu):
  28. if chebyshev:
  29. # Choose Chebyshev polynomials as weights
  30. from numpy.polynomial.chebyshev import chebval
  31. return func(chebval(x, [a, b, c, d, e, f, g, h]), x)
  32. else:
  33. # Choose some weighting function
  34. return func(np.exp(a * x) + b * x**3 + c * x**2 + d * x + h, x)
  35. def return_p0(chebyshev):
  36. if chebyshev:
  37. return (10**6, 1., 1., 1., 1., 1., 1., 1.)
  38. else:
  39. return (1., 1., 1., 1., 1., 1., 1., 10**6)
  40. popt, pcov = curve_fit(Background, bin_centers, bin_values, sigma=bin_errors, p0=return_p0(chebyshev))
  41. perr = np.sqrt(np.diag(pcov))
  42. f, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(16, 9), gridspec_kw={'height_ratios': [3, 1]})
  43. f.suptitle('Fitting simulated background', fontsize=22)
  44. ax1.errorbar(bin_centers, bin_values, bin_errors, xerrs, marker='o', markersize=5, color='k', ecolor='k', ls='', label='Original SimBkg histogram')
  45. ax1.plot(xs, Background(xs, *popt), 'r-', label='Simulated Background fit')
  46. ax1.set_ylabel('Number of events', fontsize=20)
  47. ax1.tick_params(axis='both', which='major', labelsize=20)
  48. ax1.legend(fontsize=20)
  49. # ax1.set_yscale('log')
  50. ax2.bar(bin_centers, bin_values / Background(bin_centers, *popt) - 1, width=width, color='k')
  51. ax2.axhline(0, color='k', ls='--', alpha=0.7)
  52. ax2.set_xlabel(r'$m_{\mu \mu}$', fontsize=20)
  53. ax2.set_ylabel('(Data-Pred.)/Pred.', fontsize=20)
  54. ax2.set_xticks(bin_edges[::4])
  55. ax2.tick_params(axis='both', which='major', labelsize=20)
  56. ax2.grid(True)
  57. f.tight_layout()
  58. if save:
  59. plt.savefig('SimBkg_fit.pdf')
  60. ############################################################################################################################
  61. # Fit background from data
  62. bin_centers = pldict['Data'][0]
  63. bin_edges = pldict['Data'][1]
  64. bin_values = pldict['Data'][2]
  65. bin_errors = pldict['Data'][3]
  66. xerrs = 0.5 * (bin_edges[1:] - bin_edges[:-1])
  67. # Blind signal region
  68. signal_region = (bin_centers < 120) | (bin_centers > 130)
  69. bin_centers_masked = bin_centers[signal_region]
  70. bin_values_masked = bin_values[signal_region]
  71. bin_errors_masked = bin_errors[signal_region]
  72. xerrs_masked = xerrs[signal_region]
  73. popt_full, pcov_full = curve_fit(Background, bin_centers, bin_values, sigma=bin_errors, p0=return_p0(chebyshev))
  74. popt_masked, pcov = curve_fit(Background, bin_centers_masked, bin_values_masked, sigma=bin_errors_masked, p0=return_p0(chebyshev))
  75. f, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(16, 9), gridspec_kw={'height_ratios': [3, 1]})
  76. f.suptitle('Fitting background from data', fontsize=22)
  77. ax1.errorbar(bin_centers_masked, bin_values_masked, bin_errors_masked, xerrs_masked, marker='o', markersize=5, color='k', ecolor='k', ls='', label='Original Data histogram')
  78. ax1.plot(xs, Background(xs, *popt_full), 'g-', label='Data Background fit full', alpha=0.7)
  79. ax1.plot(xs, Background(xs, *popt_masked), 'r-', label='Data Background fit', alpha=0.7)
  80. # ax1.text(140, 10**5, 'Full:\na: {:.3e}\nb: {:.3f}\nc: {:.3f}\nd: {:.3e}'.format(*popt_full), fontsize=20)
  81. # ax1.text(150, 10**5, 'Blinded:\na: {:.3e}\nb: {:.3f}\nc: {:.3f}\nd: {:.3e}'.format(*popt_masked), fontsize=20)
  82. ax1.set_ylabel('Number of events', fontsize=20)
  83. ax1.tick_params(axis='both', which='major', labelsize=20)
  84. ax1.legend(fontsize=20)
  85. # ax1.set_yscale('log')
  86. ax2.bar(bin_centers_masked, bin_values_masked / Background(bin_centers_masked, *popt_masked) - 1, width=width / 2., color='g')
  87. ax2.bar(bin_centers_masked + width / 2., bin_values_masked / Background(bin_centers_masked, *popt_full) - 1, width=width / 2., color='r')
  88. ax2.axhline(0, color='k', ls='--', alpha=0.7)
  89. ax2.set_xlabel(r'$m_{\mu \mu}$', fontsize=20)
  90. ax2.set_ylabel('(Data-Pred.)/Pred.', fontsize=20)
  91. ax2.set_xticks(bin_edges[::4])
  92. ax2.tick_params(axis='both', which='major', labelsize=20)
  93. ax2.grid(True)
  94. f.tight_layout()
  95. if save:
  96. plt.savefig('DataBkg_fit.pdf')
  97. #############################################################################################################################
  98. # Extract signal (data - background fit)
  99. extracted_signal = bin_values - Background(bin_centers, *popt_masked)
  100. plt.figure(figsize=(12, 4.5))
  101. plt.title('Extracted signal', fontsize=22)
  102. plt.scatter(bin_centers, extracted_signal, color='k', label='Extracted signal')
  103. plt.xlabel(r'$m_{\mu \mu}$', fontsize=20)
  104. plt.ylabel('Number of events', fontsize=20)
  105. plt.xticks(bin_edges[::4], bin_edges[::4].astype(int), size=20)
  106. plt.yticks(size=20)
  107. plt.legend(fontsize=20)
  108. plt.tight_layout()
  109. plt.grid()
  110. if save:
  111. plt.savefig('Extracted_signal.pdf')
  112. #############################################################################################################################
  113. # Fit simulated signal
  114. bin_centers = pldict['Signal'][0]
  115. bin_edges = pldict['Signal'][1]
  116. bin_values = pldict['Signal'][2]
  117. bin_errors = pldict['Signal'][3]
  118. xerrs = 0.5 * (bin_edges[1:] - bin_edges[:-1])
  119. def CB(x, A, aL, aR, nL, nR, mCB, sCB):
  120. return np.piecewise(x, [(x - mCB) / sCB <= -aL,
  121. (x - mCB) / sCB >= aR],
  122. [lambda x: A * (nL / np.abs(aL))**nL * np.exp(-aL**2 / 2) * (nL / np.abs(aL) - np.abs(aL) - (x - mCB) / sCB)**(-nL),
  123. lambda x: A * (nR / np.abs(aR))**nR * np.exp(-aR**2 / 2) * (nR / np.abs(aR) - np.abs(aR) + (x - mCB) / sCB)**(-nR),
  124. lambda x: A * np.exp(-(x - mCB)**2 / (2 * sCB**2))
  125. ])
  126. popt_CB, pcov = curve_fit(CB, bin_centers, bin_values, sigma=bin_errors, p0=[134., 1.5, 1.5, 5., 5., 124.6, 2.7])
  127. perr = np.sqrt(np.diag(pcov))
  128. plt.figure(figsize=(16, 9))
  129. plt.title('Fitting simulated signal', fontsize=22)
  130. plt.errorbar(bin_centers, bin_values, bin_errors, xerrs, marker='o', markersize=5, color='k', ecolor='k', ls='', label='Original histogram')
  131. plt.plot(xs, CB(xs, *popt_CB), 'r-', label='signal fit CB')
  132. string = 'CB parameters after fit:\n'
  133. for par, pop in zip(['A', r'$\alpha_L$', r'$\alpha_R$', r'$n_L$', r'$n_R$', r'$\overline{m}_{CB}$', r'$\sigma_{CB}$'], popt_CB):
  134. string += par + f': {pop:.3f}\n'
  135. plt.text(115, 75, string[:-1], size=20, bbox=dict(facecolor='none', edgecolor='gray', boxstyle='round,pad=0.5'))
  136. plt.xlabel(r'$m_{\mu \mu}$', fontsize=20)
  137. plt.ylabel('Number of events', fontsize=20)
  138. plt.xticks(bin_edges[::4], bin_edges[::4].astype(int), size=20)
  139. plt.yticks(size=20)
  140. plt.legend(fontsize=20)
  141. if save:
  142. plt.savefig('CB_fit.pdf')
  143. #############################################################################################################################
  144. # Scale our signal
  145. def scale_signal(x, scale, popt_CB=popt_CB):
  146. return scale * CB(x, *popt_CB)
  147. popt, pcov = curve_fit(scale_signal, bin_centers, extracted_signal, sigma=pldict['Data'][3], p0=[1.])
  148. NHiggs = int(popt_CB[0] * popt[0])
  149. plt.figure(figsize=(12, 8))
  150. plt.title('Fitted signal', fontsize=22)
  151. plt.plot(xs, scale_signal(xs, popt), color='r', label='Fitted signal')
  152. plt.errorbar(bin_centers, extracted_signal, pldict['Data'][3], xerrs, marker='o', markersize=5, color='k', ecolor='k', ls='', label='Extracted signal')
  153. plt.text(130, -200, r'$\alpha_{{scale}} = {:.3f}$'.format(*popt) + '\n' + r'$N_{{Higgs}} = {:d}$'.format(NHiggs), size=20, bbox=dict(facecolor='w', edgecolor='gray', boxstyle='round,pad=0.5'))
  154. plt.xlabel(r'$m_{\mu \mu}$', fontsize=20)
  155. plt.ylabel('Number of events', fontsize=20)
  156. plt.xticks(bin_edges[::4], bin_edges[::4].astype(int), size=20)
  157. plt.yticks(size=20)
  158. plt.legend(loc='upper right', fontsize=20)
  159. plt.tight_layout()
  160. plt.grid()
  161. if save:
  162. plt.savefig('final_fit.pdf')