create_histograms.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import os
  2. import sys
  3. import numpy as np
  4. import pandas as pd
  5. # User defined: -> play around with n_bins and x_range
  6. #################################################################################
  7. datadir_input = "DATA/raw_data/" # Directory to raw data, change this!
  8. datadir_output = "DATA/generated_histograms/" # Directory to generated histograms, change/create this!
  9. x_range = (110, 160) # m_mumu energy interval (110.,160.) for Higgs
  10. n_bins = 50 # number of bins for histograms
  11. #################################################################################
  12. ds_bkg = "mc_bkg_new" # filename for Background simulations (.h5)
  13. ds_sig = "mc_sig" # filename for Signal simulations (.h5)
  14. ds_data = "data" # filename for Measured data (.h5)
  15. datasets = [ds_bkg, ds_sig, ds_data]
  16. labels = ["Background", "Signal", "Data"]
  17. def load_data(dataset, datadir):
  18. """Function for loading .h5 Atlas datasets"""
  19. infile = os.path.join(datadir, dataset + '.h5')
  20. print('Loading {}...'.format(infile))
  21. store = pd.HDFStore(infile, 'r')
  22. dataset = store['ntuple']
  23. return dataset, store
  24. for label, dataset in zip(labels, datasets):
  25. # Load dataset
  26. ds, file = load_data(dataset, datadir_input)
  27. # Get simulated (Background, Signal) or measured (Data) data
  28. all_events = ds["Muons_Minv_MuMu_Paper"]
  29. # Get correct weights
  30. wts = ds["CombWeight"]
  31. wts2 = wts * wts
  32. # Firstly, get correct number of bin_values
  33. bin_values, _ = np.histogram(all_events, bins=n_bins, range=x_range, weights=wts) # wts!
  34. # Secondly, calculate bin_errors
  35. y, bin_edges = np.histogram(all_events, bins=n_bins, range=x_range, weights=wts2) # wts2!
  36. bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
  37. bin_errors = np.sqrt(y)
  38. # Finally, save several arrays into a single file in uncompressed .npz format
  39. save_name = datadir_output + 'hist_range_' + str(x_range[0]) + '-' + str(x_range[1]) + '_nbin-' + str(n_bins) + '_' + label + '.npz'
  40. with open(save_name, 'wb') as f:
  41. np.savez(f, bin_edges=bin_edges, bin_centers=bin_centers, bin_values=bin_values, bin_errors=bin_errors)
  42. f.close()