12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- import os
- import sys
- import numpy as np
- import pandas as pd
# ------------------------------------------------------------------------------
# User settings -- experiment with n_bins and x_range.
# ------------------------------------------------------------------------------
# Directory holding the raw .h5 input files (change this!).
datadir_input = "DATA/raw_data/"
# Directory that receives the generated histograms (change/create this!).
datadir_output = "DATA/generated_histograms/"
# m_mumu energy interval; (110., 160.) is used for the Higgs.
x_range = (110, 160)
# Number of histogram bins.
n_bins = 50
# ------------------------------------------------------------------------------

# Base filenames (without the .h5 extension) of the three input datasets.
ds_bkg = "mc_bkg_new"  # background simulations
ds_sig = "mc_sig"  # signal simulations
ds_data = "data"  # measured data

# Datasets and their labels, kept in matching order so they can be zipped.
datasets = [ds_bkg, ds_sig, ds_data]
labels = ["Background", "Signal", "Data"]
def load_data(dataset, datadir):
    """Load the 'ntuple' table from an .h5 ATLAS dataset.

    Parameters
    ----------
    dataset : str
        Base filename of the dataset, without the '.h5' extension.
    datadir : str
        Directory that contains the file.

    Returns
    -------
    tuple
        ``(ntuple, store)``: the object stored under the 'ntuple' key and the
        still-open read-only ``pd.HDFStore``. The caller is responsible for
        closing the store.

    Raises
    ------
    Whatever ``pd.HDFStore`` raises when the file cannot be opened, or the
    error raised when reading the 'ntuple' key fails; in the latter case the
    store is closed before the error propagates.
    """
    infile = os.path.join(datadir, dataset + '.h5')
    print('Loading {}...'.format(infile))
    store = pd.HDFStore(infile, 'r')
    try:
        ntuple = store['ntuple']
    except Exception:
        # Do not leak the open HDF5 handle when the table cannot be read.
        store.close()
        raise
    return ntuple, store
# Build and save one weighted histogram per dataset (Background, Signal, Data).
for label, dataset in zip(labels, datasets):
    # Load the dataset and pull out the columns we need, then release the
    # HDF5 store handle so it is not leaked across loop iterations.
    ds, store = load_data(dataset, datadir_input)
    try:
        # Simulated (Background, Signal) or measured (Data) values
        all_events = ds["Muons_Minv_MuMu_Paper"]
        # Per-event weights
        wts = ds["CombWeight"]
    finally:
        store.close()
    wts2 = wts * wts

    # Bin contents: histogram of the events weighted by wts.
    bin_values, bin_edges = np.histogram(
        all_events, bins=n_bins, range=x_range, weights=wts)
    # Bin errors: square root of the per-bin sum of squared weights (wts2).
    sum_wts2, _ = np.histogram(
        all_events, bins=n_bins, range=x_range, weights=wts2)
    bin_errors = np.sqrt(sum_wts2)
    bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])

    # Save all arrays into a single uncompressed .npz file. os.path.join keeps
    # the path valid whether or not datadir_output ends with a separator.
    save_name = os.path.join(
        datadir_output,
        'hist_range_{}-{}_nbin-{}_{}.npz'.format(
            x_range[0], x_range[1], n_bins, label))
    with open(save_name, 'wb') as f:
        np.savez(f, bin_edges=bin_edges, bin_centers=bin_centers,
                 bin_values=bin_values, bin_errors=bin_errors)
|