fitting_example_GPR-simple.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import numpy as np
  2. from sklearn.gaussian_process import GaussianProcessRegressor
  3. # Different kernels available in sklearn:
  4. from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern, WhiteKernel, ExpSineSquared, RationalQuadratic
  5. from matplotlib import pyplot as plt
  6. # Set random seed for same results
  7. np.random.seed(12345)
  8. # Load data
  9. inFileName = "DATA/original_histograms/mass_mm_higgs_Background.npz"
  10. with np.load(inFileName) as data:
  11. bin_edges = data['bin_edges']
  12. bin_centers = data['bin_centers']
  13. bin_values = data['bin_values']
  14. bin_errors = data['bin_errors']
  15. # Set hyper-parameter bounds for ConstantKernel
  16. nEvts = np.sum(bin_values)
  17. const0 = 1
  18. const_low = 10.0**-5
  19. const_hi = nEvts * 10.0
  20. # Set hyper-parameter bounds for RBF kernel
  21. RBF0 = 1
  22. RBF_low = 1e-2
  23. RBF_high = 1e2
  24. # A) Define kernel: ConstantKernel * RBF:
  25. kernel_RBF = ConstantKernel(const0, constant_value_bounds=(const_low, const_hi)) * RBF(RBF0, length_scale_bounds=(RBF_low, RBF_high))
  26. # B) Define kernel: ConstantKernel * Matern:
  27. kernel_Matern = ConstantKernel(const0, constant_value_bounds=(const_low, const_hi)) * Matern(RBF0, length_scale_bounds=(RBF_low, RBF_high), nu=1.5)
  28. # Transform x data into 2d vector!
  29. X = np.atleast_2d(bin_centers).T # true datapoints
  30. X_to_predict = np.atleast_2d(np.linspace(110, 160, 1000)).T # what to predict
  31. y = bin_values
  32. # Initialize Gaussian Process Regressor: !!! alpha = 2xbin_errors or 1xbin_errors ? Your task to figure out. !!!
  33. gp = GaussianProcessRegressor(kernel=kernel_RBF, n_restarts_optimizer=1, alpha=2 * bin_errors)
  34. # Fit on X with values y
  35. gp.fit(X, y)
  36. # Predict
  37. y_pred, sigma = gp.predict(X_to_predict, return_std=True)
  38. plt.title("Example GPR with RBF kernel", fontsize=14, fontweight='bold')
  39. plt.xlabel(r"$m_{\mu\mu}$ [GeV]", fontsize=12)
  40. plt.ylabel("Events/Bin", fontsize=12)
  41. plt.scatter(bin_centers, bin_values, color='r', linewidth=0.5, marker='o', s=10)
  42. plt.plot(X_to_predict, y_pred, color='k')
  43. plt.fill_between(X_to_predict.ravel(), y_pred - sigma, y_pred + sigma)
  44. plt.show()
  45. plt.savefig("GPR_simple.pdf")