metrics.py 1.8 KB

import numpy as np


# Expected Calibration Error (ECE), adapted from
# https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
def ECE(samples, true_labels, M=5):
    # samples: (N, num_classes) array of predicted probabilities
    # true_labels: (N,) array of integer class labels
    # Split [0, 1] into M uniform bins.
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    # Max probability per sample i is its confidence; the argmax is its prediction.
    confidences = np.max(samples, axis=1)
    predicted_label = np.argmax(samples, axis=1)

    # Boolean array: was each prediction correct?
    accuracies = predicted_label == true_labels

    ece = np.zeros(1)
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # Samples whose confidence falls into this bin (upper edge inclusive).
        in_bin = np.logical_and(
            confidences > bin_lower.item(), confidences <= bin_upper.item()
        )
        # Fraction of all samples that landed in this bin: |B_m| / n.
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            # Weighted gap |conf(B_m) - acc(B_m)| * |B_m| / n for this bin.
            ece += np.abs(avg_confid - accuracy_in_bin) * prob_in_bin
    return ece[0]
# Maximum Calibration Error (MCE): the largest per-bin calibration gap.
def MCE(samples, true_labels, M=5):
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    # Max probability per sample i is its confidence; the argmax is its prediction.
    confidences = np.max(samples, axis=1)
    predicted_label = np.argmax(samples, axis=1)

    # Boolean array: was each prediction correct?
    accuracies = predicted_label == true_labels

    mces = []
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # Samples whose confidence falls into this bin (upper edge inclusive).
        in_bin = np.logical_and(
            confidences > bin_lower.item(), confidences <= bin_upper.item()
        )
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            # Unweighted per-bin gap; MCE is the maximum over occupied bins.
            mces.append(np.abs(avg_confid - accuracy_in_bin))
  46. return max(mces)