```python
import numpy as np


# ECE, adapted from https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
def ECE(samples, true_labels, M=5):
    # M uniform bins over [0, 1]
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]
    # confidence = max predicted probability per sample, prediction = argmax class
    confidences = np.max(samples, axis=1)
    predicted_label = np.argmax(samples, axis=1)
    # boolean array: was each prediction correct?
    accuracies = predicted_label == true_labels
    ece = np.zeros(1)
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # samples whose confidence falls into this bin
        in_bin = np.logical_and(
            confidences > bin_lower.item(), confidences <= bin_upper.item()
        )
        # fraction of all samples in this bin: |B_m| / n
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            # accuracy and average confidence of the bin
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            # weighted gap |conf(B_m) - acc(B_m)| * |B_m| / n
            ece += np.abs(avg_confid - accuracy_in_bin) * prob_in_bin
    return ece[0]
```
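For reference, the loop above computes the standard binned estimator

$$\mathrm{ECE} = \sum_{m=1}^{M} \frac{|B_m|}{n}\,\bigl|\operatorname{acc}(B_m) - \operatorname{conf}(B_m)\bigr|,$$

where $B_m$ is the set of samples whose confidence lands in bin $m$, and $\operatorname{acc}(B_m)$ and $\operatorname{conf}(B_m)$ are that bin's accuracy and average confidence.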
```python
# Maximum Calibration Error (MCE) - the largest calibration gap over all bins
def MCE(samples, true_labels, M=5):
    # M uniform bins over [0, 1]
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]
    # confidence = max predicted probability per sample, prediction = argmax class
    confidences = np.max(samples, axis=1)
    predicted_label = np.argmax(samples, axis=1)
    # boolean array: was each prediction correct?
    accuracies = predicted_label == true_labels
    mces = []
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # samples whose confidence falls into this bin
        in_bin = np.logical_and(
            confidences > bin_lower.item(), confidences <= bin_upper.item()
        )
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            # unweighted gap |conf(B_m) - acc(B_m)| for each non-empty bin
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            mces.append(np.abs(avg_confid - accuracy_in_bin))
    return max(mces)
```
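A minimal usage sketch follows. The inputs are made-up values for illustration; following the reference linked above, `samples` is assumed to be an (n, K) array of per-class probabilities and `true_labels` an array of integer class labels.

```python
import numpy as np

# Made-up example: 6 samples, 2 classes (illustrative values only).
probs = np.array([
    [0.78, 0.22],
    [0.36, 0.64],
    [0.08, 0.92],
    [0.58, 0.42],
    [0.49, 0.51],
    [0.85, 0.15],
])
labels = np.array([0, 1, 0, 1, 0, 0])

print("ECE:", ECE(probs, labels, M=5))  # bin-size-weighted average |conf - acc| gap
print("MCE:", MCE(probs, labels, M=5))  # largest |conf - acc| gap over non-empty bins
```

MCE is always at least as large as ECE, since it reports the worst single bin rather than the bin-size-weighted average.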