import numpy as np


def _bin_calibration_gaps(samples, true_labels, M):
    """Yield ``(gap, weight)`` for every non-empty confidence bin.

    The interval [0, 1] is split into ``M`` equal-width bins. Bins are closed
    on the right, ``(lower, upper]``, and the first bin additionally includes
    its lower edge so that a confidence of exactly 0 is not dropped.

    Args:
        samples: Confidence values; converted with ``np.asarray``.
        true_labels: Labels aligned with ``samples``; converted with
            ``np.asarray``.
        M: Number of bins.

    Yields:
        Tuples ``(gap, weight)`` where ``gap`` is
        ``|mean confidence - mean accuracy|`` inside the bin and ``weight`` is
        the fraction of all samples that fall into the bin.
    """
    confidences = np.asarray(samples)
    labels = np.asarray(true_labels)
    if confidences.size == 0:
        # Nothing to bin; also avoids NumPy's "mean of empty slice" warning.
        return

    # NOTE(review): the predicted label is set to the true label, so every
    # entry of `accuracies` is True and the per-bin accuracy is always 1.
    # Kept as-is because the intended prediction source is not visible here;
    # confirm against the callers.
    predicted_label = labels
    accuracies = predicted_label == labels

    bin_boundaries = np.linspace(0, 1, M + 1)
    edges = zip(bin_boundaries[:-1], bin_boundaries[1:])
    for index, (bin_lower, bin_upper) in enumerate(edges):
        if index == 0:
            above_lower = confidences >= bin_lower
        else:
            above_lower = confidences > bin_lower
        # Upper edge is inclusive so boundary values (notably 1.0) are counted.
        in_bin = np.logical_and(above_lower, confidences <= bin_upper)
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            yield np.abs(avg_confid - accuracy_in_bin), prob_in_bin


# ECE from https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
def ECE(samples, true_labels, M=5):
    """Return the expected calibration error over ``M`` uniform bins.

    The result is the sum over non-empty bins of the bin's calibration gap
    multiplied by the fraction of samples in that bin.

    Args:
        samples: Confidence values (array-like).
        true_labels: Labels aligned with ``samples`` (array-like).
        M: Number of equal-width bins on [0, 1].

    Returns:
        The weighted sum as a NumPy float; 0.0 when no bin is populated.
    """
    ece = np.float64(0.0)
    for gap, weight in _bin_calibration_gaps(samples, true_labels, M):
        ece += gap * weight
    return ece


# Maximum Calibration error - maximum of error per bin
def MCE(samples, true_labels, M=5):
    """Return the maximum calibration error over ``M`` uniform bins.

    Args:
        samples: Confidence values (array-like).
        true_labels: Labels aligned with ``samples`` (array-like).
        M: Number of equal-width bins on [0, 1].

    Returns:
        The largest per-bin calibration gap; 0.0 when no bin is populated
        (for example on empty input) instead of raising ``ValueError``.
    """
    gaps = [gap for gap, _ in _bin_calibration_gaps(samples, true_labels, M)]
    return max(gaps, default=np.float64(0.0))