12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- import numpy as np
- import sklearn.metrics as mt
- # ECE from https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
def ECE(predicted_labels, confidences, true_labels, M=5):
    """Return the expected calibration error over ``M`` uniform bins.

    Samples are grouped by confidence into ``M`` equal-width bins on [0, 1].
    For each non-empty bin the absolute gap between mean confidence and
    accuracy is weighted by the fraction of samples in the bin, and the
    weighted gaps are summed.

    Args:
        predicted_labels: Array-like of predicted class labels.
        confidences: Array-like of confidences, expected to lie in [0, 1].
        true_labels: Array-like of ground-truth class labels.
        M: Number of uniform confidence bins.

    Returns:
        The expected calibration error; ``0.0`` for empty input.
    """
    # Accept plain sequences as well as arrays: comparing two lists with
    # ``==`` yields a single bool rather than an element-wise result.
    predicted_labels = np.asarray(predicted_labels)
    confidences = np.asarray(confidences, dtype=float)
    true_labels = np.asarray(true_labels)

    n_samples = confidences.size
    if n_samples == 0:
        return np.float64(0.0)

    correct = predicted_labels == true_labels
    edges = np.linspace(0, 1, M + 1)
    ece = np.float64(0.0)
    for index, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
        # Bins are (lower, upper], except the first one which also includes
        # its lower edge so that a confidence of exactly 0 is not dropped.
        above_lower = confidences >= lower if index == 0 else confidences > lower
        in_bin = above_lower & (confidences <= upper)
        count = np.count_nonzero(in_bin)
        if count == 0:
            continue
        gap = np.abs(confidences[in_bin].mean() - correct[in_bin].mean())
        ece += gap * (count / n_samples)
    return ece
- # Maximum Calibration error - maximum of error per bin
def MCE(predicted_labels, confidences, true_labels, M=5):
    """Return the maximum calibration error over ``M`` uniform bins.

    Samples are grouped by confidence into ``M`` equal-width bins on [0, 1];
    the result is the largest absolute gap between mean confidence and
    accuracy among the non-empty bins.

    Args:
        predicted_labels: Array-like of predicted class labels.
        confidences: Array-like of confidences, expected to lie in [0, 1].
        true_labels: Array-like of ground-truth class labels.
        M: Number of uniform confidence bins.

    Returns:
        The largest per-bin calibration gap; ``0.0`` when no sample falls
        into any bin (e.g. empty input).
    """
    # Accept plain sequences as well as arrays: comparing two lists with
    # ``==`` yields a single bool rather than an element-wise result.
    predicted_labels = np.asarray(predicted_labels)
    confidences = np.asarray(confidences, dtype=float)
    true_labels = np.asarray(true_labels)

    correct = predicted_labels == true_labels
    edges = np.linspace(0, 1, M + 1)
    worst_gap = np.float64(0.0)
    for index, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
        # Bins are (lower, upper] so that samples sitting exactly on a bin
        # edge (notably confidence 1.0) are counted; the first bin also
        # includes its lower edge so a confidence of exactly 0 is kept.
        above_lower = confidences >= lower if index == 0 else confidences > lower
        in_bin = above_lower & (confidences <= upper)
        if not in_bin.any():
            continue
        gap = np.abs(confidences[in_bin].mean() - correct[in_bin].mean())
        worst_gap = max(worst_gap, gap)
    return worst_gap
def F1(predicted_labels, true_labels):
    """Return the binary F1 score, treating label 1 as the positive class.

    Args:
        predicted_labels: Array-like of predicted labels (0 or 1).
        true_labels: Array-like of ground-truth labels (0 or 1).

    Returns:
        The harmonic mean of precision and recall, or ``0.0`` when there are
        no true positives (precision and/or recall would be undefined).
    """
    # Convert up front: ``some_list == 1`` is a single ``False`` rather than
    # an element-wise comparison, which would silently zero every count.
    predicted_labels = np.asarray(predicted_labels)
    true_labels = np.asarray(true_labels)

    predicted_positive = predicted_labels == 1
    actual_positive = true_labels == 1
    tp = np.sum(predicted_positive & actual_positive)
    fp = np.sum(predicted_positive & (true_labels == 0))
    fn = np.sum((predicted_labels == 0) & actual_positive)

    # Without true positives the ratios below degenerate to 0/0 (NaN).
    if tp == 0:
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)
- # Uses sklearn's AUC function
- # Requires confidences to be the predicted probabilities for the positive class
def AUC(confidences, true_labels):
    """Return the area under the ROC curve, computed with scikit-learn.

    Args:
        confidences: Predicted probabilities for the positive class.
        true_labels: Ground-truth labels.

    Returns:
        The area under the curve traced by the false/true positive rates.
    """
    roc_points = mt.roc_curve(true_labels, confidences)
    # roc_curve yields (fpr, tpr, thresholds); the thresholds are not needed.
    false_positive_rate = roc_points[0]
    true_positive_rate = roc_points[1]
    return mt.auc(false_positive_rate, true_positive_rate)
def entropy(confidences):
    """Return the Shannon entropy (natural log) of ``confidences``.

    Computes ``-sum(p * log(p))`` over all entries, using the convention
    ``0 * log(0) == 0`` so that zero probabilities contribute nothing
    instead of turning the whole result into NaN.

    Args:
        confidences: Array-like of probabilities.

    Returns:
        The entropy in nats.
    """
    probabilities = np.asarray(confidences, dtype=float)
    # Drop exact zeros only; other invalid values (e.g. negatives) still
    # propagate NaN exactly as a direct evaluation of the formula would.
    nonzero = probabilities[probabilities != 0]
    return -np.sum(nonzero * np.log(nonzero))
- ### Negative log-likelihood for binary classification
def nll_binary(confidences, true_labels):
    """Return the summed negative log-likelihood for binary classification.

    Args:
        confidences: Array-like of predicted probabilities for label 1.
        true_labels: Array-like of ground-truth labels (0 or 1).

    Returns:
        ``-sum(log(p))`` over samples labelled 1 plus ``-sum(log(1 - p))``
        over samples labelled 0 (a sum, not a mean).
    """
    # Convert up front: with a plain list, ``true_labels == 1`` is a single
    # ``False`` and the boolean indexing below would select nothing.
    confidences = np.asarray(confidences, dtype=float)
    true_labels = np.asarray(true_labels)

    positive_probs = confidences[true_labels == 1]
    negative_probs = confidences[true_labels == 0]
    # log1p(-p) equals log(1 - p) but keeps precision when p is tiny.
    return -np.sum(np.log(positive_probs)) - np.sum(np.log1p(-negative_probs))
- ### Brier score for binary classification
def brier_binary(confidences, true_labels):
    """Return the Brier score for binary classification.

    Args:
        confidences: Array-like of predicted probabilities for label 1.
        true_labels: Array-like of ground-truth labels (0 or 1).

    Returns:
        The mean squared difference between confidences and labels.
    """
    # Cast to float arrays so plain lists and boolean labels can be
    # subtracted element-wise.
    confidences = np.asarray(confidences, dtype=float)
    true_labels = np.asarray(true_labels, dtype=float)
    return np.mean((confidences - true_labels) ** 2)
|