import numpy as np
import sklearn.metrics as mt


def ECE(predicted_labels, confidences, true_labels, M=5):
    """Expected Calibration Error over M equal-width confidence bins."""
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    accuracies = predicted_labels == true_labels
    ece = 0.0
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # Samples whose confidence falls into the half-open bin (lower, upper].
        in_bin = np.logical_and(confidences > bin_lower, confidences <= bin_upper)
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            # Accumulate the |confidence - accuracy| gap, weighted by bin mass.
            ece += np.abs(avg_confid - accuracy_in_bin) * prob_in_bin
    return ece

def MCE(predicted_labels, confidences, true_labels, M=5):
    """Maximum Calibration Error: the largest per-bin |confidence - accuracy| gap."""
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    accuracies = predicted_labels == true_labels
    mces = []
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # Same (lower, upper] binning as in ECE (upper bound inclusive).
        in_bin = np.logical_and(confidences > bin_lower, confidences <= bin_upper)
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            mces.append(np.abs(avg_confid - accuracy_in_bin))
    return max(mces)

def F1(predicted_labels, true_labels):
    """F1 score for binary labels in {0, 1}."""
    tp = np.sum(np.logical_and(predicted_labels == 1, true_labels == 1))
    fp = np.sum(np.logical_and(predicted_labels == 1, true_labels == 0))
    fn = np.sum(np.logical_and(predicted_labels == 0, true_labels == 1))
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)

def AUC(confidences, true_labels):
    """Area under the ROC curve, treating confidences as positive-class scores."""
    fpr, tpr, _ = mt.roc_curve(true_labels, confidences)
    return mt.auc(fpr, tpr)

def entropy(confidences):
    """Shannon entropy of a probability vector (entries assumed to sum to 1)."""
    return -np.sum(confidences * np.log(confidences))

def nll_binary(confidences, true_labels):
    """Negative log-likelihood for binary labels, with confidences = P(y = 1)."""
    return -np.sum(np.log(confidences[true_labels == 1])) - np.sum(
        np.log(1 - confidences[true_labels == 0])
    )

def brier_binary(confidences, true_labels):
    """Brier score: mean squared error between P(y = 1) and the 0/1 labels."""
    return np.mean((confidences - true_labels) ** 2)
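

# A minimal usage sketch, not part of the original listing: the synthetic data,
# the names `pos_probs` and `pred_conf`, and the 0.5 decision threshold are
# illustrative assumptions. ECE/MCE are given the confidence of the predicted
# label, while AUC, NLL, and the Brier score take the positive-class probability.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    true_labels = rng.integers(0, 2, size=1000)

    # Probabilities loosely correlated with the labels, clipped away from
    # 0 and 1 so the log terms in nll_binary stay finite.
    pos_probs = np.clip(
        0.25 + 0.5 * true_labels + rng.normal(0.0, 0.15, size=1000), 1e-6, 1 - 1e-6
    )
    predicted_labels = (pos_probs >= 0.5).astype(int)
    pred_conf = np.where(predicted_labels == 1, pos_probs, 1 - pos_probs)

    print("ECE  ", ECE(predicted_labels, pred_conf, true_labels, M=10))
    print("MCE  ", MCE(predicted_labels, pred_conf, true_labels, M=10))
    print("F1   ", F1(predicted_labels, true_labels))
    print("AUC  ", AUC(pos_probs, true_labels))
    print("NLL  ", nll_binary(pos_probs, true_labels))
    print("Brier", brier_binary(pos_probs, true_labels))
    # entropy expects a single predictive distribution that sums to 1.
    print("Entropy of [0.7, 0.3]:", entropy(np.array([0.7, 0.3])))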