1 year ago · cd26ee24c3
--- a/daily_log.md
+++ b/daily_log.md
@@ -1,4 +1,4 @@
 
															-# Daily Log 
														
 
															+# Daily Log
														
 
															 ## Friday, June 14, 2024
														
 
															 Relatively sedate day, mostly just rewriting the dataset system to ensure that the datasets loaded for evaluation are the same as the ones used in training. This ensures that there is no leakage of the train set into the validation and testing sets. Plan to meet with Ali and Brayden on Tuesday to go over the results of the coverage analysis, since our results do not show the expected negative relationship between coverage and F1/accuracy/AUC. 
														
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -30,3 +30,33 @@ def ECE(samples, true_labels, M=5):
 
															             ece += np.abs(avg_confid - accuracy_in_bin) * prob_in_bin
														
 
															     return ece[0]
														
 
															+
														
 
															+
														
 
															+# Maximum Calibration error - maximum of error per bin
														
 
															+def MCE(samples, true_labels, M=5):
														
 
															+    bin_boundaries = np.linspace(0, 1, M + 1)
														
 
															+    bin_lowers = bin_boundaries[:-1]
														
 
															+    bin_uppers = bin_boundaries[1:]
														
 
															+
														
 
															+    # Get max probability per sample i
														
 
															+    confidences = samples
														
 
															+    predicted_label = true_labels
														
 
															+
														
 
															+    # get correct/false
														
 
															+    accuracies = predicted_label == true_labels
														
 
															+
														
 
															+    mces = []
														
 
															+
														
 
															+    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
														
 
															+        # bin sample
														
 
															+        in_bin = np.logical_and(
														
 
															+            confidences > bin_lower.item(), confidences < bin_upper.item()
														
 
															+        )
														
 
															+        prob_in_bin = in_bin.mean()
														
 
															+
														
 
															+        if prob_in_bin > 0:
														
 
															+            accuracy_in_bin = accuracies[in_bin].mean()
														
 
															+            avg_confid = confidences[in_bin].mean()
														
 
															+            mces.append(np.abs(avg_confid - accuracy_in_bin))
														
 
															+
														
 
															+    return max(mces)