11 months ago · f2e7f78a40
--- a/config.toml
+++ b/config.toml
@@ -32,4 +32,4 @@ silent = false
 
															 name = 'cnn-50x30'
														
 
															 prune_threshold = 0.0 # Any models with accuracy below this threshold will be pruned, set to 0 to disable pruning
														
 
															 individual_id = 1     # The id of the individual model to be used for the ensemble
														
 
															-run_models = false    # If true, the ensemble will run the models to generate the predictions, otherwise will load from file
														
 
															+run_models = true    # If true, the ensemble will run the models to generate the predictions, otherwise will load from file
														
--- a/threshold_refac.py
+++ b/threshold_refac.py
@@ -238,6 +238,26 @@ def common_entries(*dcts):
 
															     for i in set(dcts[0]).intersection(*dcts[1:]):
														
 
															         yield (i,) + tuple(d[i] for d in dcts)
														
 
															+# Given ensemble statistics, calculate overall stats (ECE, MCE, Brier Score, NLL) for each statistic column
														
 
															+def calculate_overall_statistics(ensemble_statistics):
														
 
															+    predicted = ensemble_statistics['predicted']
														
 
															+    actual = ensemble_statistics['actual']
														
 
															+
														
 
															+    # New dataframe to store the statistics
														
 
															+    stats_df = pd.DataFrame(columns=['stat', 'ECE', 'MCE', 'Brier Score', 'NLL']).set_index('stat')
														
 
															+
														
 
															+    # Loop through and calculate the ECE, MCE, Brier Score, and NLL
														
 
															+    for stat in ['confidence', 'entropy', 'stdev', 'raw_confidence']:
														
 
															+        ece = met.ECE(predicted, ensemble_statistics[stat], actual)
														
 
															+        mce = met.MCE(predicted, ensemble_statistics[stat], actual)
														
 
															+        brier = met.brier_binary(ensemble_statistics[stat], actual)
														
 
															+        nll = met.nll_binary(ensemble_statistics[stat], actual)
														
 
															+
														
 
															+        stats_df.loc[stat] = [ece, mce, brier, nll]
														
 
															+    
														
 
															+    return stats_df
														
 
															+
														
 
															+
														
 
															 def main():
														
 
															     # Load the models
														
@@ -273,43 +293,14 @@ def main():
 
															         ensemble_statistics, 'confidence', low_to_high=False
														
 
															     )
														
 
															-    # Print ECE and MCE Values
														
 
															-    conf_ece = met.ECE(
														
 
															-        ensemble_statistics['predicted'],
														
 
															-        ensemble_statistics['confidence'],
														
 
															-        ensemble_statistics['actual'],
														
 
															-    )
														
 
															-    conf_mce = met.MCE(
														
 
															-        ensemble_statistics['predicted'],
														
 
															-        ensemble_statistics['confidence'],
														
 
															-        ensemble_statistics['actual'],
														
 
															-    )
														
 
															-
														
 
															-    ent_ece = met.ECE(
														
 
															-        ensemble_statistics['predicted'],
														
 
															-        ensemble_statistics['entropy'],
														
 
															-        ensemble_statistics['actual'],
														
 
															-    )
														
 
															-    ent_mce = met.MCE(
														
 
															-        ensemble_statistics['predicted'],
														
 
															-        ensemble_statistics['entropy'],
														
 
															-        ensemble_statistics['actual'],
														
 
															-    )
														
 
															+    raw_confidence = ensemble_statistics['confidence'].apply(lambda x: (x / 2) + 0.5)
														
 
															+    ensemble_statistics.insert(4, 'raw_confidence', raw_confidence)
														
 
															-    stdev_ece = met.ECE(
														
 
															-        ensemble_statistics['predicted'],
														
 
															-        ensemble_statistics['stdev'],
														
 
															-        ensemble_statistics['actual'],
														
 
															-    )
														
 
															-    stdev_mce = met.MCE(
														
 
															-        ensemble_statistics['predicted'],
														
 
															-        ensemble_statistics['stdev'],
														
 
															-        ensemble_statistics['actual'],
														
 
															-    )
														
 
															+    # Calculate overall statistics
														
 
															+    overall_statistics = calculate_overall_statistics(ensemble_statistics)
														
 
															-    print(f'Confidence ECE: {conf_ece}, Confidence MCE: {conf_mce}')
														
 
															-    print(f'Entropy ECE: {ent_ece}, Entropy MCE: {ent_mce}')
														
 
															-    print(f'Stdev ECE: {stdev_ece}, Stdev MCE: {stdev_mce}')
														
 
															+    # Print overall statistics
														
 
															+    print(overall_statistics)
														
 
															     # Print overall ensemble statistics
														
 
															     print('Ensemble Statistics')
														
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -75,3 +75,14 @@ def AUC(confidences, true_labels):
 
															 def entropy(confidences):
														
 
															     return -np.sum(confidences * np.log(confidences))
														
 
															+
														
 
															+### Negative Log Likelihood for binary classification
														
 
															+def nll_binary(confidences, true_labels):
														
 
															+    return -np.sum(np.log(confidences[true_labels == 1])) - np.sum(np.log(1 - confidences[true_labels == 0]))
														
 
															+
														
 
															+### Brier score for binary classification
														
 
															+def brier_binary(confidences, true_labels):
														
 
															+    return np.mean((confidences - true_labels) ** 2)
														
 
															+
														
 
															+
														
 
															+