2 ay önce · 53c6d721b1
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -4,6 +4,7 @@
 
															     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
														
 
															     "version": "0.2.0",
														
 
															     "configurations": [
														
 
															+        {"name":"Python Debugger: Current File","type":"debugpy","request":"launch","program":"${file}","console":"integratedTerminal"},
														
 
															         {
														
 
															             "name": "Python Debugger: Current File",
														
--- a/LP_ADNIMERGE.csv
+++ b/LP_ADNIMERGE.csv
--- a/bayesian.py
+++ b/bayesian.py
@@ -52,12 +52,9 @@ for epoch in range(config["training"]["epochs"]):
 
															         loss = loss / len(data)
														
 
															         loss.backward()
														
 
															         optimizer.step()
														
 
															-        
														
 
															-#Test the model
														
 
															+
														
 
															+# Test the model
														
 
															 model.eval()
														
 
															 with torch.no_grad():
														
 
															     output_li
														
 
															-        
														
 
															-    
														
 
															-
														
--- a/config.toml
+++ b/config.toml
@@ -1,12 +1,15 @@
 
															 [paths]
														
 
															-mri_data = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'
														
 
															-xls_data = '/export/home/nschense/alzheimers/alzheimers_nn/LP_ADNIMERGE.csv'
														
 
															+#mri_data = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'
														
 
															+#xls_data = '/export/home/nschense/alzheimers/alzheimers_nn/LP_ADNIMERGE.csv'
														
 
															+
														
 
															+mri_data = '../data/PET_volumes_customtemplate_float32/'
														
 
															+xls_data = '../data/LP_ADNIMERGE.csv'
														
 
															 #CHANGE THESE BEFORE RUNNING
														
 
															-model_output = '/export/home/nschense/alzheimers/alzheimers_nn/saved_models/'
														
 
															+model_output = '../models/'
														
 
															 [training]
														
 
															-device = 'cuda:1' 
														
 
															+device = 'mps' 
														
 
															 runs = 50 # Number of models
														
 
															 max_epochs = 30 # Epochs per model
														
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,6 +0,0 @@
 
															-[lint]
														
 
															-select = ["E4", "E7", "E9", "F", "B"]
														
 
															-
														
 
															-
														
 
															-[format]
														
 
															-quote-style = "single"
														
--- a/threshold.py
+++ b/threshold.py
@@ -27,14 +27,14 @@ def plot_coverage(
 
															     plt.plot(
														
 
															         percentiles,
														
 
															         ensemble_results,
														
 
															-        'ob',
														
 
															-        label='Ensemble',
														
 
															+        "ob",
														
 
															+        label="Ensemble",
														
 
															     )
														
 
															     plt.plot(
														
 
															         percentiles,
														
 
															         individual_results,
														
 
															-        'xr',
														
 
															-        label='Individual (on entire dataset)',
														
 
															+        "xr",
														
 
															+        label="Individual (on entire dataset)",
														
 
															     )
														
 
															     plt.xlabel(x_lablel)
														
 
															     plt.ylabel(y_label)
														
@@ -50,17 +50,17 @@ def plot_coverage(
 
															 RUN = False
														
 
															 # CONFIGURATION
														
 
															-if os.getenv('ADL_CONFIG_PATH') is None:
														
 
															-    with open('config.toml', 'rb') as f:
														
 
															+if os.getenv("ADL_CONFIG_PATH") is None:
														
 
															+    with open("config.toml", "rb") as f:
														
 
															         config = toml.load(f)
														
 
															 else:
														
 
															-    with open(os.getenv('ADL_CONFIG_PATH'), 'rb') as f:
														
 
															+    with open(os.getenv("ADL_CONFIG_PATH"), "rb") as f:
														
 
															         config = toml.load(f)
														
 
															 ENSEMBLE_PATH = f"{config['paths']['model_output']}{config['ensemble']['name']}"
														
 
															-V2_PATH = ENSEMBLE_PATH + '/v2'
														
 
															+V2_PATH = ENSEMBLE_PATH + "/v2"
														
 
															 # Result is a 1x2 tensor, with the softmax of the 2 predicted classes
														
@@ -75,14 +75,14 @@ def output_to_confidence(result):
 
															 # This function conducts tests on the models and returns the results, as well as saving the predictions and metrics
														
 
															 def get_predictions(config):
														
 
															     models, model_descs = ens.load_models(
														
 
															-        f'{ENSEMBLE_PATH}/models/',
														
 
															-        config['training']['device'],
														
 
															+        f"{ENSEMBLE_PATH}/models/",
														
 
															+        config["training"]["device"],
														
 
															     )
														
 
															-    models = [model.to(config['training']['device']) for model in models]
														
 
															-    test_set = torch.load(f'{ENSEMBLE_PATH}/test_dataset.pt') + torch.load(
														
 
															-        f'{ENSEMBLE_PATH}/val_dataset.pt'
														
 
															+    models = [model.to(config["training"]["device"]) for model in models]
														
 
															+    test_set = torch.load(f"{ENSEMBLE_PATH}/test_dataset.pt") + torch.load(
														
 
															+        f"{ENSEMBLE_PATH}/val_dataset.pt"
														
 
															     )
														
 
															-    print(f'Loaded {len(test_set)} samples')
														
 
															+    print(f"Loaded {len(test_set)} samples")
														
 
															     # [([model results], labels)]
														
 
															     results = []
														
@@ -93,12 +93,12 @@ def get_predictions(config):
 
															     for _, (data, target) in tqdm(
														
 
															         enumerate(test_set),
														
 
															         total=len(test_set),
														
 
															-        desc='Getting predictions',
														
 
															-        unit='sample',
														
 
															+        desc="Getting predictions",
														
 
															+        unit="sample",
														
 
															     ):
														
 
															         mri, xls = data
														
 
															-        mri = mri.unsqueeze(0).to(config['training']['device'])
														
 
															-        xls = xls.unsqueeze(0).to(config['training']['device'])
														
 
															+        mri = mri.unsqueeze(0).to(config["training"]["device"])
														
 
															+        xls = xls.unsqueeze(0).to(config["training"]["device"])
														
 
															         data = (mri, xls)
														
 
															         res = []
														
 
															         for j, model in enumerate(models):
														
@@ -164,125 +164,125 @@ if RUN:
 
															     # Convert to pandas dataframes
														
 
															     confs_df = pd.DataFrame(
														
 
															         confs,
														
 
															-        columns=['predicted_class', 'confidence', 'true_label', 'class_1', 'class_2'],
														
 
															+        columns=["predicted_class", "confidence", "true_label", "class_1", "class_2"],
														
 
															     )
														
 
															     stdevs_df = pd.DataFrame(
														
 
															-        stdevs, columns=['predicted_class', 'stdev', 'true_label', 'class_1', 'class_2']
														
 
															+        stdevs, columns=["predicted_class", "stdev", "true_label", "class_1", "class_2"]
														
 
															     )
														
 
															     entropies_df = pd.DataFrame(
														
 
															         entropies,
														
 
															-        columns=['predicted_class', 'entropy', 'true_label', 'class_1', 'class_2'],
														
 
															+        columns=["predicted_class", "entropy", "true_label", "class_1", "class_2"],
														
 
															     )
														
 
															-    indv_df = pd.DataFrame(indv_results, columns=['class_1', 'class_2', 'true_label'])
														
 
															+    indv_df = pd.DataFrame(indv_results, columns=["class_1", "class_2", "true_label"])
														
 
															     if not os.path.exists(V2_PATH):
														
 
															         os.makedirs(V2_PATH)
														
 
															-    confs_df.to_csv(f'{V2_PATH}/ensemble_confidences.csv')
														
 
															-    stdevs_df.to_csv(f'{V2_PATH}/ensemble_stdevs.csv')
														
 
															-    entropies_df.to_csv(f'{V2_PATH}/ensemble_entropies.csv')
														
 
															-    indv_df.to_csv(f'{V2_PATH}/individual_results.csv')
														
 
															+    confs_df.to_csv(f"{V2_PATH}/ensemble_confidences.csv")
														
 
															+    stdevs_df.to_csv(f"{V2_PATH}/ensemble_stdevs.csv")
														
 
															+    entropies_df.to_csv(f"{V2_PATH}/ensemble_entropies.csv")
														
 
															+    indv_df.to_csv(f"{V2_PATH}/individual_results.csv")
														
 
															 else:
														
 
															-    confs_df = pd.read_csv(f'{V2_PATH}/ensemble_confidences.csv')
														
 
															-    stdevs_df = pd.read_csv(f'{V2_PATH}/ensemble_stdevs.csv')
														
 
															-    entropies_df = pd.read_csv(f'{V2_PATH}/ensemble_entropies.csv')
														
 
															-    indv_df = pd.read_csv(f'{V2_PATH}/individual_results.csv')
														
 
															+    confs_df = pd.read_csv(f"{V2_PATH}/ensemble_confidences.csv")
														
 
															+    stdevs_df = pd.read_csv(f"{V2_PATH}/ensemble_stdevs.csv")
														
 
															+    entropies_df = pd.read_csv(f"{V2_PATH}/ensemble_entropies.csv")
														
 
															+    indv_df = pd.read_csv(f"{V2_PATH}/individual_results.csv")
														
 
															 # Plot confidence vs standard deviation, and change color of dots based on if they are correct
														
 
															-correct_conf = confs_df[confs_df['predicted_class'] == confs_df['true_label']]
														
 
															-incorrect_conf = confs_df[confs_df['predicted_class'] != confs_df['true_label']]
														
 
															+correct_conf = confs_df[confs_df["predicted_class"] == confs_df["true_label"]]
														
 
															+incorrect_conf = confs_df[confs_df["predicted_class"] != confs_df["true_label"]]
														
 
															-correct_stdev = stdevs_df[stdevs_df['predicted_class'] == stdevs_df['true_label']]
														
 
															-incorrect_stdev = stdevs_df[stdevs_df['predicted_class'] != stdevs_df['true_label']]
														
 
															+correct_stdev = stdevs_df[stdevs_df["predicted_class"] == stdevs_df["true_label"]]
														
 
															+incorrect_stdev = stdevs_df[stdevs_df["predicted_class"] != stdevs_df["true_label"]]
														
 
															 correct_ent = entropies_df[
														
 
															-    entropies_df['predicted_class'] == entropies_df['true_label']
														
 
															+    entropies_df["predicted_class"] == entropies_df["true_label"]
														
 
															 ]
														
 
															 incorrect_ent = entropies_df[
														
 
															-    entropies_df['predicted_class'] != entropies_df['true_label']
														
 
															+    entropies_df["predicted_class"] != entropies_df["true_label"]
														
 
															 ]
														
 
															 plot, ax = plt.subplots()
														
 
															 plt.scatter(
														
 
															-    correct_conf['confidence'],
														
 
															-    correct_stdev['stdev'],
														
 
															-    color='green',
														
 
															-    label='Correct Prediction',
														
 
															+    correct_conf["confidence"],
														
 
															+    correct_stdev["stdev"],
														
 
															+    color="green",
														
 
															+    label="Correct Prediction",
														
 
															 )
														
 
															 plt.scatter(
														
 
															-    incorrect_conf['confidence'],
														
 
															-    incorrect_stdev['stdev'],
														
 
															-    color='red',
														
 
															-    label='Incorrect Prediction',
														
 
															+    incorrect_conf["confidence"],
														
 
															+    incorrect_stdev["stdev"],
														
 
															+    color="red",
														
 
															+    label="Incorrect Prediction",
														
 
															 )
														
 
															-plt.xlabel('Confidence (Raw Value)')
														
 
															-plt.ylabel('Standard Deviation (Raw Value)')
														
 
															-plt.title('Confidence vs Standard Deviation')
														
 
															+plt.xlabel("Confidence (Raw Value)")
														
 
															+plt.ylabel("Standard Deviation (Raw Value)")
														
 
															+plt.title("Confidence vs Standard Deviation")
														
 
															 plt.legend()
														
 
															-plt.savefig(f'{V2_PATH}/confidence_vs_stdev.png')
														
 
															+plt.savefig(f"{V2_PATH}/confidence_vs_stdev.png")
														
 
															 plt.close()
														
 
															 # Do the same for confidence vs entropy
														
 
															 plot, ax = plt.subplots()
														
 
															 plt.scatter(
														
 
															-    correct_conf['confidence'],
														
 
															-    correct_ent['entropy'],
														
 
															-    color='green',
														
 
															-    label='Correct Prediction',
														
 
															+    correct_conf["confidence"],
														
 
															+    correct_ent["entropy"],
														
 
															+    color="green",
														
 
															+    label="Correct Prediction",
														
 
															 )
														
 
															 plt.scatter(
														
 
															-    incorrect_conf['confidence'],
														
 
															-    incorrect_ent['entropy'],
														
 
															-    color='red',
														
 
															-    label='Incorrect Prediction',
														
 
															+    incorrect_conf["confidence"],
														
 
															+    incorrect_ent["entropy"],
														
 
															+    color="red",
														
 
															+    label="Incorrect Prediction",
														
 
															 )
														
 
															-plt.xlabel('Confidence (Raw Value)')
														
 
															-plt.ylabel('Entropy (Raw Value)')
														
 
															-plt.title('Confidence vs Entropy')
														
 
															+plt.xlabel("Confidence (Raw Value)")
														
 
															+plt.ylabel("Entropy (Raw Value)")
														
 
															+plt.title("Confidence vs Entropy")
														
 
															 plt.legend()
														
 
															-plt.savefig(f'{V2_PATH}/confidence_vs_entropy.png')
														
 
															+plt.savefig(f"{V2_PATH}/confidence_vs_entropy.png")
														
 
															 plt.close()
														
 
															 # Calculate individual model accuracy and entropy
														
 
															 # Determine predicted class
														
 
															-indv_df['predicted_class'] = indv_df[['class_1', 'class_2']].idxmax(axis=1)
														
 
															-indv_df['predicted_class'] = indv_df['predicted_class'].apply(
														
 
															-    lambda x: 0 if x == 'class_1' else 1
														
 
															+indv_df["predicted_class"] = indv_df[["class_1", "class_2"]].idxmax(axis=1)
														
 
															+indv_df["predicted_class"] = indv_df["predicted_class"].apply(
														
 
															+    lambda x: 0 if x == "class_1" else 1
														
 
															 )
														
 
															-indv_df['correct'] = indv_df['predicted_class'] == indv_df['true_label']
														
 
															-accuracy_indv = indv_df['correct'].mean()
														
 
															+indv_df["correct"] = indv_df["predicted_class"] == indv_df["true_label"]
														
 
															+accuracy_indv = indv_df["correct"].mean()
														
 
															 f1_indv = met.F1(
														
 
															-    indv_df['predicted_class'].to_numpy(), indv_df['true_label'].to_numpy()
														
 
															+    indv_df["predicted_class"].to_numpy(), indv_df["true_label"].to_numpy()
														
 
															 )
														
 
															 auc_indv = metrics.roc_auc_score(
														
 
															-    indv_df['true_label'].to_numpy(), indv_df['class_2'].to_numpy()
														
 
															+    indv_df["true_label"].to_numpy(), indv_df["class_2"].to_numpy()
														
 
															 )
														
 
															-indv_df['entropy'] = -1 * indv_df[['class_1', 'class_2']].apply(
														
 
															+indv_df["entropy"] = -1 * indv_df[["class_1", "class_2"]].apply(
														
 
															     lambda x: x * np.log(x), axis=0
														
 
															 ).sum(axis=1)
														
 
															 # Calculate percentiles for confidence and standard deviation
														
 
															-quantiles_conf = confs_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
														
 
															-    'confidence'
														
 
															+quantiles_conf = confs_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
														
 
															+    "confidence"
														
 
															 ]
														
 
															-quantiles_stdev = stdevs_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
														
 
															-    'stdev'
														
 
															+quantiles_stdev = stdevs_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
														
 
															+    "stdev"
														
 
															 ]
														
 
															 # Additionally for individual confidence
														
 
															-quantiles_indv_conf = indv_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
														
 
															-    'class_2'
														
 
															+quantiles_indv_conf = indv_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
														
 
															+    "class_2"
														
 
															 ]
														
 
															 # For individual entropy
														
 
															-quantiles_indv_entropy = indv_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
														
 
															-    'entropy'
														
 
															+quantiles_indv_entropy = indv_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
														
 
															+    "entropy"
														
 
															 ]
														
 
															 #
														
@@ -293,13 +293,13 @@ iter_conf = it.islice(quantiles_conf.items(), 0, None)
 
															 for quantile in iter_conf:
														
 
															     percentile = quantile[0]
														
 
															-    filt = confs_df[confs_df['confidence'] >= quantile[1]]
														
 
															+    filt = confs_df[confs_df["confidence"] >= quantile[1]]
														
 
															     accuracy = (
														
 
															-        filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
														
 
															+        filt[filt["predicted_class"] == filt["true_label"]].shape[0] / filt.shape[0]
														
 
															     )
														
 
															-    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
														
 
															+    f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
														
 
															-    accuracies_conf.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
														
 
															+    accuracies_conf.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
														
 
															 accuracies_df = pd.DataFrame(accuracies_conf)
														
@@ -309,11 +309,11 @@ iter_conf = it.islice(quantiles_indv_conf.items(), 0, None)
 
															 for quantile in iter_conf:
														
 
															     percentile = quantile[0]
														
 
															-    filt = indv_df[indv_df['class_2'] >= quantile[1]]
														
 
															-    accuracy = filt['correct'].mean()
														
 
															-    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
														
 
															+    filt = indv_df[indv_df["class_2"] >= quantile[1]]
														
 
															+    accuracy = filt["correct"].mean()
														
 
															+    f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
														
 
															-    indv_conf.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
														
 
															+    indv_conf.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
														
 
															 indv_conf_df = pd.DataFrame(indv_conf)
														
@@ -323,35 +323,35 @@ iter_entropy = it.islice(quantiles_indv_entropy.items(), 0, None)
 
															 for quantile in iter_entropy:
														
 
															     percentile = quantile[0]
														
 
															-    filt = indv_df[indv_df['entropy'] <= quantile[1]]
														
 
															-    accuracy = filt['correct'].mean()
														
 
															-    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
														
 
															+    filt = indv_df[indv_df["entropy"] <= quantile[1]]
														
 
															+    accuracy = filt["correct"].mean()
														
 
															+    f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
														
 
															-    indv_entropy.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
														
 
															+    indv_entropy.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
														
 
															 indv_entropy_df = pd.DataFrame(indv_entropy)
														
 
															 # Plot the coverage for confidence and accuracy
														
 
															 plot_coverage(
														
 
															-    accuracies_df['percentile'],
														
 
															-    accuracies_df['accuracy'],
														
 
															-    indv_conf_df['accuracy'],
														
 
															-    'Confidence Accuracy Coverage Plot',
														
 
															-    'Minimum Confidence Percentile (Low to High)',
														
 
															-    'Accuracy',
														
 
															-    f'{V2_PATH}/coverage_conf.png',
														
 
															+    accuracies_df["percentile"],
														
 
															+    accuracies_df["accuracy"],
														
 
															+    indv_conf_df["accuracy"],
														
 
															+    "Confidence Accuracy Coverage Plot",
														
 
															+    "Minimum Confidence Percentile (Low to High)",
														
 
															+    "Accuracy",
														
 
															+    f"{V2_PATH}/coverage_conf.png",
														
 
															 )
														
 
															 # Plot the coverage for confidence and F1
														
 
															 plot_coverage(
														
 
															-    accuracies_df['percentile'],
														
 
															-    accuracies_df['f1'],
														
 
															-    indv_conf_df['f1'],
														
 
															-    'Confidence F1 Coverage Plot',
														
 
															-    'Minimum Confidence Percentile (Low to High)',
														
 
															-    'F1',
														
 
															-    f'{V2_PATH}/f1_coverage_conf.png',
														
 
															+    accuracies_df["percentile"],
														
 
															+    accuracies_df["f1"],
														
 
															+    indv_conf_df["f1"],
														
 
															+    "Confidence F1 Coverage Plot",
														
 
															+    "Minimum Confidence Percentile (Low to High)",
														
 
															+    "F1",
														
 
															+    f"{V2_PATH}/f1_coverage_conf.png",
														
 
															 )
														
 
															 # Repeat for standard deviation
														
@@ -360,90 +360,90 @@ iter_stdev = it.islice(quantiles_stdev.items(), 0, None)
 
															 for quantile in iter_stdev:
														
 
															     percentile = quantile[0]
														
 
															-    filt = stdevs_df[stdevs_df['stdev'] <= quantile[1]]
														
 
															+    filt = stdevs_df[stdevs_df["stdev"] <= quantile[1]]
														
 
															     accuracy = (
														
 
															-        filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
														
 
															+        filt[filt["predicted_class"] == filt["true_label"]].shape[0] / filt.shape[0]
														
 
															     )
														
 
															-    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
														
 
															+    f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
														
 
															-    accuracies_stdev.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
														
 
															+    accuracies_stdev.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
														
 
															 accuracies_stdev_df = pd.DataFrame(accuracies_stdev)
														
 
															 fig, ax = plt.subplots()
														
 
															 plt.plot(
														
 
															-    accuracies_stdev_df['percentile'],
														
 
															-    accuracies_stdev_df['accuracy'],
														
 
															-    'ob',
														
 
															-    label='Ensemble',
														
 
															+    accuracies_stdev_df["percentile"],
														
 
															+    accuracies_stdev_df["accuracy"],
														
 
															+    "ob",
														
 
															+    label="Ensemble",
														
 
															 )
														
 
															 plt.plot(
														
 
															-    accuracies_stdev_df['percentile'],
														
 
															-    [accuracy_indv] * len(accuracies_stdev_df['percentile']),
														
 
															-    'xr',
														
 
															-    label='Individual (on entire dataset)',
														
 
															+    accuracies_stdev_df["percentile"],
														
 
															+    [accuracy_indv] * len(accuracies_stdev_df["percentile"]),
														
 
															+    "xr",
														
 
															+    label="Individual (on entire dataset)",
														
 
															 )
														
 
															-plt.xlabel('Maximum Standard Deviation Percentile (High to Low)')
														
 
															-plt.ylabel('Accuracy')
														
 
															-plt.title('Standard Deviation Accuracy Coverage Plot')
														
 
															+plt.xlabel("Maximum Standard Deviation Percentile (High to Low)")
														
 
															+plt.ylabel("Accuracy")
														
 
															+plt.title("Standard Deviation Accuracy Coverage Plot")
														
 
															 plt.legend()
														
 
															 plt.gca().invert_xaxis()
														
 
															 ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
														
 
															-plt.savefig(f'{V2_PATH}/coverage_stdev.png')
														
 
															+plt.savefig(f"{V2_PATH}/coverage_stdev.png")
														
 
															 plt.close()
														
 
															 # Plot coverage vs F1 for standard deviation
														
 
															 fig, ax = plt.subplots()
														
 
															 plt.plot(
														
 
															-    accuracies_stdev_df['percentile'], accuracies_stdev_df['f1'], 'ob', label='Ensemble'
														
 
															+    accuracies_stdev_df["percentile"], accuracies_stdev_df["f1"], "ob", label="Ensemble"
														
 
															 )
														
 
															 plt.plot(
														
 
															-    accuracies_stdev_df['percentile'],
														
 
															-    [f1_indv] * len(accuracies_stdev_df['percentile']),
														
 
															-    'xr',
														
 
															-    label='Individual (on entire dataset)',
														
 
															+    accuracies_stdev_df["percentile"],
														
 
															+    [f1_indv] * len(accuracies_stdev_df["percentile"]),
														
 
															+    "xr",
														
 
															+    label="Individual (on entire dataset)",
														
 
															 )
														
 
															-plt.xlabel('Maximum Standard Deviation Percentile (High to Low)')
														
 
															-plt.ylabel('F1')
														
 
															-plt.title('Standard Deviation F1 Coverage Plot')
														
 
															+plt.xlabel("Maximum Standard Deviation Percentile (High to Low)")
														
 
															+plt.ylabel("F1")
														
 
															+plt.title("Standard Deviation F1 Coverage Plot")
														
 
															 plt.legend()
														
 
															 plt.gca().invert_xaxis()
														
 
															 ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
														
 
															-plt.savefig(f'{V2_PATH}/coverage_f1_stdev.png')
														
 
															+plt.savefig(f"{V2_PATH}/coverage_f1_stdev.png")
														
 
															 plt.close()
														
 
															 # Print overall accuracy
														
 
															 overall_accuracy = (
														
 
															-    confs_df[confs_df['predicted_class'] == confs_df['true_label']].shape[0]
														
 
															+    confs_df[confs_df["predicted_class"] == confs_df["true_label"]].shape[0]
														
 
															     / confs_df.shape[0]
														
 
															 )
														
 
															 overall_f1 = met.F1(
														
 
															-    confs_df['predicted_class'].to_numpy(), confs_df['true_label'].to_numpy()
														
 
															+    confs_df["predicted_class"].to_numpy(), confs_df["true_label"].to_numpy()
														
 
															 )
														
 
															 # Calculate ECE and MCE
														
 
															 conf_ece = met.ECE(
														
 
															-    confs_df['predicted_class'].to_numpy(),
														
 
															-    confs_df['confidence'].to_numpy(),
														
 
															-    confs_df['true_label'].to_numpy(),
														
 
															+    confs_df["predicted_class"].to_numpy(),
														
 
															+    confs_df["confidence"].to_numpy(),
														
 
															+    confs_df["true_label"].to_numpy(),
														
 
															 )
														
 
															 stdev_ece = met.ECE(
														
 
															-    stdevs_df['predicted_class'].to_numpy(),
														
 
															-    stdevs_df['stdev'].to_numpy(),
														
 
															-    stdevs_df['true_label'].to_numpy(),
														
 
															+    stdevs_df["predicted_class"].to_numpy(),
														
 
															+    stdevs_df["stdev"].to_numpy(),
														
 
															+    stdevs_df["true_label"].to_numpy(),
														
 
															 )
														
 
															-print(f'Overall accuracy: {overall_accuracy}, Overall F1: {overall_f1},')
														
 
															-print(f'Confidence ECE: {conf_ece}')
														
 
															-print(f'Standard Deviation ECE: {stdev_ece}')
														
 
															+print(f"Overall accuracy: {overall_accuracy}, Overall F1: {overall_f1},")
														
 
															+print(f"Confidence ECE: {conf_ece}")
														
 
															+print(f"Standard Deviation ECE: {stdev_ece}")
														
 
															 # Repeat for entropy
														
 
															-quantiles_entropy = entropies_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
														
 
															-    'entropy'
														
 
															+quantiles_entropy = entropies_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
														
 
															+    "entropy"
														
 
															 ]
														
 
															 accuracies_entropy = []
														
@@ -451,14 +451,14 @@ iter_entropy = it.islice(quantiles_entropy.items(), 0, None)
 
															 for quantile in iter_entropy:
														
 
															     percentile = quantile[0]
														
 
															-    filt = entropies_df[entropies_df['entropy'] <= quantile[1]]
														
 
															+    filt = entropies_df[entropies_df["entropy"] <= quantile[1]]
														
 
															     accuracy = (
														
 
															-        filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
														
 
															+        filt[filt["predicted_class"] == filt["true_label"]].shape[0] / filt.shape[0]
														
 
															     )
														
 
															-    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
														
 
															+    f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
														
 
															     accuracies_entropy.append(
														
 
															-        {'percentile': percentile, 'accuracy': accuracy, 'f1': f1}
														
 
															+        {"percentile": percentile, "accuracy": accuracy, "f1": f1}
														
 
															     )
														
 
															 accuracies_entropy_df = pd.DataFrame(accuracies_entropy)
														
@@ -466,23 +466,23 @@ accuracies_entropy_df = pd.DataFrame(accuracies_entropy)
 
															 # Plot the coverage for entropy and accuracy
														
 
															 plot_coverage(
														
 
															-    accuracies_entropy_df['percentile'],
														
 
															-    accuracies_entropy_df['accuracy'],
														
 
															-    indv_entropy_df['accuracy'],
														
 
															-    'Entropy Accuracy Coverage Plot',
														
 
															-    'Minimum Entropy Percentile (Low to High)',
														
 
															-    'Accuracy',
														
 
															-    f'{V2_PATH}/coverage_entropy.png',
														
 
															+    accuracies_entropy_df["percentile"],
														
 
															+    accuracies_entropy_df["accuracy"],
														
 
															+    indv_entropy_df["accuracy"],
														
 
															+    "Entropy Accuracy Coverage Plot",
														
 
															+    "Minimum Entropy Percentile (Low to High)",
														
 
															+    "Accuracy",
														
 
															+    f"{V2_PATH}/coverage_entropy.png",
														
 
															 )
														
 
															 # Plot the coverage for entropy and F1
														
 
															 plot_coverage(
														
 
															-    accuracies_entropy_df['percentile'],
														
 
															-    accuracies_entropy_df['f1'],
														
 
															-    indv_entropy_df['f1'],
														
 
															-    'Entropy F1 Coverage Plot',
														
 
															-    'Maximum Entropy Percentile (High to Low)',
														
 
															-    'F1',
														
 
															-    f'{V2_PATH}/f1_coverage_entropy.png',
														
 
															+    accuracies_entropy_df["percentile"],
														
 
															+    accuracies_entropy_df["f1"],
														
 
															+    indv_entropy_df["f1"],
														
 
															+    "Entropy F1 Coverage Plot",
														
 
															+    "Maximum Entropy Percentile (High to Low)",
														
 
															+    "F1",
														
 
															+    f"{V2_PATH}/f1_coverage_entropy.png",
														
 
															     flip=True,
														
 
															 )
														
--- a/utils/data/datasets.py
+++ b/utils/data/datasets.py
@@ -18,19 +18,31 @@ Prepares CustomDatasets for training, validating, and testing CNN
 
															 def prepare_datasets(mri_dir, xls_file, val_split=0.2, seed=50, device=None):
														
 
															     if device is None:
														
 
															-        device = torch.device('cpu')
														
 
															+        device = torch.device("cpu")
														
 
															     rndm = random.Random(seed)
														
 
															-    xls_data = pd.read_csv(xls_file).set_index('Image Data ID')
														
 
															-    raw_data = glob.glob(mri_dir + '*')
														
 
															+    xls_data = pd.read_csv(xls_file)
														
 
															+
														
 
															+    # Strip leading and trailing spaces from every string cell in the dataframe
														
 
															+    xls_data = xls_data.replace(r"^ +| +$", r"", regex=True)
														
 
															+
														
 
															+    # Strip leading and trailing whitespace from column names
														
 
															+    xls_data.columns = xls_data.columns.str.strip()
														
 
															+
														
 
															+    xls_data = xls_data.set_index("Image Data ID")
														
 
															+
														
 
															+    raw_data = glob.glob(mri_dir + "*")
														
 
															+
														
 
															+    print(f"Found {len(raw_data)} images in {mri_dir}")
														
 
															+
														
 
															     AD_list = []
														
 
															     NL_list = []
														
 
															     # TODO Check that image is in CSV?
														
 
															     for image in raw_data:
														
 
															-        if 'NL' in image:
														
 
															+        if "NL" in image:
														
 
															             NL_list.append(image)
														
 
															-        elif 'AD' in image:
														
 
															+        elif "AD" in image:
														
 
															             AD_list.append(image)
														
 
															     rndm.shuffle(AD_list)
														
@@ -90,7 +102,7 @@ def get_train_val_test(AD_list, NL_list, val_split):
 
															 class ADNIDataset(Dataset):
														
 
															-    def __init__(self, mri, xls: pd.DataFrame, device=torch.device('cpu')):
														
 
															+    def __init__(self, mri, xls: pd.DataFrame, device=torch.device("cpu")):
														
 
															         self.mri_data = mri  # DATA IS A LIST WITH TUPLES (image_dir, class_id)
														
 
															         self.xls_data = xls
														
 
															         self.device = device
														
@@ -102,9 +114,9 @@ class ADNIDataset(Dataset):
 
															         # Get used data
														
 
															         # data = xls_data.loc[['Sex', 'Age (current)', 'PTID', 'DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence)', 'Alz_csf']]
														
 
															-        data = xls_data.loc[['Sex', 'Age (current)']]
														
 
															+        data = xls_data.loc[["Sex", "Age (current)"]]
														
 
															-        data.replace({'M': 0, 'F': 1}, inplace=True)
														
 
															+        data.replace({"M": 0, "F": 1}, inplace=True)
														
 
															         # Convert to tensor
														
 
															         xls_tensor = torch.tensor(data.values.astype(float))