@@ -10,7 +10,9 @@ from tqdm import tqdm

 import utils.metrics as met
 import matplotlib.pyplot as plt
 import matplotlib.ticker as mtick

+import utils.models.cnn
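+
+# PyTorch 2.6 made weights_only=True the default for torch.load, so the custom
+# CNN class must be registered as a safe global before pickled model objects
+# can be loaded.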
+torch.serialization.add_safe_globals([utils.models.cnn.CNN])


 # The data structures for this file are as follows

@@ -29,11 +31,11 @@ import matplotlib.ticker as mtick

 # Loads configuration dictionary
 def load_config():
-    if os.getenv('ADL_CONFIG_PATH') is None:
-        with open('config.toml', 'rb') as f:
+    if os.getenv("ADL_CONFIG_PATH") is None:
+        with open("config.toml", "rb") as f:
             config = toml.load(f)
     else:
-        with open(os.getenv('ADL_CONFIG_PATH'), 'rb') as f:
+        with open(os.getenv("ADL_CONFIG_PATH"), "rb") as f:
             config = toml.load(f)

     return config
@@ -41,17 +43,20 @@ def load_config():


 # Loads models into a dictionary
 def load_models_v2(folder, device):
-    glob_path = os.path.join(folder, '*.pt')
+    glob_path = os.path.join(folder, "*.pt")
     model_files = glob.glob(glob_path)
     model_dict = {}

     for model_file in model_files:
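+        # Debug aid: print any globals in this checkpoint that are not on the
+        # weights_only allowlist (torch.serialization, PyTorch >= 2.6).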
+        with open(model_file, "rb") as f:
+            print(torch.serialization.get_unsafe_globals_in_checkpoint(f))
+
         model = torch.load(model_file, map_location=device)
-        model_id = os.path.basename(model_file).split('_')[0]
+        model_id = os.path.basename(model_file).split("_")[0]
         model_dict[model_id] = model

     if len(model_dict) == 0:
-        raise FileNotFoundError('No models found in the specified directory: ' + folder)
+        raise FileNotFoundError("No models found in the specified directory: " + folder)

     return model_dict
@@ -67,8 +72,8 @@ def preprocess_data(data, device):
 # Loads datasets and returns the test and validation datasets
 def load_datasets(ensemble_path):
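+    # NOTE: these .pt files hold pickled dataset objects; under the
+    # weights_only=True default their classes may also need allowlisting, or an
+    # explicit weights_only=False here (assumption, not verified).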
     return (
-        torch.load(f'{ensemble_path}/test_dataset.pt'),
-        torch.load(f'{ensemble_path}/val_dataset.pt'),
+        torch.load(f"{ensemble_path}/test_dataset.pt"),
+        torch.load(f"{ensemble_path}/val_dataset.pt"),
     )

@@ -77,21 +82,21 @@ def get_ensemble_predictions(models, dataset, device, id_offset=0):
     zeros = np.zeros((len(dataset), len(models), 4))
     predictions = xr.DataArray(
         zeros,
-        dims=('data_id', 'model_id', 'prediction_value'),
+        dims=("data_id", "model_id", "prediction_value"),
         coords={
-            'data_id': range(id_offset, len(dataset) + id_offset),
-            'model_id': list(models.keys()),
-            'prediction_value': [
-                'negative_prediction',
-                'positive_prediction',
-                'negative_actual',
-                'positive_actual',
+            "data_id": range(id_offset, len(dataset) + id_offset),
+            "model_id": list(models.keys()),
+            "prediction_value": [
+                "negative_prediction",
+                "positive_prediction",
+                "negative_actual",
+                "positive_actual",
             ],
         },
     )

     for data_id, (data, target) in tqdm(
-        enumerate(dataset), total=len(dataset), unit='images'
+        enumerate(dataset), total=len(dataset), unit="images"
     ):
         dat = preprocess_data(data, device)
         actual = list(target.cpu().numpy())
@@ -101,8 +106,8 @@ def get_ensemble_predictions(models, dataset, device, id_offset=0):
         prediction = output.cpu().numpy().tolist()[0]

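+        # Note: list concatenation, not numeric addition; the stored row is
+        # [negative_prediction, positive_prediction, negative_actual, positive_actual]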
         predictions.loc[
-            {'data_id': data_id + id_offset, 'model_id': model_id}
-        ] = prediction + actual
+            {"data_id": data_id + id_offset, "model_id": model_id}
+        ] = (prediction + actual)

     return predictions
@@ -113,27 +118,27 @@ def compute_ensemble_statistics(predictions: xr.DataArray):

     ensemble_statistics = xr.DataArray(
         zeros,
-        dims=('data_id', 'statistic'),
+        dims=("data_id", "statistic"),
         coords={
-            'data_id': predictions.data_id,
-            'statistic': [
-                'mean',
-                'stdev',
-                'entropy',
-                'confidence',
-                'correct',
-                'predicted',
-                'actual',
+            "data_id": predictions.data_id,
+            "statistic": [
+                "mean",
+                "stdev",
+                "entropy",
+                "confidence",
+                "correct",
+                "predicted",
+                "actual",
             ],
         },
     )

     for data_id in predictions.data_id:
-        data = predictions.loc[{'data_id': data_id}]
-        mean = data.mean(dim='model_id')[
+        data = predictions.loc[{"data_id": data_id}]
+        mean = data.mean(dim="model_id")[
             0:2
         ]  # Only take the predictions, not the actual
-        stdev = data.std(dim='model_id')[
+        stdev = data.std(dim="model_id")[
             1
         ]  # Only need the standard deviation of the positive prediction
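+        # Shannon entropy of the mean prediction: H = -sum(p * log(p))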
         entropy = (-mean * np.log(mean)).sum()
@@ -142,11 +147,11 @@ def compute_ensemble_statistics(predictions: xr.DataArray):
         confidence = mean.max()

         # only need one of the actual values, since they are all the same, just get the first actual_positive
-        actual = data.loc[{'prediction_value': 'positive_actual'}][0]
+        actual = data.loc[{"prediction_value": "positive_actual"}][0]
         predicted = mean.argmax()
         correct = actual == predicted

-        ensemble_statistics.loc[{'data_id': data_id}] = [
+        ensemble_statistics.loc[{"data_id": data_id}] = [
             mean[1],
             stdev,
             entropy,
@@ -162,15 +167,15 @@ def compute_ensemble_statistics(predictions: xr.DataArray):
 # Compute the thresholded predictions given an array of predictions
 def compute_thresholded_predictions(input_stats: xr.DataArray):
     quantiles = np.linspace(0.00, 1.00, 21) * 100
-    metrics = ['accuracy', 'f1']
-    statistics = ['stdev', 'entropy', 'confidence']
+    metrics = ["accuracy", "f1"]
+    statistics = ["stdev", "entropy", "confidence"]

     zeros = np.zeros((len(quantiles), len(statistics), len(metrics)))

     thresholded_predictions = xr.DataArray(
         zeros,
-        dims=('quantile', 'statistic', 'metric'),
-        coords={'quantile': quantiles, 'statistic': statistics, 'metric': metrics},
+        dims=("quantile", "statistic", "metric"),
+        coords={"quantile": quantiles, "statistic": statistics, "metric": metrics},
     )

     for statistic in statistics:
@@ -197,7 +202,7 @@ def compute_thresholded_predictions(input_stats: xr.DataArray):

         for metric in metrics:
             thresholded_predictions.loc[
-                {'quantile': quantile, 'statistic': statistic, 'metric': metric}
+                {"quantile": quantile, "statistic": statistic, "metric": metric}
             ] = compute_metric(filtered_data, metric)

     return thresholded_predictions
@@ -208,26 +213,26 @@ def compute_thresholded_predictions(input_stats: xr.DataArray):
 # So we threshold confidence low to high, entropy and stdev high to low
 # So any values BELOW the cutoff are removed for confidence, and any values ABOVE the cutoff are removed for entropy and stdev
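+# e.g. at a 20th-percentile threshold, confidence keeps the most-confident 80%
+# of predictions, while entropy and stdev keep only the 20% least-uncertain.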
 def low_to_high(stat):
-    return stat in ['confidence']
+    return stat in ["confidence"]


 # Compute a given metric on a DataArray of statistics
 def compute_metric(arr, metric):
-    if metric == 'accuracy':
-        return np.mean(arr.loc[{'statistic': 'correct'}])
-    elif metric == 'f1':
+    if metric == "accuracy":
+        return np.mean(arr.loc[{"statistic": "correct"}])
+    elif metric == "f1":
         return met.F1(
-            arr.loc[{'statistic': 'predicted'}], arr.loc[{'statistic': 'actual'}]
+            arr.loc[{"statistic": "predicted"}], arr.loc[{"statistic": "actual"}]
         )
-    elif metric == 'ece':
-        true_labels = arr.loc[{'statistic': 'actual'}].values
-        predicted_labels = arr.loc[{'statistic': 'predicted'}].values
-        confidences = arr.loc[{'statistic': 'confidence'}].values
+    elif metric == "ece":
+        true_labels = arr.loc[{"statistic": "actual"}].values
+        predicted_labels = arr.loc[{"statistic": "predicted"}].values
+        confidences = arr.loc[{"statistic": "confidence"}].values

         return calculate_ece_stats(confidences, predicted_labels, true_labels)

     else:
-        raise ValueError('Invalid metric: ' + metric)
+        raise ValueError("Invalid metric: " + metric)


 # Graph a thresholded prediction for a given statistic and metric
@@ -236,11 +241,11 @@ def graph_thresholded_prediction(
 ):
     data = thresholded_predictions.sel(statistic=statistic, metric=metric)

-    x_data = data.coords['quantile'].values
+    x_data = data.coords["quantile"].values
     y_data = data.values

     fig, ax = plt.subplots()
-    ax.plot(x_data, y_data, 'bx-', label='Ensemble')
+    ax.plot(x_data, y_data, "bx-", label="Ensemble")
     ax.set_title(title)
     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
@@ -257,68 +262,68 @@ def graph_all_thresholded_predictions(thresholded_predictions, save_path):
     # Confidence Accuracy
     graph_thresholded_prediction(
         thresholded_predictions,
-        'confidence',
-        'accuracy',
-        f'{save_path}/confidence_accuracy.png',
-        'Coverage Analysis of Confidence vs. Accuracy',
-        'Minimum Confidence Percentile Threshold',
-        'Accuracy',
+        "confidence",
+        "accuracy",
+        f"{save_path}/confidence_accuracy.png",
+        "Coverage Analysis of Confidence vs. Accuracy",
+        "Minimum Confidence Percentile Threshold",
+        "Accuracy",
     )

     # Confidence F1
     graph_thresholded_prediction(
         thresholded_predictions,
-        'confidence',
-        'f1',
-        f'{save_path}/confidence_f1.png',
-        'Coverage Analysis of Confidence vs. F1 Score',
-        'Minimum Confidence Percentile Threshold',
-        'F1 Score',
+        "confidence",
+        "f1",
+        f"{save_path}/confidence_f1.png",
+        "Coverage Analysis of Confidence vs. F1 Score",
+        "Minimum Confidence Percentile Threshold",
+        "F1 Score",
     )

     # Entropy Accuracy
     graph_thresholded_prediction(
         thresholded_predictions,
-        'entropy',
-        'accuracy',
-        f'{save_path}/entropy_accuracy.png',
-        'Coverage Analysis of Entropy vs. Accuracy',
-        'Maximum Entropy Percentile Threshold',
-        'Accuracy',
+        "entropy",
+        "accuracy",
+        f"{save_path}/entropy_accuracy.png",
+        "Coverage Analysis of Entropy vs. Accuracy",
+        "Maximum Entropy Percentile Threshold",
+        "Accuracy",
     )

     # Entropy F1

     graph_thresholded_prediction(
         thresholded_predictions,
-        'entropy',
-        'f1',
-        f'{save_path}/entropy_f1.png',
-        'Coverage Analysis of Entropy vs. F1 Score',
-        'Maximum Entropy Percentile Threshold',
-        'F1 Score',
+        "entropy",
+        "f1",
+        f"{save_path}/entropy_f1.png",
+        "Coverage Analysis of Entropy vs. F1 Score",
+        "Maximum Entropy Percentile Threshold",
+        "F1 Score",
     )

     # Stdev Accuracy
     graph_thresholded_prediction(
         thresholded_predictions,
-        'stdev',
-        'accuracy',
-        f'{save_path}/stdev_accuracy.png',
-        'Coverage Analysis of Standard Deviation vs. Accuracy',
-        'Maximum Standard Deviation Percentile Threshold',
-        'Accuracy',
+        "stdev",
+        "accuracy",
+        f"{save_path}/stdev_accuracy.png",
+        "Coverage Analysis of Standard Deviation vs. Accuracy",
+        "Maximum Standard Deviation Percentile Threshold",
+        "Accuracy",
     )

     # Stdev F1
     graph_thresholded_prediction(
         thresholded_predictions,
-        'stdev',
-        'f1',
-        f'{save_path}/stdev_f1.png',
-        'Coverage Analysis of Standard Deviation vs. F1 Score',
-        'Maximum Standard Deviation Percentile Threshold',
-        'F1',
+        "stdev",
+        "f1",
+        f"{save_path}/stdev_f1.png",
+        "Coverage Analysis of Standard Deviation vs. F1 Score",
+        "Maximum Standard Deviation Percentile Threshold",
+        "F1",
     )

@@ -326,13 +331,13 @@ def graph_all_thresholded_predictions(thresholded_predictions, save_path):
 def graph_statistics(stats, x_stat, y_stat, save_path, title, xlabel, ylabel):
     # Filter for correct predictions
     c_stats = stats.where(
-        stats.data_id.isin(np.where((stats.sel(statistic='correct') == 1).values)),
+        stats.data_id.isin(np.where((stats.sel(statistic="correct") == 1).values)),
         drop=True,
     )

     # Filter for incorrect predictions
     i_stats = stats.where(
-        stats.data_id.isin(np.where((stats.sel(statistic='correct') == 0).values)),
+        stats.data_id.isin(np.where((stats.sel(statistic="correct") == 0).values)),
        drop=True,
     )

@@ -344,8 +349,8 @@ def graph_statistics(stats, x_stat, y_stat, save_path, title, xlabel, ylabel):
     y_data_i = i_stats.sel(statistic=y_stat).values

     fig, ax = plt.subplots()
-    ax.plot(x_data_c, y_data_c, 'go', label='Correct')
-    ax.plot(x_data_i, y_data_i, 'ro', label='Incorrect')
+    ax.plot(x_data_c, y_data_c, "go", label="Correct")
+    ax.plot(x_data_i, y_data_i, "ro", label="Incorrect")
     ax.set_title(title)
     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
@@ -365,26 +370,26 @@ def compute_individual_statistics(predictions: xr.DataArray):

     indv_statistics = xr.DataArray(
         zeros,
-        dims=('data_id', 'model_id', 'statistic'),
+        dims=("data_id", "model_id", "statistic"),
         coords={
-            'data_id': predictions.data_id,
-            'model_id': predictions.model_id,
-            'statistic': [
-                'mean',
-                'entropy',
-                'confidence',
-                'correct',
-                'predicted',
-                'actual',
+            "data_id": predictions.data_id,
+            "model_id": predictions.model_id,
+            "statistic": [
+                "mean",
+                "entropy",
+                "confidence",
+                "correct",
+                "predicted",
+                "actual",
             ],
         },
     )

     for data_id in tqdm(
-        predictions.data_id, total=len(predictions.data_id), unit='images'
+        predictions.data_id, total=len(predictions.data_id), unit="images"
     ):
         for model_id in predictions.model_id:
-            data = predictions.loc[{'data_id': data_id, 'model_id': model_id}]
+            data = predictions.loc[{"data_id": data_id, "model_id": model_id}]
             mean = data[0:2]
             entropy = (-mean * np.log(mean)).sum()
             confidence = mean.max()
@@ -392,7 +397,7 @@ def compute_individual_statistics(predictions: xr.DataArray):
             predicted = mean.argmax()
             correct = actual == predicted

-            indv_statistics.loc[{'data_id': data_id, 'model_id': model_id}] = [
+            indv_statistics.loc[{"data_id": data_id, "model_id": model_id}] = [
                 mean[1],
                 entropy,
                 confidence,
@@ -407,8 +412,8 @@ def compute_individual_statistics(predictions: xr.DataArray):
 # Compute individual model thresholds
 def compute_individual_thresholds(input_stats: xr.DataArray):
     quantiles = np.linspace(0.05, 0.95, 19) * 100
-    metrics = ['accuracy', 'f1']
-    statistics = ['entropy', 'confidence']
+    metrics = ["accuracy", "f1"]
+    statistics = ["entropy", "confidence"]

     zeros = np.zeros(
         (len(input_stats.model_id), len(quantiles), len(statistics), len(metrics))
@@ -416,17 +421,17 @@ def compute_individual_thresholds(input_stats: xr.DataArray):

     indv_thresholds = xr.DataArray(
         zeros,
-        dims=('model_id', 'quantile', 'statistic', 'metric'),
+        dims=("model_id", "quantile", "statistic", "metric"),
         coords={
-            'model_id': input_stats.model_id,
-            'quantile': quantiles,
-            'statistic': statistics,
-            'metric': metrics,
+            "model_id": input_stats.model_id,
+            "quantile": quantiles,
+            "statistic": statistics,
+            "metric": metrics,
         },
     )

     for model_id in tqdm(
-        input_stats.model_id, total=len(input_stats.model_id), unit='models'
+        input_stats.model_id, total=len(input_stats.model_id), unit="models"
     ):
         for statistic in statistics:
             # First, we must compute the quantiles for the statistic
@@ -457,10 +462,10 @@ def compute_individual_thresholds(input_stats: xr.DataArray):
             for metric in metrics:
                 indv_thresholds.loc[
                     {
-                        'model_id': model_id,
-                        'quantile': quantile,
-                        'statistic': statistic,
-                        'metric': metric,
+                        "model_id": model_id,
+                        "quantile": quantile,
+                        "statistic": statistic,
+                        "metric": metric,
                     }
                 ] = compute_metric(filtered_data, metric)

@@ -481,18 +486,18 @@ def graph_individual_thresholded_predictions(
     data = indv_thresholds.sel(statistic=statistic, metric=metric)
     e_data = ensemble_thresholds.sel(statistic=statistic, metric=metric)

-    x_data = data.coords['quantile'].values
+    x_data = data.coords["quantile"].values
     y_data = data.values

-    e_x_data = e_data.coords['quantile'].values
+    e_x_data = e_data.coords["quantile"].values
     e_y_data = e_data.values

     fig, ax = plt.subplots()
-    for model_id in data.coords['model_id'].values:
+    for model_id in data.coords["model_id"].values:
         model_data = data.sel(model_id=model_id)
         ax.plot(x_data, model_data)

-    ax.plot(e_x_data, e_y_data, 'kx-', label='Ensemble')
+    ax.plot(e_x_data, e_y_data, "kx-", label="Ensemble")

     ax.set_title(title)
     ax.set_xlabel(xlabel)
@@ -514,48 +519,48 @@ def graph_all_individual_thresholded_predictions(
     graph_individual_thresholded_predictions(
         indv_thresholds,
         ensemble_thresholds,
-        'confidence',
-        'accuracy',
-        f'{save_path}/indv/confidence_accuracy.png',
-        'Coverage Analysis of Confidence vs. Accuracy for All Models',
-        'Minumum Confidence Percentile Threshold',
-        'Accuracy',
+        "confidence",
+        "accuracy",
+        f"{save_path}/indv/confidence_accuracy.png",
+        "Coverage Analysis of Confidence vs. Accuracy for All Models",
+        "Minimum Confidence Percentile Threshold",
+        "Accuracy",
     )

     # Confidence F1
     graph_individual_thresholded_predictions(
         indv_thresholds,
         ensemble_thresholds,
-        'confidence',
-        'f1',
-        f'{save_path}/indv/confidence_f1.png',
-        'Coverage Analysis of Confidence vs. F1 Score for All Models',
-        'Minimum Confidence Percentile Threshold',
-        'F1 Score',
+        "confidence",
+        "f1",
+        f"{save_path}/indv/confidence_f1.png",
+        "Coverage Analysis of Confidence vs. F1 Score for All Models",
+        "Minimum Confidence Percentile Threshold",
+        "F1 Score",
     )

     # Entropy Accuracy
     graph_individual_thresholded_predictions(
         indv_thresholds,
         ensemble_thresholds,
-        'entropy',
-        'accuracy',
-        f'{save_path}/indv/entropy_accuracy.png',
-        'Coverage Analysis of Entropy vs. Accuracy for All Models',
-        'Maximum Entropy Percentile Threshold',
-        'Accuracy',
+        "entropy",
+        "accuracy",
+        f"{save_path}/indv/entropy_accuracy.png",
+        "Coverage Analysis of Entropy vs. Accuracy for All Models",
+        "Maximum Entropy Percentile Threshold",
+        "Accuracy",
     )

     # Entropy F1
     graph_individual_thresholded_predictions(
         indv_thresholds,
         ensemble_thresholds,
-        'entropy',
-        'f1',
-        f'{save_path}/indv/entropy_f1.png',
-        'Coverage Analysis of Entropy vs. F1 Score for All Models',
-        'Maximum Entropy Percentile Threshold',
-        'F1 Score',
+        "entropy",
+        "f1",
+        f"{save_path}/indv/entropy_f1.png",
+        "Coverage Analysis of Entropy vs. F1 Score for All Models",
+        "Maximum Entropy Percentile Threshold",
+        "F1 Score",
     )

@@ -570,38 +575,38 @@ def calculate_subset_statistics(predictions: xr.DataArray):

     subset_stats = xr.DataArray(
         zeros,
-        dims=('data_id', 'model_count', 'statistic'),
+        dims=("data_id", "model_count", "statistic"),
         coords={
-            'data_id': predictions.data_id,
-            'model_count': subsets,
-            'statistic': [
-                'mean',
-                'stdev',
-                'entropy',
-                'confidence',
-                'correct',
-                'predicted',
-                'actual',
+            "data_id": predictions.data_id,
+            "model_count": subsets,
+            "statistic": [
+                "mean",
+                "stdev",
+                "entropy",
+                "confidence",
+                "correct",
+                "predicted",
+                "actual",
             ],
         },
     )

     for data_id in tqdm(
-        predictions.data_id, total=len(predictions.data_id), unit='images'
+        predictions.data_id, total=len(predictions.data_id), unit="images"
     ):
         for subset in subsets:
             data = predictions.sel(
                 data_id=data_id, model_id=predictions.model_id[:subset]
             )
-            mean = data.mean(dim='model_id')[0:2]
-            stdev = data.std(dim='model_id')[1]
+            mean = data.mean(dim="model_id")[0:2]
+            stdev = data.std(dim="model_id")[1]
             entropy = (-mean * np.log(mean)).sum()
             confidence = mean.max()
             actual = data[0][3]
             predicted = mean.argmax()
             correct = actual == predicted

-            subset_stats.loc[{'data_id': data_id, 'model_count': subset}] = [
+            subset_stats.loc[{"data_id": data_id, "model_count": subset}] = [
                 mean[1],
                 stdev,
                 entropy,
@@ -617,24 +622,24 @@ def calculate_subset_statistics(predictions: xr.DataArray):
 # Calculate Accuracy, F1 and ECE for subset stats - sensitivity analysis
 def calculate_sensitivity_analysis(subset_stats: xr.DataArray):
     subsets = subset_stats.model_count
-    stats = ['accuracy', 'f1', 'ece']
+    stats = ["accuracy", "f1", "ece"]

     zeros = np.zeros((len(subsets), len(stats)))

     sens_analysis = xr.DataArray(
         zeros,
-        dims=('model_count', 'statistic'),
-        coords={'model_count': subsets, 'statistic': stats},
+        dims=("model_count", "statistic"),
+        coords={"model_count": subsets, "statistic": stats},
     )

-    for subset in tqdm(subsets, total=len(subsets), unit='model subsets'):
+    for subset in tqdm(subsets, total=len(subsets), unit="model subsets"):
         data = subset_stats.sel(model_count=subset)
-        acc = compute_metric(data, 'accuracy').item()
-        f1 = compute_metric(data, 'f1').item()
-        ece = compute_metric(data, 'ece').item()
+        acc = compute_metric(data, "accuracy").item()
+        f1 = compute_metric(data, "f1").item()
+        ece = compute_metric(data, "ece").item()

-        sens_analysis.loc[{'model_count': subset.item()}] = [acc, f1, ece]
+        sens_analysis.loc[{"model_count": subset.item()}] = [acc, f1, ece]

     return sens_analysis
@@ -644,7 +649,7 @@ def graph_sensitivity_analysis(
 ):
     data = sens_analysis.sel(statistic=statistic)

-    xdata = data.coords['model_count'].values
+    xdata = data.coords["model_count"].values
     ydata = data.values

     fig, ax = plt.subplots()
@@ -657,10 +662,10 @@


 def calculate_overall_stats(ensemble_statistics: xr.DataArray):
-    accuracy = compute_metric(ensemble_statistics, 'accuracy')
-    f1 = compute_metric(ensemble_statistics, 'f1')
+    accuracy = compute_metric(ensemble_statistics, "accuracy")
+    f1 = compute_metric(ensemble_statistics, "f1")

-    return {'accuracy': accuracy.item(), 'f1': f1.item()}
+    return {"accuracy": accuracy.item(), "f1": f1.item()}


 # https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
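+# ECE = sum over confidence bins b of (n_b / N) * |accuracy(b) - avg confidence(b)|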
@@ -693,130 +698,130 @@ def plot_ece_graph(ece_stats, title, xlabel, ylabel, save_path):

 # Main Function
 def main():
-    print('Loading Config...')
+    print("Loading Config...")
     config = load_config()
     ENSEMBLE_PATH = f"{config['paths']['model_output']}{config['ensemble']['name']}"
-    V4_PATH = ENSEMBLE_PATH + '/v4'
+    V4_PATH = ENSEMBLE_PATH + "/v4"

     if not os.path.exists(V4_PATH):
         os.makedirs(V4_PATH)
-    print('Config Loaded')
+    print("Config Loaded")

     # Load Datasets
-    print('Loading Datasets...')
+    print("Loading Datasets...")
     (test_dataset, val_dataset) = load_datasets(ENSEMBLE_PATH)
-    print('Datasets Loaded')
+    print("Datasets Loaded")

     # Get Predictions, either by running the models or loading them from a file
-    if config['ensemble']['run_models']:
+    if config["ensemble"]["run_models"]:
         # Load Models
-        print('Loading Models...')
-        device = torch.device(config['training']['device'])
-        models = load_models_v2(f'{ENSEMBLE_PATH}/models/', device)
-        print('Models Loaded')
+        print("Loading Models...")
+        device = torch.device(config["training"]["device"])
+        models = load_models_v2(f"{ENSEMBLE_PATH}/models/", device)
+        print("Models Loaded")

         # Get Predictions
-        print('Getting Predictions...')
+        print("Getting Predictions...")
         test_predictions = get_ensemble_predictions(models, test_dataset, device)
         val_predictions = get_ensemble_predictions(
             models, val_dataset, device, len(test_dataset)
         )
-        print('Predictions Loaded')
+        print("Predictions Loaded")

         # Save Predictions
-        test_predictions.to_netcdf(f'{V4_PATH}/test_predictions.nc')
-        val_predictions.to_netcdf(f'{V4_PATH}/val_predictions.nc')
+        test_predictions.to_netcdf(f"{V4_PATH}/test_predictions.nc")
+        val_predictions.to_netcdf(f"{V4_PATH}/val_predictions.nc")
     else:
-        test_predictions = xr.open_dataarray(f'{V4_PATH}/test_predictions.nc')
-        val_predictions = xr.open_dataarray(f'{V4_PATH}/val_predictions.nc')
+        test_predictions = xr.open_dataarray(f"{V4_PATH}/test_predictions.nc")
+        val_predictions = xr.open_dataarray(f"{V4_PATH}/val_predictions.nc")

     # Prune Data
-    print('Pruning Data...')
-    if config['operation']['exclude_blank_ids']:
-        excluded_data_ids = config['ensemble']['excluded_ids']
+    print("Pruning Data...")
+    if config["operation"]["exclude_blank_ids"]:
+        excluded_data_ids = config["ensemble"]["excluded_ids"]
         test_predictions = prune_data(test_predictions, excluded_data_ids)
         val_predictions = prune_data(val_predictions, excluded_data_ids)

     # Concatenate Predictions
-    predictions = xr.concat([test_predictions, val_predictions], dim='data_id')
+    predictions = xr.concat([test_predictions, val_predictions], dim="data_id")

     # Compute Ensemble Statistics
-    print('Computing Ensemble Statistics...')
+    print("Computing Ensemble Statistics...")
     ensemble_statistics = compute_ensemble_statistics(predictions)
-    ensemble_statistics.to_netcdf(f'{V4_PATH}/ensemble_statistics.nc')
-    print('Ensemble Statistics Computed')
+    ensemble_statistics.to_netcdf(f"{V4_PATH}/ensemble_statistics.nc")
+    print("Ensemble Statistics Computed")

     # Compute Thresholded Predictions
-    print('Computing Thresholded Predictions...')
+    print("Computing Thresholded Predictions...")
     thresholded_predictions = compute_thresholded_predictions(ensemble_statistics)
-    thresholded_predictions.to_netcdf(f'{V4_PATH}/thresholded_predictions.nc')
-    print('Thresholded Predictions Computed')
+    thresholded_predictions.to_netcdf(f"{V4_PATH}/thresholded_predictions.nc")
+    print("Thresholded Predictions Computed")

     # Graph Thresholded Predictions
-    print('Graphing Thresholded Predictions...')
+    print("Graphing Thresholded Predictions...")
     graph_all_thresholded_predictions(thresholded_predictions, V4_PATH)
-    print('Thresholded Predictions Graphed')
+    print("Thresholded Predictions Graphed")

     # Additional Graphs
-    print('Graphing Additional Graphs...')
+    print("Graphing Additional Graphs...")
     # Confidence vs stdev
     graph_statistics(
         ensemble_statistics,
-        'confidence',
-        'stdev',
-        f'{V4_PATH}/confidence_stdev.png',
-        'Confidence and Standard Deviation for Predictions',
-        'Confidence',
-        'Standard Deviation',
+        "confidence",
+        "stdev",
+        f"{V4_PATH}/confidence_stdev.png",
+        "Confidence and Standard Deviation for Predictions",
+        "Confidence",
+        "Standard Deviation",
     )
-    print('Additional Graphs Graphed')
+    print("Additional Graphs Graphed")

     # Compute Individual Statistics
-    print('Computing Individual Statistics...')
+    print("Computing Individual Statistics...")
     indv_statistics = compute_individual_statistics(predictions)
-    indv_statistics.to_netcdf(f'{V4_PATH}/indv_statistics.nc')
-    print('Individual Statistics Computed')
+    indv_statistics.to_netcdf(f"{V4_PATH}/indv_statistics.nc")
+    print("Individual Statistics Computed")

     # Compute Individual Thresholds
-    print('Computing Individual Thresholds...')
+    print("Computing Individual Thresholds...")
     indv_thresholds = compute_individual_thresholds(indv_statistics)
-    indv_thresholds.to_netcdf(f'{V4_PATH}/indv_thresholds.nc')
-    print('Individual Thresholds Computed')
+    indv_thresholds.to_netcdf(f"{V4_PATH}/indv_thresholds.nc")
+    print("Individual Thresholds Computed")

     # Graph Individual Thresholded Predictions
-    print('Graphing Individual Thresholded Predictions...')
-    if not os.path.exists(f'{V4_PATH}/indv'):
-        os.makedirs(f'{V4_PATH}/indv')
+    print("Graphing Individual Thresholded Predictions...")
+    if not os.path.exists(f"{V4_PATH}/indv"):
+        os.makedirs(f"{V4_PATH}/indv")

     graph_all_individual_thresholded_predictions(
         indv_thresholds, thresholded_predictions, V4_PATH
     )
-    print('Individual Thresholded Predictions Graphed')
+    print("Individual Thresholded Predictions Graphed")

     # Compute subset statistics and graph
-    print('Computing Sensitivity Analysis...')
+    print("Computing Sensitivity Analysis...")
     subset_stats = calculate_subset_statistics(predictions)
     sens_analysis = calculate_sensitivity_analysis(subset_stats)
     graph_sensitivity_analysis(
         sens_analysis,
-        'accuracy',
-        f'{V4_PATH}/sens_analysis.png',
-        'Sensitivity Analsis of Accuracy vs. # of Models',
-        '# of Models',
-        'Accuracy',
+        "accuracy",
+        f"{V4_PATH}/sens_analysis.png",
+        "Sensitivity Analysis of Accuracy vs. # of Models",
+        "# of Models",
+        "Accuracy",
     )
     graph_sensitivity_analysis(
         sens_analysis,
-        'ece',
-        f'{V4_PATH}/sens_analysis_ece.png',
-        'Sensitivity Analysis of ECE vs. # of Models',
-        '# of Models',
-        'ECE',
+        "ece",
+        f"{V4_PATH}/sens_analysis_ece.png",
+        "Sensitivity Analysis of ECE vs. # of Models",
+        "# of Models",
+        "ECE",
     )
-    print(sens_analysis.sel(statistic='accuracy'))
+    print(sens_analysis.sel(statistic="accuracy"))
     print(calculate_overall_stats(ensemble_statistics))


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()