11 kuukautta sitten · 8a70abc2e8
--- a/threshold_refac.py
+++ b/threshold_refac.py
@@ -0,0 +1,99 @@
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+import os
			
 
				+import tomli as toml
			
 
				+from utils.data.datasets import prepare_datasets
			
 
				+import utils.ensemble as ens
			
 
				+import torch
			
 
				+import matplotlib.pyplot as plt
			
 
				+import sklearn.metrics as metrics
			
 
				+from tqdm import tqdm
			
 
				+import utils.metrics as met
			
 
				+import itertools as it
			
 
				+import matplotlib.ticker as ticker
			
 
				+import glob
			
 
				+
			
 
				+# CONFIGURATION
			
 
				+if os.getenv('ADL_CONFIG_PATH') is None:
			
 
				+    with open('config.toml', 'rb') as f:
			
 
				+        config = toml.load(f)
			
 
				+else:
			
 
				+    with open(os.getenv('ADL_CONFIG_PATH'), 'rb') as f:
			
 
				+        config = toml.load(f)
			
 
				+
			
 
				+ENSEMBLE_PATH = f"{config['paths']['model_output']}{config['ensemble']['name']}"
			
 
				+
			
 
				+V2_PATH = ENSEMBLE_PATH + '/v2'
			
 
				+
			
 
				+
			
 
				+# Models is a dictionary with the model ids as keys and the model data as values
			
 
				+def get_model_predictions(models, data):
			
 
				+    predictions = {}
			
 
				+    for model_id, model in models.items():
			
 
				+        model.eval()
			
 
				+        with torch.no_grad:
			
 
				+            # Get the predictions
			
 
				+            output = model(data)
			
 
				+            predictions[model_id] = output.detach().cpu().numpy()
			
 
				+
			
 
				+    return predictions
			
 
				+
			
 
				+
			
 
				+def load_models_v2(folder, device):
			
 
				+    glob_path = os.path.join(folder, '*.pt')
			
 
				+    model_files = glob(glob_path)
			
 
				+    model_dict = {}
			
 
				+
			
 
				+    for model_file in model_files:
			
 
				+        model = torch.load(model_file, map_location=device)
			
 
				+        model_id = os.path.basename(model_file).split('_')[0]
			
 
				+        model_dict[model_id] = model
			
 
				+
			
 
				+    return model_dict
			
 
				+
			
 
				+
			
 
				+# Ensures that both mri and xls tensors in the data are unsqueezed and are on the correct device
			
 
				+def preprocess_data(data, device):
			
 
				+    mri, xls = data
			
 
				+    mri = mri.unsqueeze(0).to(device)
			
 
				+    xls = xls.unsqueeze(0).to(device)
			
 
				+    return (mri, xls)
			
 
				+
			
 
				+
			
 
				+def ensemble_dataset_predictions(models, dataset, device):
			
 
				+    # For each datapoint, get the predictions of each model
			
 
				+    predictions = {}
			
 
				+    for i, (data, target) in tqdm(enumerate(dataset), total=len(dataset)):
			
 
				+        # Preprocess data
			
 
				+        data = preprocess_data(data, device)
			
 
				+        # Predictions is a dicionary of tuples, with the target as the first and the model predicions dictionary as the second
			
 
				+        # The key is the id of the image
			
 
				+        predictions[i] = (
			
 
				+            target.detach().cpu().numpy(),
			
 
				+            get_model_predictions(models, data),
			
 
				+        )
			
 
				+
			
 
				+    return predictions
			
 
				+
			
 
				+
			
 
				+# Given a dictionary of predictions, select one model and eliminate the rest
			
 
				+def select_individual_model(predictions, model_id):
			
 
				+    selected_model_predictions = {}
			
 
				+    for key, value in predictions.items():
			
 
				+        selected_model_predictions[key] = (value[0], {model_id: value[1][model_id]})
			
 
				+    return selected_model_predictions
			
 
				+
			
 
				+
			
 
				+# Given a dictionary of predictions, select a subset of models and eliminate the rest
			
 
				+def select_subset_models(predictions, model_ids):
			
 
				+    selected_model_predictions = {}
			
 
				+    for key, value in predictions.items():
			
 
				+        selected_model_predictions[key] = (
			
 
				+            value[0],
			
 
				+            {model_id: value[1][model_id] for model_id in model_ids},
			
 
				+        )
			
 
				+    return selected_model_predictions
			
 
				+
			
 
				+
			
 
				+# Given a dictionary of predictions, calculate statistics (stdev, mean, entropy, accuracy, f1) for each result
			
 
				+def calculate_statistics(predictions):