123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- import pandas as pd
- import numpy as np
- import os
- import tomli as toml
- from utils.data.datasets import prepare_datasets
- import utils.ensemble as ens
- import torch
- import matplotlib.pyplot as plt
- import sklearn.metrics as metrics
- from tqdm import tqdm
- import utils.metrics as met
- import itertools as it
- import matplotlib.ticker as ticker
- import glob
# CONFIGURATION
# The TOML config is read from the path in the ADL_CONFIG_PATH environment
# variable when that variable is set, and from 'config.toml' (relative to the
# current working directory) otherwise.
with open(os.environ.get('ADL_CONFIG_PATH', 'config.toml'), 'rb') as f:
    config = toml.load(f)

# The ensemble name is appended directly to the model output path; no path
# separator is inserted here.
# NOTE(review): presumably config['paths']['model_output'] ends with '/' - confirm.
ENSEMBLE_PATH = '{}{}'.format(
    config['paths']['model_output'],
    config['ensemble']['name'],
)
V2_PATH = f"{ENSEMBLE_PATH}/v2"
def get_model_predictions(models, data):
    """Run every model on ``data`` and collect the outputs as NumPy arrays.

    Args:
        models: Dictionary with the model ids as keys and the models as
            values. Each model must provide ``eval()`` and be callable on
            ``data``.
        data: Input handed unchanged to every model.

    Returns:
        Dictionary mapping each model id to that model's output, detached,
        moved to the CPU and converted to a NumPy array.
    """
    predictions = {}
    for model_id, model in models.items():
        # Put the model in evaluation mode before running it.
        model.eval()
        # torch.no_grad has to be *called* to obtain a context manager; using
        # the bare class in a ``with`` statement raises at runtime.
        with torch.no_grad():
            output = model(data)
        predictions[model_id] = output.detach().cpu().numpy()
    return predictions
def load_models_v2(folder, device):
    """Load every ``*.pt`` file in ``folder`` into a dictionary.

    Args:
        folder: Directory that is searched (non-recursively) for ``*.pt``
            files.
        device: Passed to ``torch.load`` as ``map_location``.

    Returns:
        Dictionary mapping a model id to the loaded object. The id is the
        part of the file name before the first underscore. If several files
        share an id, the one that sorts last wins. The dictionary is empty
        when no file matches.
    """
    # ``glob`` is imported as a module in this file, so the function must be
    # reached as ``glob.glob``. Sorting makes the load order deterministic.
    model_files = sorted(glob.glob(os.path.join(folder, '*.pt')))
    model_dict = {}
    for model_file in model_files:
        # NOTE(review): torch.load unpickles the file, so only load
        # checkpoints from trusted sources. Recent PyTorch releases default
        # to weights_only=True, which rejects fully pickled modules - confirm
        # how these checkpoints were saved.
        model = torch.load(model_file, map_location=device)
        model_id = os.path.basename(model_file).split('_')[0]
        model_dict[model_id] = model
    return model_dict
def preprocess_data(data, device):
    """Add a leading dimension to both tensors and move them to ``device``.

    Args:
        data: Pair ``(mri, xls)`` of tensors.
        device: Target passed to each tensor's ``.to()``.

    Returns:
        Tuple ``(mri, xls)`` where each tensor has been unsqueezed at
        dimension 0 and moved to ``device``.
    """
    first, second = data
    prepared = [tensor.unsqueeze(0).to(device) for tensor in (first, second)]
    return tuple(prepared)
def ensemble_dataset_predictions(models, dataset, device):
    """Collect the predictions of every model for every datapoint.

    Args:
        models: Dictionary of models, forwarded to ``get_model_predictions``.
        dataset: Sized iterable yielding ``(data, target)`` pairs.
        device: Device forwarded to ``preprocess_data``.

    Returns:
        Dictionary keyed by the position of the datapoint in ``dataset``.
        Each value is a tuple with the target (as a NumPy array) first and
        the dictionary of model predictions second.
    """
    results = {}
    progress = tqdm(enumerate(dataset), total=len(dataset))
    for index, (sample, label) in progress:
        # Unsqueeze the sample and move it to the requested device
        model_inputs = preprocess_data(sample, device)
        label_array = label.detach().cpu().numpy()
        results[index] = (label_array, get_model_predictions(models, model_inputs))
    return results
def select_individual_model(predictions, model_id):
    """Keep the predictions of a single model and drop all others.

    Args:
        predictions: Dictionary whose values are indexable as
            ``(target, model_predictions)``, where ``model_predictions`` is a
            dictionary keyed by model id.
        model_id: Id of the model to keep.

    Returns:
        New dictionary with the same keys and targets, where each prediction
        dictionary contains only the entry for ``model_id``.
    """
    return {
        sample_key: (entry[0], {model_id: entry[1][model_id]})
        for sample_key, entry in predictions.items()
    }
def select_subset_models(predictions, model_ids):
    """Keep the predictions of a subset of models and drop all others.

    Args:
        predictions: Dictionary whose values are indexable as
            ``(target, model_predictions)``, where ``model_predictions`` is a
            dictionary keyed by model id.
        model_ids: Iterable of the model ids to keep.

    Returns:
        New dictionary with the same keys and targets, where each prediction
        dictionary is restricted to the ids in ``model_ids``.
    """
    narrowed = {}
    for sample_key, entry in predictions.items():
        target = entry[0]
        kept = {}
        for wanted_id in model_ids:
            kept[wanted_id] = entry[1][wanted_id]
        narrowed[sample_key] = (target, kept)
    return narrowed
- # Given a dictionary of predictions, calculate statistics (stdev, mean, entropy, accuracy, f1) for each result
- def calculate_statistics(predictions):
|