import os

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import tomli as toml
import torch
from tqdm import tqdm

import utils.ensemble as ens
import utils.metrics as met


# Plotting helper: scatter the ensemble coverage results against the
# individual-model results across percentiles
def plot_coverage(
    percentiles,
    ensemble_results,
    individual_results,
    title,
    x_label,
    y_label,
    save_path,
    flip=False,
):
    fig, ax = plt.subplots()
    plt.plot(
        percentiles,
        ensemble_results,
        'ob',
        label='Ensemble',
    )
    plt.plot(
        percentiles,
        individual_results,
        'xr',
        label='Individual',
    )
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    if flip:
        plt.gca().invert_xaxis()
    ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
    plt.savefig(save_path)
    plt.close()
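
# Example call (illustrative; `ens_acc` and `indv_acc` stand in for
# hypothetical per-decile accuracy series):
#   plot_coverage(np.linspace(0, 1, 11), ens_acc, indv_acc,
#                 'Accuracy Coverage', 'Percentile', 'Accuracy',
#                 'coverage.png')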

# Set RUN = True to regenerate predictions with the models; otherwise the
# cached CSV results in V2_PATH are loaded
RUN = False

# CONFIGURATION
if os.getenv('ADL_CONFIG_PATH') is None:
    with open('config.toml', 'rb') as f:
        config = toml.load(f)
else:
    with open(os.getenv('ADL_CONFIG_PATH'), 'rb') as f:
        config = toml.load(f)

ENSEMBLE_PATH = f"{config['paths']['model_output']}{config['ensemble']['name']}"
V2_PATH = ENSEMBLE_PATH + '/v2'


# The model output is a 1x2 tensor holding the softmax of the 2 predicted
# classes; convert it to a predicted class and a confidence in [0, 1]
def output_to_confidence(result):
    predicted_class = torch.argmax(result).item()
    confidence = (torch.max(result).item() - 0.5) * 2
    return torch.Tensor([predicted_class, confidence])
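
# Worked example (illustrative): for result = torch.tensor([0.2, 0.8]),
# the predicted class is 1 and the confidence is (0.8 - 0.5) * 2 = 0.6,
# so the function returns tensor([1.0, 0.6])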


# Run every ensemble model over the combined test and validation sets and
# return the raw outputs plus per-sample summary statistics; the caller is
# responsible for saving the predictions and metrics
def get_predictions(config):
    models, model_descs = ens.load_models(
        f'{ENSEMBLE_PATH}/models/',
        config['training']['device'],
    )
    models = [model.to(config['training']['device']) for model in models]
    test_set = torch.load(f'{ENSEMBLE_PATH}/test_dataset.pt') + torch.load(
        f'{ENSEMBLE_PATH}/val_dataset.pt'
    )
    print(f'Loaded {len(test_set)} samples')

    # results: [([model outputs], true label)]
    results = []
    # indv_results: [(class_1, class_2, true_label)] for the first model only
    indv_results = []

    for _, (data, target) in tqdm(
        enumerate(test_set),
        total=len(test_set),
        desc='Getting predictions',
        unit='sample',
    ):
        mri, xls = data
        mri = mri.unsqueeze(0).to(config['training']['device'])
        xls = xls.unsqueeze(0).to(config['training']['device'])
        data = (mri, xls)
        res = []
        for j, model in enumerate(models):
            model.eval()
            with torch.no_grad():
                output = model(data)
            output = output.tolist()
            # Track the first model separately as the individual baseline
            if j == 0:
                indv_results.append((output[0][0], output[0][1], target[1].item()))
            res.append(output)
        results.append((res, target.tolist()))

    # The results are a list of tuples, where each tuple contains a list of
    # model outputs and the true label. Convert this to three lists of
    # tuples, keyed on the ensemble confidence, the ensemble standard
    # deviation, and the ensemble entropy respectively
    # [(predicted class, confidence, true label, class_1, class_2)]
    confidences = []
    # [(predicted class, standard deviation, true label, class_1, class_2)]
    stdevs = []
    # [(predicted class, entropy, true label, class_1, class_2)]
    entropies = []

    for result in results:
        model_results, true_label = result
        # Get the ensemble mean and variance with numpy, as these are lists
        mean = np.mean(model_results, axis=0)
        variance = np.var(model_results, axis=0)
        # Calculate the predictive entropy of the mean softmax
        entropy = -1 * np.sum(mean * np.log(mean))
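        # e.g. a uniform ensemble mean of [0.5, 0.5] gives the maximum
        # two-class entropy of -2 * 0.5 * ln(0.5) = ln(2) ≈ 0.693, while a
        # confident mean such as [0.99, 0.01] gives ≈ 0.056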
        # Calculate confidence and standard deviation
        confidence = (np.max(mean) - 0.5) * 2
        stdev = np.sqrt(variance)
        # Get the predicted class
        predicted_class = np.argmax(mean)
        # Get the standard deviation of the predicted class
        pc_stdev = np.squeeze(stdev)[predicted_class]
        # Get the mean probability of each class
        class_1 = mean[0][0]
        class_2 = mean[0][1]
        # Get the true label
        true_label = true_label[1]

        confidences.append((predicted_class, confidence, true_label, class_1, class_2))
        stdevs.append((predicted_class, pc_stdev, true_label, class_1, class_2))
        entropies.append((predicted_class, entropy, true_label, class_1, class_2))

    return results, confidences, stdevs, entropies, indv_results
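

# Illustrative shapes (assuming a one-hot target [class_0, class_1]): with
# three models, a single entry of `results` would look like
#   ([[[0.3, 0.7]], [[0.4, 0.6]], [[0.2, 0.8]]], [0.0, 1.0])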

if RUN:
    results, confs, stdevs, entropies, indv_results = get_predictions(config)
    # Convert to pandas dataframes
    confs_df = pd.DataFrame(
        confs,
        columns=['predicted_class', 'confidence', 'true_label', 'class_1', 'class_2'],
    )
    stdevs_df = pd.DataFrame(
        stdevs, columns=['predicted_class', 'stdev', 'true_label', 'class_1', 'class_2']
    )
    entropies_df = pd.DataFrame(
        entropies,
        columns=['predicted_class', 'entropy', 'true_label', 'class_1', 'class_2'],
    )
    indv_df = pd.DataFrame(indv_results, columns=['class_1', 'class_2', 'true_label'])

    if not os.path.exists(V2_PATH):
        os.makedirs(V2_PATH)

    confs_df.to_csv(f'{V2_PATH}/ensemble_confidences.csv')
    stdevs_df.to_csv(f'{V2_PATH}/ensemble_stdevs.csv')
    entropies_df.to_csv(f'{V2_PATH}/ensemble_entropies.csv')
    indv_df.to_csv(f'{V2_PATH}/individual_results.csv')
else:
    confs_df = pd.read_csv(f'{V2_PATH}/ensemble_confidences.csv')
    stdevs_df = pd.read_csv(f'{V2_PATH}/ensemble_stdevs.csv')
    entropies_df = pd.read_csv(f'{V2_PATH}/ensemble_entropies.csv')
    indv_df = pd.read_csv(f'{V2_PATH}/individual_results.csv')

# Plot confidence vs standard deviation, coloring the points by whether the
# ensemble prediction was correct
correct_conf = confs_df[confs_df['predicted_class'] == confs_df['true_label']]
incorrect_conf = confs_df[confs_df['predicted_class'] != confs_df['true_label']]
correct_stdev = stdevs_df[stdevs_df['predicted_class'] == stdevs_df['true_label']]
incorrect_stdev = stdevs_df[stdevs_df['predicted_class'] != stdevs_df['true_label']]
correct_ent = entropies_df[
    entropies_df['predicted_class'] == entropies_df['true_label']
]
incorrect_ent = entropies_df[
    entropies_df['predicted_class'] != entropies_df['true_label']
]

fig, ax = plt.subplots()
plt.scatter(
    correct_conf['confidence'],
    correct_stdev['stdev'],
    color='green',
    label='Correct Prediction',
)
plt.scatter(
    incorrect_conf['confidence'],
    incorrect_stdev['stdev'],
    color='red',
    label='Incorrect Prediction',
)
plt.xlabel('Confidence (Raw Value)')
plt.ylabel('Standard Deviation (Raw Value)')
plt.title('Confidence vs Standard Deviation')
plt.legend()
plt.savefig(f'{V2_PATH}/confidence_vs_stdev.png')
plt.close()
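
# If the ensemble is well calibrated we would expect the correct (green)
# points to cluster toward high confidence and low standard deviation, with
# the incorrect (red) points spread toward the opposite corner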

# Do the same for confidence vs entropy
fig, ax = plt.subplots()
plt.scatter(
    correct_conf['confidence'],
    correct_ent['entropy'],
    color='green',
    label='Correct Prediction',
)
plt.scatter(
    incorrect_conf['confidence'],
    incorrect_ent['entropy'],
    color='red',
    label='Incorrect Prediction',
)
plt.xlabel('Confidence (Raw Value)')
plt.ylabel('Entropy (Raw Value)')
plt.title('Confidence vs Entropy')
plt.legend()
plt.savefig(f'{V2_PATH}/confidence_vs_entropy.png')
plt.close()

# Calculate individual-model accuracy, F1, AUC, and entropy
# Determine the predicted class from the two class probabilities
indv_df['predicted_class'] = indv_df[['class_1', 'class_2']].idxmax(axis=1)
indv_df['predicted_class'] = indv_df['predicted_class'].apply(
    lambda x: 0 if x == 'class_1' else 1
)
indv_df['correct'] = indv_df['predicted_class'] == indv_df['true_label']
accuracy_indv = indv_df['correct'].mean()
f1_indv = met.F1(
    indv_df['predicted_class'].to_numpy(), indv_df['true_label'].to_numpy()
)
# class_2 serves as the positive-class probability for the ROC AUC
auc_indv = metrics.roc_auc_score(
    indv_df['true_label'].to_numpy(), indv_df['class_2'].to_numpy()
)
indv_df['entropy'] = -1 * indv_df[['class_1', 'class_2']].apply(
    lambda x: x * np.log(x), axis=0
).sum(axis=1)
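# As with the ensemble, this is the two-class Shannon entropy; e.g. a row
# with class_1 = 0.9 and class_2 = 0.1 gets an entropy of about 0.325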

# Calculate decile thresholds for confidence and standard deviation
quantiles_conf = confs_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
    'confidence'
]
quantiles_stdev = stdevs_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
    'stdev'
]
# Additionally for individual confidence
quantiles_indv_conf = indv_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
    'class_2'
]
# And for individual entropy
quantiles_indv_entropy = indv_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
    'entropy'
]
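
# np.linspace(0, 1, 11) yields the 0th, 10th, ..., 100th percentiles, so each
# coverage point below is computed on the subset of samples that passes the
# corresponding decile threshold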

# Use the decile thresholds to calculate the ensemble coverage: at each
# percentile, keep only the samples at or above that confidence
accuracies_conf = []
for percentile, threshold in quantiles_conf.items():
    filt = confs_df[confs_df['confidence'] >= threshold]
    accuracy = (
        filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
    )
    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
    accuracies_conf.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
accuracies_df = pd.DataFrame(accuracies_conf)

# Repeat the coverage calculation for the individual model's confidence
indv_conf = []
for percentile, threshold in quantiles_indv_conf.items():
    filt = indv_df[indv_df['class_2'] >= threshold]
    accuracy = filt['correct'].mean()
    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
    indv_conf.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
indv_conf_df = pd.DataFrame(indv_conf)

# Do the same for entropy, keeping the samples at or below each threshold
indv_entropy = []
for percentile, threshold in quantiles_indv_entropy.items():
    filt = indv_df[indv_df['entropy'] <= threshold]
    accuracy = filt['correct'].mean()
    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
    indv_entropy.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
indv_entropy_df = pd.DataFrame(indv_entropy)

# Plot the coverage for confidence and accuracy
plot_coverage(
    accuracies_df['percentile'],
    accuracies_df['accuracy'],
    indv_conf_df['accuracy'],
    'Confidence Accuracy Coverage Plot',
    'Minimum Confidence Percentile (Low to High)',
    'Accuracy',
    f'{V2_PATH}/coverage_conf.png',
)

# Plot the coverage for confidence and F1
plot_coverage(
    accuracies_df['percentile'],
    accuracies_df['f1'],
    indv_conf_df['f1'],
    'Confidence F1 Coverage Plot',
    'Minimum Confidence Percentile (Low to High)',
    'F1',
    f'{V2_PATH}/f1_coverage_conf.png',
)

# Repeat for standard deviation, keeping the samples at or below each threshold
accuracies_stdev = []
for percentile, threshold in quantiles_stdev.items():
    filt = stdevs_df[stdevs_df['stdev'] <= threshold]
    accuracy = (
        filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
    )
    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
    accuracies_stdev.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
accuracies_stdev_df = pd.DataFrame(accuracies_stdev)

fig, ax = plt.subplots()
plt.plot(
    accuracies_stdev_df['percentile'],
    accuracies_stdev_df['accuracy'],
    'ob',
    label='Ensemble',
)
plt.plot(
    accuracies_stdev_df['percentile'],
    [accuracy_indv] * len(accuracies_stdev_df['percentile']),
    'xr',
    label='Individual (on entire dataset)',
)
plt.xlabel('Maximum Standard Deviation Percentile (High to Low)')
plt.ylabel('Accuracy')
plt.title('Standard Deviation Accuracy Coverage Plot')
plt.legend()
plt.gca().invert_xaxis()
ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
plt.savefig(f'{V2_PATH}/coverage_stdev.png')
plt.close()

# Plot coverage vs F1 for standard deviation
fig, ax = plt.subplots()
plt.plot(
    accuracies_stdev_df['percentile'], accuracies_stdev_df['f1'], 'ob', label='Ensemble'
)
plt.plot(
    accuracies_stdev_df['percentile'],
    [f1_indv] * len(accuracies_stdev_df['percentile']),
    'xr',
    label='Individual (on entire dataset)',
)
plt.xlabel('Maximum Standard Deviation Percentile (High to Low)')
plt.ylabel('F1')
plt.title('Standard Deviation F1 Coverage Plot')
plt.legend()
plt.gca().invert_xaxis()
ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
plt.savefig(f'{V2_PATH}/coverage_f1_stdev.png')
plt.close()

# Calculate overall accuracy and F1
overall_accuracy = (
    confs_df[confs_df['predicted_class'] == confs_df['true_label']].shape[0]
    / confs_df.shape[0]
)
overall_f1 = met.F1(
    confs_df['predicted_class'].to_numpy(), confs_df['true_label'].to_numpy()
)
# Calculate ECE for confidence and standard deviation
conf_ece = met.ECE(
    confs_df['predicted_class'].to_numpy(),
    confs_df['confidence'].to_numpy(),
    confs_df['true_label'].to_numpy(),
)
stdev_ece = met.ECE(
    stdevs_df['predicted_class'].to_numpy(),
    stdevs_df['stdev'].to_numpy(),
    stdevs_df['true_label'].to_numpy(),
)
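
# Note: met.ECE is assumed here to be the standard binned expected
# calibration error, ECE = sum_b (n_b / N) * |acc(b) - conf(b)|, binning the
# samples by the score in the second argument. Since the raw standard
# deviation is not a probability, stdev_ece should be read as a relative
# measure rather than a true calibration error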

print(f'Overall accuracy: {overall_accuracy}, Overall F1: {overall_f1}')
print(f'Confidence ECE: {conf_ece}')
print(f'Standard Deviation ECE: {stdev_ece}')

# Repeat the coverage calculation for ensemble entropy
quantiles_entropy = entropies_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
    'entropy'
]
accuracies_entropy = []
for percentile, threshold in quantiles_entropy.items():
    filt = entropies_df[entropies_df['entropy'] <= threshold]
    accuracy = (
        filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
    )
    f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
    accuracies_entropy.append(
        {'percentile': percentile, 'accuracy': accuracy, 'f1': f1}
    )
accuracies_entropy_df = pd.DataFrame(accuracies_entropy)

# Plot the coverage for entropy and accuracy; the filter keeps samples at or
# below each entropy threshold, so the x axis is a maximum-entropy percentile
plot_coverage(
    accuracies_entropy_df['percentile'],
    accuracies_entropy_df['accuracy'],
    indv_entropy_df['accuracy'],
    'Entropy Accuracy Coverage Plot',
    'Maximum Entropy Percentile (High to Low)',
    'Accuracy',
    f'{V2_PATH}/coverage_entropy.png',
    flip=True,
)

# Plot the coverage for entropy and F1
plot_coverage(
    accuracies_entropy_df['percentile'],
    accuracies_entropy_df['f1'],
    indv_entropy_df['f1'],
    'Entropy F1 Coverage Plot',
    'Maximum Entropy Percentile (High to Low)',
    'F1',
    f'{V2_PATH}/f1_coverage_entropy.png',
    flip=True,
)