|
@@ -27,14 +27,14 @@ def plot_coverage(
|
|
plt.plot(
|
|
plt.plot(
|
|
percentiles,
|
|
percentiles,
|
|
ensemble_results,
|
|
ensemble_results,
|
|
- 'ob',
|
|
|
|
- label='Ensemble',
|
|
|
|
|
|
+ "ob",
|
|
|
|
+ label="Ensemble",
|
|
)
|
|
)
|
|
plt.plot(
|
|
plt.plot(
|
|
percentiles,
|
|
percentiles,
|
|
individual_results,
|
|
individual_results,
|
|
- 'xr',
|
|
|
|
- label='Individual (on entire dataset)',
|
|
|
|
|
|
+ "xr",
|
|
|
|
+ label="Individual (on entire dataset)",
|
|
)
|
|
)
|
|
plt.xlabel(x_lablel)
|
|
plt.xlabel(x_lablel)
|
|
plt.ylabel(y_label)
|
|
plt.ylabel(y_label)
|
|
@@ -50,17 +50,17 @@ def plot_coverage(
|
|
RUN = False
|
|
RUN = False
|
|
|
|
|
|
# CONFIGURATION
|
|
# CONFIGURATION
|
|
-if os.getenv('ADL_CONFIG_PATH') is None:
|
|
|
|
- with open('config.toml', 'rb') as f:
|
|
|
|
|
|
+if os.getenv("ADL_CONFIG_PATH") is None:
|
|
|
|
+ with open("config.toml", "rb") as f:
|
|
config = toml.load(f)
|
|
config = toml.load(f)
|
|
else:
|
|
else:
|
|
- with open(os.getenv('ADL_CONFIG_PATH'), 'rb') as f:
|
|
|
|
|
|
+ with open(os.getenv("ADL_CONFIG_PATH"), "rb") as f:
|
|
config = toml.load(f)
|
|
config = toml.load(f)
|
|
|
|
|
|
|
|
|
|
ENSEMBLE_PATH = f"{config['paths']['model_output']}{config['ensemble']['name']}"
|
|
ENSEMBLE_PATH = f"{config['paths']['model_output']}{config['ensemble']['name']}"
|
|
|
|
|
|
-V2_PATH = ENSEMBLE_PATH + '/v2'
|
|
|
|
|
|
+V2_PATH = ENSEMBLE_PATH + "/v2"
|
|
|
|
|
|
|
|
|
|
# Result is a 1x2 tensor, with the softmax of the 2 predicted classes
|
|
# Result is a 1x2 tensor, with the softmax of the 2 predicted classes
|
|
@@ -75,14 +75,14 @@ def output_to_confidence(result):
|
|
# This function conducts tests on the models and returns the results, as well as saving the predictions and metrics
|
|
# This function conducts tests on the models and returns the results, as well as saving the predictions and metrics
|
|
def get_predictions(config):
|
|
def get_predictions(config):
|
|
models, model_descs = ens.load_models(
|
|
models, model_descs = ens.load_models(
|
|
- f'{ENSEMBLE_PATH}/models/',
|
|
|
|
- config['training']['device'],
|
|
|
|
|
|
+ f"{ENSEMBLE_PATH}/models/",
|
|
|
|
+ config["training"]["device"],
|
|
)
|
|
)
|
|
- models = [model.to(config['training']['device']) for model in models]
|
|
|
|
- test_set = torch.load(f'{ENSEMBLE_PATH}/test_dataset.pt') + torch.load(
|
|
|
|
- f'{ENSEMBLE_PATH}/val_dataset.pt'
|
|
|
|
|
|
+ models = [model.to(config["training"]["device"]) for model in models]
|
|
|
|
+ test_set = torch.load(f"{ENSEMBLE_PATH}/test_dataset.pt") + torch.load(
|
|
|
|
+ f"{ENSEMBLE_PATH}/val_dataset.pt"
|
|
)
|
|
)
|
|
- print(f'Loaded {len(test_set)} samples')
|
|
|
|
|
|
+ print(f"Loaded {len(test_set)} samples")
|
|
|
|
|
|
# [([model results], labels)]
|
|
# [([model results], labels)]
|
|
results = []
|
|
results = []
|
|
@@ -93,12 +93,12 @@ def get_predictions(config):
|
|
for _, (data, target) in tqdm(
|
|
for _, (data, target) in tqdm(
|
|
enumerate(test_set),
|
|
enumerate(test_set),
|
|
total=len(test_set),
|
|
total=len(test_set),
|
|
- desc='Getting predictions',
|
|
|
|
- unit='sample',
|
|
|
|
|
|
+ desc="Getting predictions",
|
|
|
|
+ unit="sample",
|
|
):
|
|
):
|
|
mri, xls = data
|
|
mri, xls = data
|
|
- mri = mri.unsqueeze(0).to(config['training']['device'])
|
|
|
|
- xls = xls.unsqueeze(0).to(config['training']['device'])
|
|
|
|
|
|
+ mri = mri.unsqueeze(0).to(config["training"]["device"])
|
|
|
|
+ xls = xls.unsqueeze(0).to(config["training"]["device"])
|
|
data = (mri, xls)
|
|
data = (mri, xls)
|
|
res = []
|
|
res = []
|
|
for j, model in enumerate(models):
|
|
for j, model in enumerate(models):
|
|
@@ -164,125 +164,125 @@ if RUN:
|
|
# Convert to pandas dataframes
|
|
# Convert to pandas dataframes
|
|
confs_df = pd.DataFrame(
|
|
confs_df = pd.DataFrame(
|
|
confs,
|
|
confs,
|
|
- columns=['predicted_class', 'confidence', 'true_label', 'class_1', 'class_2'],
|
|
|
|
|
|
+ columns=["predicted_class", "confidence", "true_label", "class_1", "class_2"],
|
|
)
|
|
)
|
|
stdevs_df = pd.DataFrame(
|
|
stdevs_df = pd.DataFrame(
|
|
- stdevs, columns=['predicted_class', 'stdev', 'true_label', 'class_1', 'class_2']
|
|
|
|
|
|
+ stdevs, columns=["predicted_class", "stdev", "true_label", "class_1", "class_2"]
|
|
)
|
|
)
|
|
|
|
|
|
entropies_df = pd.DataFrame(
|
|
entropies_df = pd.DataFrame(
|
|
entropies,
|
|
entropies,
|
|
- columns=['predicted_class', 'entropy', 'true_label', 'class_1', 'class_2'],
|
|
|
|
|
|
+ columns=["predicted_class", "entropy", "true_label", "class_1", "class_2"],
|
|
)
|
|
)
|
|
|
|
|
|
- indv_df = pd.DataFrame(indv_results, columns=['class_1', 'class_2', 'true_label'])
|
|
|
|
|
|
+ indv_df = pd.DataFrame(indv_results, columns=["class_1", "class_2", "true_label"])
|
|
|
|
|
|
if not os.path.exists(V2_PATH):
|
|
if not os.path.exists(V2_PATH):
|
|
os.makedirs(V2_PATH)
|
|
os.makedirs(V2_PATH)
|
|
|
|
|
|
- confs_df.to_csv(f'{V2_PATH}/ensemble_confidences.csv')
|
|
|
|
- stdevs_df.to_csv(f'{V2_PATH}/ensemble_stdevs.csv')
|
|
|
|
- entropies_df.to_csv(f'{V2_PATH}/ensemble_entropies.csv')
|
|
|
|
- indv_df.to_csv(f'{V2_PATH}/individual_results.csv')
|
|
|
|
|
|
+ confs_df.to_csv(f"{V2_PATH}/ensemble_confidences.csv")
|
|
|
|
+ stdevs_df.to_csv(f"{V2_PATH}/ensemble_stdevs.csv")
|
|
|
|
+ entropies_df.to_csv(f"{V2_PATH}/ensemble_entropies.csv")
|
|
|
|
+ indv_df.to_csv(f"{V2_PATH}/individual_results.csv")
|
|
else:
|
|
else:
|
|
- confs_df = pd.read_csv(f'{V2_PATH}/ensemble_confidences.csv')
|
|
|
|
- stdevs_df = pd.read_csv(f'{V2_PATH}/ensemble_stdevs.csv')
|
|
|
|
- entropies_df = pd.read_csv(f'{V2_PATH}/ensemble_entropies.csv')
|
|
|
|
- indv_df = pd.read_csv(f'{V2_PATH}/individual_results.csv')
|
|
|
|
|
|
+ confs_df = pd.read_csv(f"{V2_PATH}/ensemble_confidences.csv")
|
|
|
|
+ stdevs_df = pd.read_csv(f"{V2_PATH}/ensemble_stdevs.csv")
|
|
|
|
+ entropies_df = pd.read_csv(f"{V2_PATH}/ensemble_entropies.csv")
|
|
|
|
+ indv_df = pd.read_csv(f"{V2_PATH}/individual_results.csv")
|
|
|
|
|
|
|
|
|
|
# Plot confidence vs standard deviation, and change color of dots based on if they are correct
|
|
# Plot confidence vs standard deviation, and change color of dots based on if they are correct
|
|
-correct_conf = confs_df[confs_df['predicted_class'] == confs_df['true_label']]
|
|
|
|
-incorrect_conf = confs_df[confs_df['predicted_class'] != confs_df['true_label']]
|
|
|
|
|
|
+correct_conf = confs_df[confs_df["predicted_class"] == confs_df["true_label"]]
|
|
|
|
+incorrect_conf = confs_df[confs_df["predicted_class"] != confs_df["true_label"]]
|
|
|
|
|
|
-correct_stdev = stdevs_df[stdevs_df['predicted_class'] == stdevs_df['true_label']]
|
|
|
|
-incorrect_stdev = stdevs_df[stdevs_df['predicted_class'] != stdevs_df['true_label']]
|
|
|
|
|
|
+correct_stdev = stdevs_df[stdevs_df["predicted_class"] == stdevs_df["true_label"]]
|
|
|
|
+incorrect_stdev = stdevs_df[stdevs_df["predicted_class"] != stdevs_df["true_label"]]
|
|
|
|
|
|
correct_ent = entropies_df[
|
|
correct_ent = entropies_df[
|
|
- entropies_df['predicted_class'] == entropies_df['true_label']
|
|
|
|
|
|
+ entropies_df["predicted_class"] == entropies_df["true_label"]
|
|
]
|
|
]
|
|
incorrect_ent = entropies_df[
|
|
incorrect_ent = entropies_df[
|
|
- entropies_df['predicted_class'] != entropies_df['true_label']
|
|
|
|
|
|
+ entropies_df["predicted_class"] != entropies_df["true_label"]
|
|
]
|
|
]
|
|
|
|
|
|
plot, ax = plt.subplots()
|
|
plot, ax = plt.subplots()
|
|
plt.scatter(
|
|
plt.scatter(
|
|
- correct_conf['confidence'],
|
|
|
|
- correct_stdev['stdev'],
|
|
|
|
- color='green',
|
|
|
|
- label='Correct Prediction',
|
|
|
|
|
|
+ correct_conf["confidence"],
|
|
|
|
+ correct_stdev["stdev"],
|
|
|
|
+ color="green",
|
|
|
|
+ label="Correct Prediction",
|
|
)
|
|
)
|
|
plt.scatter(
|
|
plt.scatter(
|
|
- incorrect_conf['confidence'],
|
|
|
|
- incorrect_stdev['stdev'],
|
|
|
|
- color='red',
|
|
|
|
- label='Incorrect Prediction',
|
|
|
|
|
|
+ incorrect_conf["confidence"],
|
|
|
|
+ incorrect_stdev["stdev"],
|
|
|
|
+ color="red",
|
|
|
|
+ label="Incorrect Prediction",
|
|
)
|
|
)
|
|
-plt.xlabel('Confidence (Raw Value)')
|
|
|
|
-plt.ylabel('Standard Deviation (Raw Value)')
|
|
|
|
-plt.title('Confidence vs Standard Deviation')
|
|
|
|
|
|
+plt.xlabel("Confidence (Raw Value)")
|
|
|
|
+plt.ylabel("Standard Deviation (Raw Value)")
|
|
|
|
+plt.title("Confidence vs Standard Deviation")
|
|
plt.legend()
|
|
plt.legend()
|
|
-plt.savefig(f'{V2_PATH}/confidence_vs_stdev.png')
|
|
|
|
|
|
+plt.savefig(f"{V2_PATH}/confidence_vs_stdev.png")
|
|
|
|
|
|
plt.close()
|
|
plt.close()
|
|
|
|
|
|
# Do the same for confidence vs entropy
|
|
# Do the same for confidence vs entropy
|
|
plot, ax = plt.subplots()
|
|
plot, ax = plt.subplots()
|
|
plt.scatter(
|
|
plt.scatter(
|
|
- correct_conf['confidence'],
|
|
|
|
- correct_ent['entropy'],
|
|
|
|
- color='green',
|
|
|
|
- label='Correct Prediction',
|
|
|
|
|
|
+ correct_conf["confidence"],
|
|
|
|
+ correct_ent["entropy"],
|
|
|
|
+ color="green",
|
|
|
|
+ label="Correct Prediction",
|
|
)
|
|
)
|
|
plt.scatter(
|
|
plt.scatter(
|
|
- incorrect_conf['confidence'],
|
|
|
|
- incorrect_ent['entropy'],
|
|
|
|
- color='red',
|
|
|
|
- label='Incorrect Prediction',
|
|
|
|
|
|
+ incorrect_conf["confidence"],
|
|
|
|
+ incorrect_ent["entropy"],
|
|
|
|
+ color="red",
|
|
|
|
+ label="Incorrect Prediction",
|
|
)
|
|
)
|
|
-plt.xlabel('Confidence (Raw Value)')
|
|
|
|
-plt.ylabel('Entropy (Raw Value)')
|
|
|
|
-plt.title('Confidence vs Entropy')
|
|
|
|
|
|
+plt.xlabel("Confidence (Raw Value)")
|
|
|
|
+plt.ylabel("Entropy (Raw Value)")
|
|
|
|
+plt.title("Confidence vs Entropy")
|
|
plt.legend()
|
|
plt.legend()
|
|
-plt.savefig(f'{V2_PATH}/confidence_vs_entropy.png')
|
|
|
|
|
|
+plt.savefig(f"{V2_PATH}/confidence_vs_entropy.png")
|
|
|
|
|
|
plt.close()
|
|
plt.close()
|
|
|
|
|
|
|
|
|
|
# Calculate individual model accuracy and entropy
|
|
# Calculate individual model accuracy and entropy
|
|
# Determine predicted class
|
|
# Determine predicted class
|
|
-indv_df['predicted_class'] = indv_df[['class_1', 'class_2']].idxmax(axis=1)
|
|
|
|
-indv_df['predicted_class'] = indv_df['predicted_class'].apply(
|
|
|
|
- lambda x: 0 if x == 'class_1' else 1
|
|
|
|
|
|
+indv_df["predicted_class"] = indv_df[["class_1", "class_2"]].idxmax(axis=1)
|
|
|
|
+indv_df["predicted_class"] = indv_df["predicted_class"].apply(
|
|
|
|
+ lambda x: 0 if x == "class_1" else 1
|
|
)
|
|
)
|
|
-indv_df['correct'] = indv_df['predicted_class'] == indv_df['true_label']
|
|
|
|
-accuracy_indv = indv_df['correct'].mean()
|
|
|
|
|
|
+indv_df["correct"] = indv_df["predicted_class"] == indv_df["true_label"]
|
|
|
|
+accuracy_indv = indv_df["correct"].mean()
|
|
f1_indv = met.F1(
|
|
f1_indv = met.F1(
|
|
- indv_df['predicted_class'].to_numpy(), indv_df['true_label'].to_numpy()
|
|
|
|
|
|
+ indv_df["predicted_class"].to_numpy(), indv_df["true_label"].to_numpy()
|
|
)
|
|
)
|
|
auc_indv = metrics.roc_auc_score(
|
|
auc_indv = metrics.roc_auc_score(
|
|
- indv_df['true_label'].to_numpy(), indv_df['class_2'].to_numpy()
|
|
|
|
|
|
+ indv_df["true_label"].to_numpy(), indv_df["class_2"].to_numpy()
|
|
)
|
|
)
|
|
-indv_df['entropy'] = -1 * indv_df[['class_1', 'class_2']].apply(
|
|
|
|
|
|
+indv_df["entropy"] = -1 * indv_df[["class_1", "class_2"]].apply(
|
|
lambda x: x * np.log(x), axis=0
|
|
lambda x: x * np.log(x), axis=0
|
|
).sum(axis=1)
|
|
).sum(axis=1)
|
|
|
|
|
|
# Calculate percentiles for confidence and standard deviation
|
|
# Calculate percentiles for confidence and standard deviation
|
|
-quantiles_conf = confs_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
|
|
|
|
- 'confidence'
|
|
|
|
|
|
+quantiles_conf = confs_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
|
|
|
|
+ "confidence"
|
|
]
|
|
]
|
|
-quantiles_stdev = stdevs_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
|
|
|
|
- 'stdev'
|
|
|
|
|
|
+quantiles_stdev = stdevs_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
|
|
|
|
+ "stdev"
|
|
]
|
|
]
|
|
|
|
|
|
# Additionally for individual confidence
|
|
# Additionally for individual confidence
|
|
-quantiles_indv_conf = indv_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
|
|
|
|
- 'class_2'
|
|
|
|
|
|
+quantiles_indv_conf = indv_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
|
|
|
|
+ "class_2"
|
|
]
|
|
]
|
|
|
|
|
|
# For individual entropy
|
|
# For individual entropy
|
|
-quantiles_indv_entropy = indv_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
|
|
|
|
- 'entropy'
|
|
|
|
|
|
+quantiles_indv_entropy = indv_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
|
|
|
|
+ "entropy"
|
|
]
|
|
]
|
|
|
|
|
|
#
|
|
#
|
|
@@ -293,13 +293,13 @@ iter_conf = it.islice(quantiles_conf.items(), 0, None)
|
|
for quantile in iter_conf:
|
|
for quantile in iter_conf:
|
|
percentile = quantile[0]
|
|
percentile = quantile[0]
|
|
|
|
|
|
- filt = confs_df[confs_df['confidence'] >= quantile[1]]
|
|
|
|
|
|
+ filt = confs_df[confs_df["confidence"] >= quantile[1]]
|
|
accuracy = (
|
|
accuracy = (
|
|
- filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
|
|
|
|
|
|
+ filt[filt["predicted_class"] == filt["true_label"]].shape[0] / filt.shape[0]
|
|
)
|
|
)
|
|
- f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
|
|
|
|
|
|
+ f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
|
|
|
|
|
|
- accuracies_conf.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
|
|
|
|
|
|
+ accuracies_conf.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
|
|
|
|
|
|
accuracies_df = pd.DataFrame(accuracies_conf)
|
|
accuracies_df = pd.DataFrame(accuracies_conf)
|
|
|
|
|
|
@@ -309,11 +309,11 @@ iter_conf = it.islice(quantiles_indv_conf.items(), 0, None)
|
|
for quantile in iter_conf:
|
|
for quantile in iter_conf:
|
|
percentile = quantile[0]
|
|
percentile = quantile[0]
|
|
|
|
|
|
- filt = indv_df[indv_df['class_2'] >= quantile[1]]
|
|
|
|
- accuracy = filt['correct'].mean()
|
|
|
|
- f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
|
|
|
|
|
|
+ filt = indv_df[indv_df["class_2"] >= quantile[1]]
|
|
|
|
+ accuracy = filt["correct"].mean()
|
|
|
|
+ f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
|
|
|
|
|
|
- indv_conf.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
|
|
|
|
|
|
+ indv_conf.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
|
|
|
|
|
|
indv_conf_df = pd.DataFrame(indv_conf)
|
|
indv_conf_df = pd.DataFrame(indv_conf)
|
|
|
|
|
|
@@ -323,35 +323,35 @@ iter_entropy = it.islice(quantiles_indv_entropy.items(), 0, None)
|
|
for quantile in iter_entropy:
|
|
for quantile in iter_entropy:
|
|
percentile = quantile[0]
|
|
percentile = quantile[0]
|
|
|
|
|
|
- filt = indv_df[indv_df['entropy'] <= quantile[1]]
|
|
|
|
- accuracy = filt['correct'].mean()
|
|
|
|
- f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
|
|
|
|
|
|
+ filt = indv_df[indv_df["entropy"] <= quantile[1]]
|
|
|
|
+ accuracy = filt["correct"].mean()
|
|
|
|
+ f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
|
|
|
|
|
|
- indv_entropy.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
|
|
|
|
|
|
+ indv_entropy.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
|
|
|
|
|
|
indv_entropy_df = pd.DataFrame(indv_entropy)
|
|
indv_entropy_df = pd.DataFrame(indv_entropy)
|
|
|
|
|
|
|
|
|
|
# Plot the coverage for confidence and accuracy
|
|
# Plot the coverage for confidence and accuracy
|
|
plot_coverage(
|
|
plot_coverage(
|
|
- accuracies_df['percentile'],
|
|
|
|
- accuracies_df['accuracy'],
|
|
|
|
- indv_conf_df['accuracy'],
|
|
|
|
- 'Confidence Accuracy Coverage Plot',
|
|
|
|
- 'Minimum Confidence Percentile (Low to High)',
|
|
|
|
- 'Accuracy',
|
|
|
|
- f'{V2_PATH}/coverage_conf.png',
|
|
|
|
|
|
+ accuracies_df["percentile"],
|
|
|
|
+ accuracies_df["accuracy"],
|
|
|
|
+ indv_conf_df["accuracy"],
|
|
|
|
+ "Confidence Accuracy Coverage Plot",
|
|
|
|
+ "Minimum Confidence Percentile (Low to High)",
|
|
|
|
+ "Accuracy",
|
|
|
|
+ f"{V2_PATH}/coverage_conf.png",
|
|
)
|
|
)
|
|
|
|
|
|
# Plot the coverage for confidence and F1
|
|
# Plot the coverage for confidence and F1
|
|
plot_coverage(
|
|
plot_coverage(
|
|
- accuracies_df['percentile'],
|
|
|
|
- accuracies_df['f1'],
|
|
|
|
- indv_conf_df['f1'],
|
|
|
|
- 'Confidence F1 Coverage Plot',
|
|
|
|
- 'Minimum Confidence Percentile (Low to High)',
|
|
|
|
- 'F1',
|
|
|
|
- f'{V2_PATH}/f1_coverage_conf.png',
|
|
|
|
|
|
+ accuracies_df["percentile"],
|
|
|
|
+ accuracies_df["f1"],
|
|
|
|
+ indv_conf_df["f1"],
|
|
|
|
+ "Confidence F1 Coverage Plot",
|
|
|
|
+ "Minimum Confidence Percentile (Low to High)",
|
|
|
|
+ "F1",
|
|
|
|
+ f"{V2_PATH}/f1_coverage_conf.png",
|
|
)
|
|
)
|
|
|
|
|
|
# Repeat for standard deviation
|
|
# Repeat for standard deviation
|
|
@@ -360,90 +360,90 @@ iter_stdev = it.islice(quantiles_stdev.items(), 0, None)
|
|
for quantile in iter_stdev:
|
|
for quantile in iter_stdev:
|
|
percentile = quantile[0]
|
|
percentile = quantile[0]
|
|
|
|
|
|
- filt = stdevs_df[stdevs_df['stdev'] <= quantile[1]]
|
|
|
|
|
|
+ filt = stdevs_df[stdevs_df["stdev"] <= quantile[1]]
|
|
accuracy = (
|
|
accuracy = (
|
|
- filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
|
|
|
|
|
|
+ filt[filt["predicted_class"] == filt["true_label"]].shape[0] / filt.shape[0]
|
|
)
|
|
)
|
|
- f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
|
|
|
|
|
|
+ f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
|
|
|
|
|
|
- accuracies_stdev.append({'percentile': percentile, 'accuracy': accuracy, 'f1': f1})
|
|
|
|
|
|
+ accuracies_stdev.append({"percentile": percentile, "accuracy": accuracy, "f1": f1})
|
|
|
|
|
|
accuracies_stdev_df = pd.DataFrame(accuracies_stdev)
|
|
accuracies_stdev_df = pd.DataFrame(accuracies_stdev)
|
|
|
|
|
|
fig, ax = plt.subplots()
|
|
fig, ax = plt.subplots()
|
|
plt.plot(
|
|
plt.plot(
|
|
- accuracies_stdev_df['percentile'],
|
|
|
|
- accuracies_stdev_df['accuracy'],
|
|
|
|
- 'ob',
|
|
|
|
- label='Ensemble',
|
|
|
|
|
|
+ accuracies_stdev_df["percentile"],
|
|
|
|
+ accuracies_stdev_df["accuracy"],
|
|
|
|
+ "ob",
|
|
|
|
+ label="Ensemble",
|
|
)
|
|
)
|
|
plt.plot(
|
|
plt.plot(
|
|
- accuracies_stdev_df['percentile'],
|
|
|
|
- [accuracy_indv] * len(accuracies_stdev_df['percentile']),
|
|
|
|
- 'xr',
|
|
|
|
- label='Individual (on entire dataset)',
|
|
|
|
|
|
+ accuracies_stdev_df["percentile"],
|
|
|
|
+ [accuracy_indv] * len(accuracies_stdev_df["percentile"]),
|
|
|
|
+ "xr",
|
|
|
|
+ label="Individual (on entire dataset)",
|
|
)
|
|
)
|
|
-plt.xlabel('Maximum Standard Deviation Percentile (High to Low)')
|
|
|
|
-plt.ylabel('Accuracy')
|
|
|
|
-plt.title('Standard Deviation Accuracy Coverage Plot')
|
|
|
|
|
|
+plt.xlabel("Maximum Standard Deviation Percentile (High to Low)")
|
|
|
|
+plt.ylabel("Accuracy")
|
|
|
|
+plt.title("Standard Deviation Accuracy Coverage Plot")
|
|
plt.legend()
|
|
plt.legend()
|
|
plt.gca().invert_xaxis()
|
|
plt.gca().invert_xaxis()
|
|
ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
|
|
ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
|
|
-plt.savefig(f'{V2_PATH}/coverage_stdev.png')
|
|
|
|
|
|
+plt.savefig(f"{V2_PATH}/coverage_stdev.png")
|
|
plt.close()
|
|
plt.close()
|
|
|
|
|
|
# Plot coverage vs F1 for standard deviation
|
|
# Plot coverage vs F1 for standard deviation
|
|
fig, ax = plt.subplots()
|
|
fig, ax = plt.subplots()
|
|
plt.plot(
|
|
plt.plot(
|
|
- accuracies_stdev_df['percentile'], accuracies_stdev_df['f1'], 'ob', label='Ensemble'
|
|
|
|
|
|
+ accuracies_stdev_df["percentile"], accuracies_stdev_df["f1"], "ob", label="Ensemble"
|
|
)
|
|
)
|
|
plt.plot(
|
|
plt.plot(
|
|
- accuracies_stdev_df['percentile'],
|
|
|
|
- [f1_indv] * len(accuracies_stdev_df['percentile']),
|
|
|
|
- 'xr',
|
|
|
|
- label='Individual (on entire dataset)',
|
|
|
|
|
|
+ accuracies_stdev_df["percentile"],
|
|
|
|
+ [f1_indv] * len(accuracies_stdev_df["percentile"]),
|
|
|
|
+ "xr",
|
|
|
|
+ label="Individual (on entire dataset)",
|
|
)
|
|
)
|
|
-plt.xlabel('Maximum Standard Deviation Percentile (High to Low)')
|
|
|
|
-plt.ylabel('F1')
|
|
|
|
-plt.title('Standard Deviation F1 Coverage Plot')
|
|
|
|
|
|
+plt.xlabel("Maximum Standard Deviation Percentile (High to Low)")
|
|
|
|
+plt.ylabel("F1")
|
|
|
|
+plt.title("Standard Deviation F1 Coverage Plot")
|
|
plt.legend()
|
|
plt.legend()
|
|
plt.gca().invert_xaxis()
|
|
plt.gca().invert_xaxis()
|
|
ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
|
|
ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
|
|
-plt.savefig(f'{V2_PATH}/coverage_f1_stdev.png')
|
|
|
|
|
|
+plt.savefig(f"{V2_PATH}/coverage_f1_stdev.png")
|
|
|
|
|
|
plt.close()
|
|
plt.close()
|
|
|
|
|
|
|
|
|
|
# Print overall accuracy
|
|
# Print overall accuracy
|
|
overall_accuracy = (
|
|
overall_accuracy = (
|
|
- confs_df[confs_df['predicted_class'] == confs_df['true_label']].shape[0]
|
|
|
|
|
|
+ confs_df[confs_df["predicted_class"] == confs_df["true_label"]].shape[0]
|
|
/ confs_df.shape[0]
|
|
/ confs_df.shape[0]
|
|
)
|
|
)
|
|
overall_f1 = met.F1(
|
|
overall_f1 = met.F1(
|
|
- confs_df['predicted_class'].to_numpy(), confs_df['true_label'].to_numpy()
|
|
|
|
|
|
+ confs_df["predicted_class"].to_numpy(), confs_df["true_label"].to_numpy()
|
|
)
|
|
)
|
|
# Calculate ECE and MCE
|
|
# Calculate ECE and MCE
|
|
conf_ece = met.ECE(
|
|
conf_ece = met.ECE(
|
|
- confs_df['predicted_class'].to_numpy(),
|
|
|
|
- confs_df['confidence'].to_numpy(),
|
|
|
|
- confs_df['true_label'].to_numpy(),
|
|
|
|
|
|
+ confs_df["predicted_class"].to_numpy(),
|
|
|
|
+ confs_df["confidence"].to_numpy(),
|
|
|
|
+ confs_df["true_label"].to_numpy(),
|
|
)
|
|
)
|
|
|
|
|
|
stdev_ece = met.ECE(
|
|
stdev_ece = met.ECE(
|
|
- stdevs_df['predicted_class'].to_numpy(),
|
|
|
|
- stdevs_df['stdev'].to_numpy(),
|
|
|
|
- stdevs_df['true_label'].to_numpy(),
|
|
|
|
|
|
+ stdevs_df["predicted_class"].to_numpy(),
|
|
|
|
+ stdevs_df["stdev"].to_numpy(),
|
|
|
|
+ stdevs_df["true_label"].to_numpy(),
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
-print(f'Overall accuracy: {overall_accuracy}, Overall F1: {overall_f1},')
|
|
|
|
-print(f'Confidence ECE: {conf_ece}')
|
|
|
|
-print(f'Standard Deviation ECE: {stdev_ece}')
|
|
|
|
|
|
+print(f"Overall accuracy: {overall_accuracy}, Overall F1: {overall_f1},")
|
|
|
|
+print(f"Confidence ECE: {conf_ece}")
|
|
|
|
+print(f"Standard Deviation ECE: {stdev_ece}")
|
|
|
|
|
|
|
|
|
|
# Repeat for entropy
|
|
# Repeat for entropy
|
|
-quantiles_entropy = entropies_df.quantile(np.linspace(0, 1, 11), interpolation='lower')[
|
|
|
|
- 'entropy'
|
|
|
|
|
|
+quantiles_entropy = entropies_df.quantile(np.linspace(0, 1, 11), interpolation="lower")[
|
|
|
|
+ "entropy"
|
|
]
|
|
]
|
|
|
|
|
|
accuracies_entropy = []
|
|
accuracies_entropy = []
|
|
@@ -451,14 +451,14 @@ iter_entropy = it.islice(quantiles_entropy.items(), 0, None)
|
|
for quantile in iter_entropy:
|
|
for quantile in iter_entropy:
|
|
percentile = quantile[0]
|
|
percentile = quantile[0]
|
|
|
|
|
|
- filt = entropies_df[entropies_df['entropy'] <= quantile[1]]
|
|
|
|
|
|
+ filt = entropies_df[entropies_df["entropy"] <= quantile[1]]
|
|
accuracy = (
|
|
accuracy = (
|
|
- filt[filt['predicted_class'] == filt['true_label']].shape[0] / filt.shape[0]
|
|
|
|
|
|
+ filt[filt["predicted_class"] == filt["true_label"]].shape[0] / filt.shape[0]
|
|
)
|
|
)
|
|
- f1 = met.F1(filt['predicted_class'].to_numpy(), filt['true_label'].to_numpy())
|
|
|
|
|
|
+ f1 = met.F1(filt["predicted_class"].to_numpy(), filt["true_label"].to_numpy())
|
|
|
|
|
|
accuracies_entropy.append(
|
|
accuracies_entropy.append(
|
|
- {'percentile': percentile, 'accuracy': accuracy, 'f1': f1}
|
|
|
|
|
|
+ {"percentile": percentile, "accuracy": accuracy, "f1": f1}
|
|
)
|
|
)
|
|
|
|
|
|
accuracies_entropy_df = pd.DataFrame(accuracies_entropy)
|
|
accuracies_entropy_df = pd.DataFrame(accuracies_entropy)
|
|
@@ -466,23 +466,23 @@ accuracies_entropy_df = pd.DataFrame(accuracies_entropy)
|
|
|
|
|
|
# Plot the coverage for entropy and accuracy
|
|
# Plot the coverage for entropy and accuracy
|
|
plot_coverage(
|
|
plot_coverage(
|
|
- accuracies_entropy_df['percentile'],
|
|
|
|
- accuracies_entropy_df['accuracy'],
|
|
|
|
- indv_entropy_df['accuracy'],
|
|
|
|
- 'Entropy Accuracy Coverage Plot',
|
|
|
|
- 'Minimum Entropy Percentile (Low to High)',
|
|
|
|
- 'Accuracy',
|
|
|
|
- f'{V2_PATH}/coverage_entropy.png',
|
|
|
|
|
|
+ accuracies_entropy_df["percentile"],
|
|
|
|
+ accuracies_entropy_df["accuracy"],
|
|
|
|
+ indv_entropy_df["accuracy"],
|
|
|
|
+ "Entropy Accuracy Coverage Plot",
|
|
|
|
+ "Minimum Entropy Percentile (Low to High)",
|
|
|
|
+ "Accuracy",
|
|
|
|
+ f"{V2_PATH}/coverage_entropy.png",
|
|
)
|
|
)
|
|
|
|
|
|
# Plot the coverage for entropy and F1
|
|
# Plot the coverage for entropy and F1
|
|
plot_coverage(
|
|
plot_coverage(
|
|
- accuracies_entropy_df['percentile'],
|
|
|
|
- accuracies_entropy_df['f1'],
|
|
|
|
- indv_entropy_df['f1'],
|
|
|
|
- 'Entropy F1 Coverage Plot',
|
|
|
|
- 'Maximum Entropy Percentile (High to Low)',
|
|
|
|
- 'F1',
|
|
|
|
- f'{V2_PATH}/f1_coverage_entropy.png',
|
|
|
|
|
|
+ accuracies_entropy_df["percentile"],
|
|
|
|
+ accuracies_entropy_df["f1"],
|
|
|
|
+ indv_entropy_df["f1"],
|
|
|
|
+ "Entropy F1 Coverage Plot",
|
|
|
|
+ "Maximum Entropy Percentile (High to Low)",
|
|
|
|
+ "F1",
|
|
|
|
+ f"{V2_PATH}/f1_coverage_entropy.png",
|
|
flip=True,
|
|
flip=True,
|
|
)
|
|
)
|