|
@@ -0,0 +1,134 @@
|
|
|
|
|
"""Inspect ensemble evaluation results stored in a NetCDF dataset.

Loads per-model predictions and labels, then prints: every image sorted by
ensemble confidence, overall and per-model accuracies, top-K ensemble
accuracies, and the most confidently incorrect predictions.
"""
import xarray as xr
import numpy as np
import sys
import os

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
)  # to allow imports from parent directory
from utils.config import config
import pathlib as pl

import colorama as clr

# Evaluation results file lives under the configured output directory.
model_dataset_path = pl.Path(config["output"]["path"]) / "model_evaluation_results.nc"
array = xr.open_dataset(model_dataset_path)  # type: ignore
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Per-model prediction scores and ground-truth labels from the dataset.
predictions: xr.DataArray = array["predictions"]
labels: xr.DataArray = array["labels"]

# Ensemble score: average the per-model predictions.
avg_predictions = predictions.mean(dim="model")

# Order images from highest to lowest ensemble confidence for the positive
# class (img_class=1); negating the scores makes argsort produce that order.
positive_confidence = avg_predictions.sel(img_class=1).values
sorted_indices = np.argsort(-positive_confidence)
sorted_avg_predictions = avg_predictions.isel(img_id=sorted_indices)
sorted_labels = labels.isel(img_id=sorted_indices)
|
|
|
|
|
+
|
|
|
|
|
# Print out all predictions with their labels
top_n = sorted_avg_predictions.sizes[
    "img_id"
]  # Change this value to print more or fewer
print(
    clr.Fore.CYAN
    + f"Top {top_n} Predictions (Confidence for Positive Class):"
    + clr.Style.RESET_ALL
)
# Hoisted out of the loop: the positive-class slice is loop-invariant.
sorted_positive = sorted_avg_predictions.sel(img_class=1)
for i in range(top_n):
    confidence = sorted_positive.isel(img_id=i).item()
    # Use .item() (not .values): a 0-d ndarray rejects alignment format
    # specs like "<3" on recent NumPy, and .item() matches how
    # `confidence` is extracted above.
    label = sorted_labels.isel(img_id=i, label=1).item()

    # Predicted positive iff confidence >= 0.5 (fixed threshold).
    correctness = (
        "CORRECT"
        if (confidence >= 0.5 and label == 1) or (confidence < 0.5 and label == 0)
        else "INCORRECT"
    )
    color = clr.Fore.GREEN if correctness == "CORRECT" else clr.Fore.RED
    print(
        f"Image ID: {sorted_avg_predictions.img_id.isel(img_id=i).item():<8}, "
        f"Confidence: {confidence:.4f}, "
        f"Label: {label:<3}, " + color + f"{correctness:<9}" + clr.Style.RESET_ALL
    )
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Calculate overall accuracy
# Binary decision at a fixed 0.5 threshold on the ensemble's positive-class
# score, compared against the positive (label=1) column of the labels.
predicted_positive = avg_predictions.sel(img_class=1) >= 0.5
true_positive = labels.sel(label=1) == 1
agreement = predicted_positive == true_positive
correct_predictions = agreement.sum().item()
total_predictions = len(avg_predictions.img_id)
# Guard against an empty dataset instead of dividing by zero.
if total_predictions > 0:
    overall_accuracy = correct_predictions / total_predictions
else:
    overall_accuracy = 0.0
print(
    clr.Fore.MAGENTA
    + f"\nOverall Accuracy (Threshold 0.5): {overall_accuracy:.4f}"
    + clr.Style.RESET_ALL
)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Then go through all individual models and print out their accuracies for comparison, sorted from highest to lowest
model_accuracies = []
for model_idx in predictions.coords["model"].values:
    # Same 0.5 threshold as used for the ensemble above.
    model_positive = predictions.sel(model=model_idx).sel(img_class=1) >= 0.5
    n_correct = (model_positive == true_positive).sum().item()
    model_accuracies.append(
        (model_idx, n_correct / total_predictions if total_predictions > 0 else 0.0)
    )

# Sort by accuracy
model_accuracies.sort(key=lambda pair: pair[1], reverse=True)
print(
    clr.Fore.CYAN
    + "\nIndividual Model Accuracies (Threshold 0.5):"
    + clr.Style.RESET_ALL
)
for model_idx, accuracy in model_accuracies:
    print(f"Model {int(model_idx):<3}: Accuracy: {accuracy:.4f}")
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Then calculate the average accuracy if we were to ensemble the top K models, for K=1 to total number of models
total_models = len(predictions.coords["model"].values)
ensemble_accuracies = []
# model_accuracies is already sorted best-first, so a prefix of length k of
# this list is exactly the top-k models.
ranked_models = [model_id for model_id, _ in model_accuracies]
for k in range(1, total_models + 1):
    ensemble_preds = predictions.sel(model=ranked_models[:k]).mean(dim="model")
    ensemble_positive = ensemble_preds.sel(img_class=1) >= 0.5
    n_correct = (ensemble_positive == true_positive).sum().item()
    ensemble_accuracies.append(
        (k, n_correct / total_predictions if total_predictions > 0 else 0.0)
    )
print(
    clr.Fore.CYAN
    + "\nEnsemble Accuracies for Top K Models (Threshold 0.5):"
    + clr.Style.RESET_ALL
)
for k, accuracy in ensemble_accuracies:
    print(f"Top {k:<3} Models: Ensemble Accuracy: {accuracy:.4f}")
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Finally, identify the top 5 most confidently incorrect predictions
incorrect_predictions = []
# Hoisted out of the loop: the positive-class slice is loop-invariant.
positive_scores = avg_predictions.sel(img_class=1)
for i in range(len(avg_predictions.img_id)):
    confidence = positive_scores.isel(img_id=i).item()
    # Use .item() (not .values): a 0-d ndarray rejects alignment format
    # specs like "<3" in the f-string below on recent NumPy, and .item()
    # matches how `confidence` is extracted.
    label = labels.isel(img_id=i, label=1).item()
    predicted_label = 1 if confidence >= 0.5 else 0
    if predicted_label != label:
        incorrect_predictions.append((i, confidence, label))
# Sort by confidence, i.e. by distance of the score from the 0.5 threshold.
incorrect_predictions.sort(key=lambda x: -abs(x[1] - 0.5))
top_incorrect = incorrect_predictions[:5]
print(
    clr.Fore.YELLOW
    + "\nTop 5 Most Confident Incorrect Predictions:"
    + clr.Style.RESET_ALL
)
for i, confidence, label in top_incorrect:
    predicted_label = 1 if confidence >= 0.5 else 0
    print(
        f"Image ID: {avg_predictions.img_id.isel(img_id=i).item():<8}, "
        f"Confidence: {confidence:.4f}, "
        f"Predicted Label: {predicted_label:<3}, "
        f"True Label: {label:<3}"
    )
|