import os
import pathlib as pl

import colorama as clr
import numpy as np
import xarray as xr

from utils.config import config

os.chdir(pl.Path(__file__).parent)

model_dataset_path = pl.Path("../model_evaluations") / pl.Path(
    config["analysis"]["evaluation_name"].strip()
).with_suffix(".nc")
dataset = xr.open_dataset(model_dataset_path)  # type: ignore
predictions: xr.DataArray = dataset["predictions"]
labels: xr.DataArray = dataset["labels"]
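
# Assumed dataset layout, inferred from the accesses below (not validated at load time):
#   predictions: dims (model, img_id, img_class) -- per-model confidence scores
#   labels:      dims (img_id, label)            -- one-hot ground-truth labels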

# Average predictions across models
avg_predictions = predictions.mean(dim="model")

# Sort from highest to lowest confidence for the positive class (img_class=1)
sorted_indices = np.argsort(-avg_predictions.sel(img_class=1).values)
sorted_avg_predictions = avg_predictions.isel(img_id=sorted_indices)
sorted_labels = labels.isel(img_id=sorted_indices)
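# (labels are reindexed in the same order so they stay aligned with the sorted predictions)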

# Print out all predictions with their labels
top_n = sorted_avg_predictions.sizes[
    "img_id"
]  # Change this value to print more or fewer
print(
    clr.Fore.CYAN
    + f"Top {top_n} Predictions (Confidence for Positive Class):"
    + clr.Style.RESET_ALL
)
for i in range(top_n):
    confidence = sorted_avg_predictions.sel(img_class=1).isel(img_id=i).item()
    label = sorted_labels.isel(img_id=i, label=1).item()
    correctness = (
        "CORRECT"
        if (confidence >= 0.5 and label == 1) or (confidence < 0.5 and label == 0)
        else "INCORRECT"
    )
    color = clr.Fore.GREEN if correctness == "CORRECT" else clr.Fore.RED
    print(
        f"Image ID: {sorted_avg_predictions.img_id.isel(img_id=i).item():<8}, "
        f"Confidence: {confidence:.4f}, "
        f"Label: {label:<3}, " + color + f"{correctness:<9}" + clr.Style.RESET_ALL
    )

# Calculate the overall accuracy of the ensemble-averaged predictions at a 0.5 threshold
predicted_positive = avg_predictions.sel(img_class=1) >= 0.5
actual_positive = labels.sel(label=1) == 1
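# predicted_positive == actual_positive is True for correct positives and correct negatives
# alike, so summing it counts every correctly classified image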
correct_predictions = (predicted_positive == actual_positive).sum().item()
total_predictions = len(avg_predictions.img_id)
overall_accuracy = (
    correct_predictions / total_predictions if total_predictions > 0 else 0.0
)
print(
    clr.Fore.MAGENTA
    + f"\nOverall Accuracy (Threshold 0.5): {overall_accuracy:.4f}"
    + clr.Style.RESET_ALL
)

# Compute each individual model's accuracy for comparison, sorted from highest to lowest
model_accuracies = []
for model_idx in predictions.coords["model"].values:
    model_preds = predictions.sel(model=model_idx)
    predicted_positive = model_preds.sel(img_class=1) >= 0.5
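    # actual_positive and total_predictions are reused from the overall-accuracy block above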
    correct_predictions = (predicted_positive == actual_positive).sum().item()
    accuracy = (
        correct_predictions / total_predictions if total_predictions > 0 else 0.0
    )
    model_accuracies.append((model_idx, accuracy))

# Sort by accuracy, best model first
model_accuracies.sort(key=lambda x: x[1], reverse=True)
print(
    clr.Fore.CYAN
    + "\nIndividual Model Accuracies (Threshold 0.5):"
    + clr.Style.RESET_ALL
)
for model_idx, accuracy in model_accuracies:
    print(f"Model {int(model_idx):<3}: Accuracy: {accuracy:.4f}")

# Calculate the accuracy of ensembling the top K models, for K = 1 to the total number of models
total_models = len(predictions.coords["model"].values)
ensemble_accuracies = []
for k in range(1, total_models + 1):
    top_k_models = [ma[0] for ma in model_accuracies[:k]]
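    # Re-average the raw predictions over just the K highest-accuracy models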
    ensemble_preds = predictions.sel(model=top_k_models).mean(dim="model")
    predicted_positive = ensemble_preds.sel(img_class=1) >= 0.5
    correct_predictions = (predicted_positive == actual_positive).sum().item()
    accuracy = (
        correct_predictions / total_predictions if total_predictions > 0 else 0.0
    )
    ensemble_accuracies.append((k, accuracy))

print(
    clr.Fore.CYAN
    + "\nEnsemble Accuracies for Top K Models (Threshold 0.5):"
    + clr.Style.RESET_ALL
)
for k, accuracy in ensemble_accuracies:
    print(f"Top {k:<3} Models: Ensemble Accuracy: {accuracy:.4f}")

# Finally, identify the top 5 most confidently incorrect predictions
incorrect_predictions = []
for i in range(len(avg_predictions.img_id)):
    confidence = avg_predictions.sel(img_class=1).isel(img_id=i).item()
    label = labels.isel(img_id=i, label=1).item()
    predicted_label = 1 if confidence >= 0.5 else 0
    if predicted_label != label:
        incorrect_predictions.append((i, confidence, label))

# Sort by distance of the confidence from the 0.5 threshold, most confident mistakes first
incorrect_predictions.sort(key=lambda x: -abs(x[1] - 0.5))
top_incorrect = incorrect_predictions[:5]
print(
    clr.Fore.YELLOW
    + "\nTop 5 Most Confident Incorrect Predictions:"
    + clr.Style.RESET_ALL
)
for i, confidence, label in top_incorrect:
    predicted_label = 1 if confidence >= 0.5 else 0
    print(
        f"Image ID: {avg_predictions.img_id.isel(img_id=i).item():<8}, "
        f"Confidence: {confidence:.4f}, "
        f"Predicted Label: {predicted_label:<3}, "
        f"True Label: {label:<3}"
    )