import xarray as xr
from utils.config import config
import pathlib as pl
import numpy as np
import os

# Load the evaluation results
os.chdir(pl.Path(__file__).parent)
model_dataset_path = pl.Path("../model_evaluations") / pl.Path(
    config["analysis"]["evaluation_name"].strip()
).with_suffix(".nc")
print(f"Loading evaluation results from {model_dataset_path}")
array = xr.open_dataset(model_dataset_path)  # type: ignore

# This dataset includes two DataArrays: 'predictions' and 'labels'.
# For the first analysis, the goal is to average the predictions across all models for
# each image, then determine the accuracy of these averaged predictions against the true
# labels, graphing accuracy vs. confidence threshold.
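# Assumed layout, inferred from the selections below (not verified against the file):
# 'predictions' has dims (model, img_id, img_class) with per-class scores in [0, 1],
# and 'labels' has dims (img_id, label) with 1 marking the true class of each image.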
predictions: xr.DataArray = array["predictions"]
labels: xr.DataArray = array["labels"]

# Average predictions across models
avg_predictions = predictions.mean(dim="model")

# Loop through different confidence thresholds and calculate accuracy
thresholds = np.linspace(0.5, 1.0, num=10)  # From 0.5 to 1.0
accuracies: list[float] = []
for threshold in thresholds:
    # Pick the positive class for the labels and predictions
    predicted_positive = avg_predictions.sel(img_class=1) >= threshold
    actual_positive = labels.sel(label=1) == 1
    # Accuracy: fraction of images where the thresholded prediction agrees with the true label
    correct_predictions = (predicted_positive == actual_positive).sum().item()
    total_predictions = len(avg_predictions.img_id)
    accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0.0
    accuracies.append(accuracy)
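
# Sketch of an equivalent vectorized version (hedged: it assumes the dims named above and
# is not part of the original analysis): broadcast the thresholds as a new dimension
# instead of looping in Python, then average the per-image correctness.
threshold_da = xr.DataArray(thresholds, dims="threshold")
predicted_positive_all = avg_predictions.sel(img_class=1) >= threshold_da
vectorized_accuracies = (predicted_positive_all == (labels.sel(label=1) == 1)).mean(dim="img_id")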

# Print the accuracies for each threshold
for threshold, accuracy in zip(thresholds, accuracies):
    print(f"Threshold: {threshold:.2f}, Accuracy: {accuracy:.4f}")
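
# The analysis goal above mentions graphing accuracy vs. confidence threshold, but the
# script only prints the values. A minimal plotting sketch, assuming matplotlib is
# available in this environment; the output filename is a placeholder.
import matplotlib.pyplot as plt

plt.plot(thresholds, accuracies, marker="o")
plt.xlabel("Confidence threshold")
plt.ylabel("Accuracy")
plt.title("Accuracy of averaged predictions vs. confidence threshold")
plt.savefig("accuracy_vs_threshold.png", dpi=150)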