generate_statistics.py

import pathlib as pl

import numpy as np
import pandas as pd
import xarray as xr

from utils.config import config
# Load the evaluation results
ds = xr.open_dataset(pl.Path(config["output"]["path"]) / "model_evaluation_results.nc")  # type: ignore
# The dataset contains two DataArrays: 'predictions' and 'labels'.
# First analysis: average the predictions across all models for each image,
# compute the accuracy of the averaged predictions against the true labels,
# and graph accuracy vs. confidence threshold.
predictions: xr.DataArray = ds["predictions"]
labels: xr.DataArray = ds["labels"]
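# Assumed layout, inferred from the .sel calls below rather than verified
# against the file: 'predictions' has dims (model, img_id, img_class) holding
# per-class confidence scores; 'labels' has dims (img_id, label) holding
# one-hot ground truth.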
# Average predictions across models
avg_predictions = predictions.mean(dim="model")

# Sweep confidence thresholds and calculate accuracy at each one
thresholds = np.linspace(0.5, 1.0, num=10)  # 10 evenly spaced thresholds from 0.5 to 1.0
accuracies = []
for threshold in thresholds:
    # Pick the positive class for the labels and predictions
    predicted_positive = avg_predictions.sel(img_class=1) >= threshold
    true_positive = labels.sel(label=1) == 1
    # Accuracy: fraction of images where the thresholded prediction agrees with the label
    correct_predictions = (predicted_positive == true_positive).sum().item()
    total_predictions = len(avg_predictions.img_id)
    accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0.0
    accuracies.append(accuracy)
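
# An equivalent vectorized sweep (a sketch, not part of the original logic):
# broadcasting the thresholds against the averaged positive-class scores gives
# an (img_id, threshold) boolean array whose mean over images is the accuracy
# at each threshold.
thr = xr.DataArray(thresholds, dims="threshold")
accuracies_vec = (
    (avg_predictions.sel(img_class=1) >= thr) == (labels.sel(label=1) == 1)
).mean(dim="img_id")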

# Print the accuracy at each threshold
for threshold, accuracy in zip(thresholds, accuracies):
    print(f"Threshold: {threshold:.2f}, Accuracy: {accuracy:.4f}")
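
# The analysis goal above calls for graphing accuracy vs. confidence threshold.
# A minimal plotting sketch, assuming matplotlib is available (it is not
# imported anywhere else in this script) and using a hypothetical output
# filename:
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(thresholds, accuracies, marker="o")
ax.set_xlabel("Confidence threshold")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy of model-averaged predictions vs. confidence threshold")
fig.savefig(pl.Path(config["output"]["path"]) / "accuracy_vs_threshold.png")  # hypothetical filename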