import xarray as xr
from utils.config import config
import pathlib as pl
import numpy as np
import os

# Load the evaluation results
os.chdir(pl.Path(__file__).parent)
model_dataset_path = pl.Path("../model_evaluations") / pl.Path(
    config["analysis"]["evaluation_name"].strip()
).with_suffix(".nc")
print(f"Loading evaluation results from {model_dataset_path}")
array = xr.open_dataset(model_dataset_path)  # type: ignore

# This dataset includes two DataArrays: 'predictions' and 'labels'.
# For the first analysis, the goal is to average the predictions across all models for
# each image, then determine the accuracy of these averaged predictions against the true
# labels, graphing accuracy vs. confidence threshold.
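# Assumed layout, inferred from the selections below (not verified against the file):
# 'predictions' has dims (model, img_id, img_class) with per-class scores in [0, 1],
# and 'labels' has dims (img_id, label) with 1 marking the true class of each image.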
predictions: xr.DataArray = array["predictions"]
labels: xr.DataArray = array["labels"]

# Average predictions across models
avg_predictions = predictions.mean(dim="model")

# Loop through different confidence thresholds and calculate accuracy
thresholds = np.linspace(0.5, 1.0, num=10)  # From 0.5 to 1.0
accuracies: list[float] = []
for threshold in thresholds:
    # Pick the positive class for the labels and predictions
    predicted_positive = avg_predictions.sel(img_class=1) >= threshold
    actual_positive = labels.sel(label=1) == 1
    # Accuracy: fraction of images where the thresholded prediction agrees with the true label
    correct_predictions = (predicted_positive == actual_positive).sum().item()
    total_predictions = len(avg_predictions.img_id)
    accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0.0
    accuracies.append(accuracy)
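
# Sketch of an equivalent vectorized version (hedged: it assumes the dims named above and
# is not part of the original analysis): broadcast the thresholds as a new dimension
# instead of looping in Python, then average the per-image correctness.
threshold_da = xr.DataArray(thresholds, dims="threshold")
predicted_positive_all = avg_predictions.sel(img_class=1) >= threshold_da
vectorized_accuracies = (predicted_positive_all == (labels.sel(label=1) == 1)).mean(dim="img_id")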

# Print the accuracies for each threshold
for threshold, accuracy in zip(thresholds, accuracies):
    print(f"Threshold: {threshold:.2f}, Accuracy: {accuracy:.4f}")
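
# The analysis goal above mentions graphing accuracy vs. confidence threshold, but the
# script only prints the values. A minimal plotting sketch, assuming matplotlib is
# available in this environment; the output filename is a placeholder.
import matplotlib.pyplot as plt

plt.plot(thresholds, accuracies, marker="o")
plt.xlabel("Confidence threshold")
plt.ylabel("Accuracy")
plt.title("Accuracy of averaged predictions vs. confidence threshold")
plt.savefig("accuracy_vs_threshold.png", dpi=150)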