| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- # The purpose of this file is to perform a sensitivity analysis on the model evaluation results and graph the findings.
- # The sensitivity analysis will be done by varying the number of models used in the ensemble and observing the effect on overall accuracy.
# We will take 50 different random arrangements of models for each ensemble size (other than 50, which is the full set) to get a distribution of accuracies for each ensemble size.
# These will have associated error bars based on the standard deviation of the accuracies for each ensemble size.
- import xarray as xr
- from utils.config import config
- import pathlib as pl
- import numpy as np
- import matplotlib.pyplot as plt
- import os
# Load the ensemble evaluation results from the shared model_evaluations folder.
# Work relative to this script's directory so the relative paths below resolve.
os.chdir(pl.Path(__file__).parent)
evaluation_name = config["analysis"]["evaluation_name"].strip()
model_dataset_path = (pl.Path("../model_evaluations") / evaluation_name).with_suffix(".nc")
print(f"Loading evaluation results from {model_dataset_path}")
array = xr.open_dataset(model_dataset_path)  # type: ignore
# This section was generated by Github Copilot - 2025-11-04
# Perform sensitivity analysis by varying ensemble size and sampling subsets of models.
predictions: xr.DataArray = array["predictions"]
labels: xr.DataArray = array["labels"]

# Ensure the plots directory exists (same layout as the other analysis scripts).
plots_dir = pl.Path("../output") / config["analysis"]["evaluation_name"] / "plots"
plots_dir.mkdir(parents=True, exist_ok=True)

# Analysis configuration: fixed seed so the sampled subsets are reproducible.
rng = np.random.default_rng(42)
num_models = int(predictions.sizes["model"])
samples_per_size = 50
ensemble_sizes = list(range(1, num_models + 1))

# True labels for the positive class (assumes same structure as the other script).
true_labels = labels.sel(label=1).values  # shape: (n_samples,)

# Result containers, indexed in step with ensemble_sizes.
mean_accuracies: list[float] = []
std_accuracies: list[float] = []
all_accuracies: dict[int, list[float]] = {size: [] for size in ensemble_sizes}
def _ensemble_accuracy(selected_idx) -> float:
    """Accuracy of the mean-ensemble over the models at ``selected_idx``.

    Averages the per-model confidences, thresholds the positive-class
    confidence at 0.5, and scores against the positive-class labels.
    Reads module-level ``predictions`` and ``true_labels``.
    """
    preds_selected = predictions.isel(model=selected_idx).mean(dim="model")
    confs = preds_selected.sel(img_class=1).values
    predicted_positive = confs >= 0.5
    true_positive = true_labels == 1
    return (predicted_positive == true_positive).sum().item() / len(confs)


# Fix: the accuracy computation was duplicated verbatim in both branches;
# it now lives in _ensemble_accuracy and each branch only chooses indices.
for k in ensemble_sizes:
    if k == num_models:
        # The full ensemble is deterministic — evaluate it exactly once.
        accuracies_k = [_ensemble_accuracy(np.arange(num_models))]
    else:
        # Sample random model subsets to build a distribution of accuracies.
        accuracies_k = [
            _ensemble_accuracy(rng.choice(num_models, size=k, replace=False))
            for _ in range(samples_per_size)
        ]
    all_accuracies[k] = accuracies_k
    mean_accuracies.append(float(np.mean(accuracies_k)))
    std_accuracies.append(float(np.std(accuracies_k, ddof=0)))
# Plot mean accuracy vs ensemble size with error bars (one std per size).
plt.figure(figsize=(10, 6))
plt.errorbar(ensemble_sizes, mean_accuracies, yerr=std_accuracies, fmt="-o", capsize=3)
plt.title("Sensitivity Analysis: Accuracy vs Ensemble Size")
plt.xlabel("Number of Models in Ensemble")
plt.ylabel("Accuracy")
plt.grid(True)

# X-ticks every 5 models, always including the final model count.
ticks = list(range(1, num_models + 1, 5))
if not ticks or ticks[-1] != num_models:
    ticks.append(num_models)
plt.xticks(ticks)

# Overlay the raw sampled accuracies as jittered points so the spread behind
# each error bar is visible. (Fix: the enumerate() index was unused.)
for k in ensemble_sizes:
    y = all_accuracies[k]
    x = np.full(len(y), k) + (rng.random(len(y)) - 0.5) * 0.2  # small horizontal jitter
    plt.scatter(x, y, alpha=0.3, s=8, color="gray")

plt.tight_layout()
plt.savefig(plots_dir / "sensitivity_accuracy_vs_ensemble_size.png")
# Fix: release the figure so repeated runs in one process don't accumulate
# open matplotlib figures.
plt.close()
# End of Copilot section
|