# pyright: basic from __future__ import annotations from dataclasses import dataclass from pathlib import Path from typing import Any import numpy as np import pandas as pd import xarray as xr from bayesian_torch.utils.util import predictive_entropy @dataclass class BackendEvaluation: backend: str source_file: Path image_ids: np.ndarray y_true: np.ndarray y_prob: np.ndarray uncertainty_confidence: np.ndarray uncertainty_std: np.ndarray uncertainty_metric: str def _resolve_dataset_path(model_output_dir: Path) -> Path: primary = model_output_dir / "model_evaluation_results.nc" if primary.exists(): return primary candidates = sorted(model_output_dir.glob("*.nc")) if not candidates: raise FileNotFoundError(f"No netCDF file found under {model_output_dir}") return candidates[0] def _positive_probability( predictions: xr.DataArray, class_index: int, ) -> tuple[np.ndarray, np.ndarray, np.ndarray, str]: if "img_class" not in predictions.dims: raise ValueError("predictions is missing required dim: img_class") if class_index >= predictions.sizes["img_class"]: raise ValueError( f"positive class index {class_index} is out of bounds for img_class size {predictions.sizes['img_class']}" ) if "model" in predictions.dims: class_probs = predictions.isel(img_class=class_index) prob_mean = class_probs.mean(dim="model").values # Confidence is the direct model output probability for the predicted class. prob_std = class_probs.std(dim="model").values return ( np.asarray(prob_mean, dtype=float), np.asarray(prob_mean, dtype=float), np.asarray(prob_std, dtype=float), "std", ) sample_like = [d for d in predictions.dims if d in {"sample", "mc_sample", "draw"}] if sample_like: dim = str(sample_like[0]) class_probs = predictions.isel(img_class=class_index) prob_mean = class_probs.mean(dim=dim).values # For Bayesian MC predictions, uncertainty should come from predictive # entropy of the predictive distribution rather than classwise std. mc_preds = predictions.transpose(dim, "img_id", "img_class").values entropy_uncertainty = predictive_entropy(np.asarray(mc_preds, dtype=float)) return ( np.asarray(prob_mean, dtype=float), np.asarray(prob_mean, dtype=float), np.asarray(entropy_uncertainty, dtype=float), "predictive_entropy", ) prob = predictions.isel(img_class=class_index).values return ( np.asarray(prob, dtype=float), np.asarray(prob, dtype=float), np.full_like(np.asarray(prob, dtype=float), np.nan), "unknown", ) def _labels_to_binary(labels: xr.DataArray, class_index: int) -> np.ndarray: if "label" in labels.dims: if class_index >= labels.sizes["label"]: raise ValueError( f"positive class index {class_index} is out of bounds for label size {labels.sizes['label']}" ) # One-hot labels expected in this repository. binary = labels.argmax(dim="label").values == class_index return np.asarray(binary, dtype=int) # Fallback if labels are already binary. return np.asarray(labels.values, dtype=int) def load_backend_evaluation( config: dict[str, Any], backend: str, class_index: int, ) -> BackendEvaluation: output_key = f"{backend}_path" if output_key not in config["output"]: raise KeyError(f"Missing output path key in config: output.{output_key}") model_output_dir = Path(config["output"][output_key]).expanduser().resolve() ds_path = _resolve_dataset_path(model_output_dir) ds = xr.open_dataset(ds_path) if "predictions" not in ds or "labels" not in ds: raise ValueError( f"Dataset {ds_path} must contain predictions and labels variables" ) predictions = ds["predictions"] labels = ds["labels"] if "img_id" in predictions.coords: image_ids = np.asarray(predictions.coords["img_id"].values) elif "img_id" in labels.coords: image_ids = np.asarray(labels.coords["img_id"].values) else: length = predictions.sizes.get("img_id", labels.sizes.get("img_id")) if length is None: raise ValueError("Could not infer img_id length from predictions/labels") image_ids = np.arange(length) y_true = _labels_to_binary(labels, class_index=class_index) y_prob, conf, y_std, uncertainty_metric = _positive_probability( predictions, class_index=class_index ) if len(y_true) != len(y_prob): raise ValueError( f"Length mismatch after loading backend {backend}: labels={len(y_true)}, probs={len(y_prob)}" ) return BackendEvaluation( backend=backend, source_file=ds_path, image_ids=image_ids, y_true=y_true, y_prob=y_prob, uncertainty_confidence=conf, uncertainty_std=y_std, uncertainty_metric=uncertainty_metric, ) def load_clinical_table(config: dict[str, Any], root_dir: Path) -> pd.DataFrame: csv_path = (root_dir / config["data"]["xls_file_path"]).resolve() df = pd.read_csv(csv_path) df.columns = df.columns.str.strip() return df def physician_column(df: pd.DataFrame) -> str: exact = "DXCONFID" if exact in df.columns: return exact for col in df.columns: if "dxconfid" in col.lower(): return col raise KeyError( "No physician confidence column with DXCONFID found in clinical table" )