6 дней назад · e79e7f50c2
--- a/analysis/data_access.py
+++ b/analysis/data_access.py
@@ -51,14 +51,11 @@ def _positive_probability(
 
															     if "model" in predictions.dims:
														
 
															         class_probs = predictions.isel(img_class=class_index)
														
 
															         prob_mean = class_probs.mean(dim="model").values
														
 
															-        # Confidence is defined as distance from 0.5 and averaged across models.
														
 
															-        class_prob_arr = np.asarray(class_probs.values, dtype=float)
														
 
															-        model_axis = class_probs.dims.index("model")
														
 
															-        conf_mean = np.abs(class_prob_arr - 0.5).mean(axis=model_axis)
														
 
															+        # Confidence is the direct model output probability for the predicted class.
														
 
															         prob_std = class_probs.std(dim="model").values
														
 
															         return (
														
 
															             np.asarray(prob_mean, dtype=float),
														
 
															-            np.asarray(conf_mean, dtype=float),
														
 
															+            np.asarray(prob_mean, dtype=float),
														
 
															             np.asarray(prob_std, dtype=float),
														
 
															             "std",
														
 
															         )
														
@@ -68,9 +65,6 @@ def _positive_probability(
 
															         dim = str(sample_like[0])
														
 
															         class_probs = predictions.isel(img_class=class_index)
														
 
															         prob_mean = class_probs.mean(dim=dim).values
														
 
															-        class_prob_arr = np.asarray(class_probs.values, dtype=float)
														
 
															-        sample_axis = class_probs.dims.index(dim)
														
 
															-        conf_mean = np.abs(class_prob_arr - 0.5).mean(axis=sample_axis)
														
 
															         # For Bayesian MC predictions, uncertainty should come from predictive
														
 
															         # entropy of the predictive distribution rather than classwise std.
														
@@ -78,16 +72,15 @@ def _positive_probability(
 
															         entropy_uncertainty = predictive_entropy(np.asarray(mc_preds, dtype=float))
														
 
															         return (
														
 
															             np.asarray(prob_mean, dtype=float),
														
 
															-            np.asarray(conf_mean, dtype=float),
														
 
															+            np.asarray(prob_mean, dtype=float),
														
 
															             np.asarray(entropy_uncertainty, dtype=float),
														
 
															             "predictive_entropy",
														
 
															         )
														
 
															     prob = predictions.isel(img_class=class_index).values
														
 
															-    conf = np.abs(np.asarray(prob, dtype=float) - 0.5)
														
 
															     return (
														
 
															         np.asarray(prob, dtype=float),
														
 
															-        np.asarray(conf, dtype=float),
														
 
															+        np.asarray(prob, dtype=float),
														
 
															         np.full_like(np.asarray(prob, dtype=float), np.nan),
														
 
															         "unknown",
														
 
															     )
														
--- a/analysis/evaluate_models.py
+++ b/analysis/evaluate_models.py
@@ -7,6 +7,7 @@ from pathlib import Path
 
															 from typing import Any
														
 
															 import pandas as pd
														
 
															+from tqdm.auto import tqdm
														
 
															 from analysis.analysis_modules import (
														
 
															     run_calibration,
														
@@ -229,65 +230,91 @@ def _run_backend(
 
															         "uncertainty_metric": evaluation.uncertainty_metric,
														
 
															     }
														
 
															-    summary["performance"] = run_performance(
														
 
															-        evaluation=evaluation,
														
 
															-        output_dir=out_dir,
														
 
															-        thresholds=thresholds,
														
 
															-    )
														
 
															-    summary["calibration"] = run_calibration(
														
 
															-        evaluation=evaluation,
														
 
															-        output_dir=out_dir,
														
 
															-        bins=DEFAULT_CALIBRATION_BINS,
														
 
															-    )
														
 
															-    summary["physician"] = run_physician(
														
 
															-        evaluation=evaluation,
														
 
															-        clinical_df=clinical_df,
														
 
															-        output_dir=out_dir,
														
 
															-    )
														
 
															-    summary["longitudinal"] = run_longitudinal(
														
 
															-        evaluation=evaluation,
														
 
															-        clinical_df=clinical_df,
														
 
															-        output_dir=out_dir,
														
 
															+    n_stages = 4 + (0 if skip_noise else 2)
														
 
															+    stage_bar = tqdm(
														
 
															+        total=n_stages,
														
 
															+        desc=f"[{backend}] analysis stages",
														
 
															+        unit="stage",
														
 
															+        leave=False,
														
 
															     )
														
 
															+    try:
														
 
															+        stage_bar.set_postfix_str("performance")
														
 
															+        summary["performance"] = run_performance(
														
 
															+            evaluation=evaluation,
														
 
															+            output_dir=out_dir,
														
 
															+            thresholds=thresholds,
														
 
															+        )
														
 
															+        stage_bar.update(1)
														
 
															-    if skip_noise:
														
 
															-        summary["noise"] = {"skipped": True, "reason": "--skip-noise supplied"}
														
 
															-        summary["noise_accuracy_uncertainty"] = {
														
 
															-            "skipped": True,
														
 
															-            "reason": "Noise analysis skipped, so no noise table available.",
														
 
															-        }
														
 
															-    else:
														
 
															-        try:
														
 
															-            summary["noise"] = run_noise_analysis(
														
 
															-                config=config,
														
 
															-                root_dir=root_dir,
														
 
															-                backend=backend,
														
 
															-                output_dir=out_dir,
														
 
															-                class_index=DEFAULT_POSITIVE_CLASS_INDEX,
														
 
															-                noise_sigmas=noise_factors,
														
 
															-                threshold=DEFAULT_DECISION_THRESHOLD,
														
 
															-                calibration_bins=DEFAULT_CALIBRATION_BINS,
														
 
															-                bayesian_mc_passes=DEFAULT_BAYESIAN_MC_PASSES,
														
 
															-            )
														
 
															+        stage_bar.set_postfix_str("calibration")
														
 
															+        summary["calibration"] = run_calibration(
														
 
															+            evaluation=evaluation,
														
 
															+            output_dir=out_dir,
														
 
															+            bins=DEFAULT_CALIBRATION_BINS,
														
 
															+        )
														
 
															+        stage_bar.update(1)
														
 
															-            noise_table_path = Path(str(summary["noise"]["table"]))
														
 
															-            noise_df = pd.read_csv(noise_table_path)
														
 
															-            summary["noise_accuracy_uncertainty"] = (
														
 
															-                run_noise_accuracy_uncertainty_analysis(
														
 
															-                    noise_df=noise_df,
														
 
															-                    backend=backend,
														
 
															-                    output_dir=out_dir,
														
 
															-                )
														
 
															-            )
														
 
															-        except Exception as exc:
														
 
															-            summary["noise"] = {
														
 
															-                "skipped": True,
														
 
															-                "reason": f"Noise analysis failed: {exc}",
														
 
															-            }
														
 
															+        stage_bar.set_postfix_str("physician")
														
 
															+        summary["physician"] = run_physician(
														
 
															+            evaluation=evaluation,
														
 
															+            clinical_df=clinical_df,
														
 
															+            output_dir=out_dir,
														
 
															+        )
														
 
															+        stage_bar.update(1)
														
 
															+
														
 
															+        stage_bar.set_postfix_str("longitudinal")
														
 
															+        summary["longitudinal"] = run_longitudinal(
														
 
															+            evaluation=evaluation,
														
 
															+            clinical_df=clinical_df,
														
 
															+            output_dir=out_dir,
														
 
															+        )
														
 
															+        stage_bar.update(1)
														
 
															+
														
 
															+        if skip_noise:
														
 
															+            summary["noise"] = {"skipped": True, "reason": "--skip-noise supplied"}
														
 
															             summary["noise_accuracy_uncertainty"] = {
														
 
															                 "skipped": True,
														
 
															-                "reason": f"Noise relationship analysis failed: {exc}",
														
 
															+                "reason": "Noise analysis skipped, so no noise table available.",
														
 
															             }
														
 
															+        else:
														
 
															+            try:
														
 
															+                stage_bar.set_postfix_str("noise")
														
 
															+                summary["noise"] = run_noise_analysis(
														
 
															+                    config=config,
														
 
															+                    root_dir=root_dir,
														
 
															+                    backend=backend,
														
 
															+                    output_dir=out_dir,
														
 
															+                    class_index=DEFAULT_POSITIVE_CLASS_INDEX,
														
 
															+                    noise_sigmas=noise_factors,
														
 
															+                    threshold=DEFAULT_DECISION_THRESHOLD,
														
 
															+                    calibration_bins=DEFAULT_CALIBRATION_BINS,
														
 
															+                    bayesian_mc_passes=DEFAULT_BAYESIAN_MC_PASSES,
														
 
															+                )
														
 
															+                stage_bar.update(1)
														
 
															+
														
 
															+                stage_bar.set_postfix_str("noise-correlation")
														
 
															+                noise_table_path = Path(str(summary["noise"]["table"]))
														
 
															+                noise_df = pd.read_csv(noise_table_path)
														
 
															+                summary["noise_accuracy_uncertainty"] = (
														
 
															+                    run_noise_accuracy_uncertainty_analysis(
														
 
															+                        noise_df=noise_df,
														
 
															+                        backend=backend,
														
 
															+                        output_dir=out_dir,
														
 
															+                    )
														
 
															+                )
														
 
															+                stage_bar.update(1)
														
 
															+            except Exception as exc:
														
 
															+                summary["noise"] = {
														
 
															+                    "skipped": True,
														
 
															+                    "reason": f"Noise analysis failed: {exc}",
														
 
															+                }
														
 
															+                summary["noise_accuracy_uncertainty"] = {
														
 
															+                    "skipped": True,
														
 
															+                    "reason": f"Noise relationship analysis failed: {exc}",
														
 
															+                }
														
 
															+                stage_bar.update(2)
														
 
															+    finally:
														
 
															+        stage_bar.close()
														
 
															     report_path = _write_backend_plot_report(backend=backend, out_dir=out_dir)
														
 
															     summary["plots_report"] = str(report_path)
														
@@ -341,8 +368,10 @@ def main() -> None:
 
															         print(f"Dataset summary complete. Results saved to {paths.run_dir}")
														
 
															         return
														
 
															-    for backend in args.backend:
														
 
															+    backend_iter = tqdm(args.backend, desc="Backends", unit="backend")
														
 
															+    for backend in backend_iter:
														
 
															         out_dir = backend_dir(paths, backend)
														
 
															+        backend_iter.set_postfix_str(backend)
														
 
															         if args.longitudinal_breakdown_only:
														
 
															             manifest["backends"][backend] = _run_longitudinal_breakdown_only(
														
 
															                 config=config,
														
--- a/analysis/holdout_evaluation.py
+++ b/analysis/holdout_evaluation.py
@@ -9,6 +9,7 @@ from typing import Any
 
															 import numpy as np
														
 
															 import torch
														
 
															 from torch.utils.data import DataLoader
														
 
															+from tqdm.auto import tqdm
														
 
															 import xarray as xr
														
 
															 from model.cnn import CNN3D
														
@@ -66,7 +67,9 @@ def _evaluate_ensemble(
 
															     labels = np.zeros((n_samples, n_classes), dtype=np.float32)
														
 
															     image_ids = np.zeros((n_samples,), dtype=int)
														
 
															-    for model_i, model_file in enumerate(model_files):
														
 
															+    model_iter = tqdm(model_files, desc="Ensemble checkpoints", unit="model")
														
 
															+    for model_i, model_file in enumerate(model_iter):
														
 
															+        model_iter.set_postfix_str(model_file.name)
														
 
															         model = _init_cnn(config)
														
 
															         model.load_state_dict(
														
 
															             torch.load(model_file, map_location=device),
														
@@ -76,7 +79,14 @@ def _evaluate_ensemble(
 
															         model.eval()
														
 
															         with torch.no_grad():
														
 
															-            for sample_i, (mri, xls, label, img_id) in enumerate(holdout_loader):
														
 
															+            sample_iter = tqdm(
														
 
															+                holdout_loader,
														
 
															+                total=n_samples,
														
 
															+                desc=f"{model_file.name}",
														
 
															+                unit="batch",
														
 
															+                leave=False,
														
 
															+            )
														
 
															+            for sample_i, (mri, xls, label, img_id) in enumerate(sample_iter):
														
 
															                 mri_device = mri.float().to(device)
														
 
															                 xls_device = xls.float().to(device)
														
 
															                 output = model((mri_device, xls_device))
														
@@ -156,8 +166,17 @@ def _evaluate_bayesian(
 
															     image_ids = np.zeros((n_samples,), dtype=int)
														
 
															     with torch.no_grad():
														
 
															-        for pass_i in range(mc_passes):
														
 
															-            for sample_i, (mri, xls, label, img_id) in enumerate(holdout_loader):
														
 
															+        pass_iter = tqdm(range(mc_passes), desc="Bayesian MC passes", unit="pass")
														
 
															+        for pass_i in pass_iter:
														
 
															+            pass_iter.set_postfix_str(f"pass={pass_i + 1}/{mc_passes}")
														
 
															+            sample_iter = tqdm(
														
 
															+                holdout_loader,
														
 
															+                total=n_samples,
														
 
															+                desc=f"MC pass {pass_i + 1}",
														
 
															+                unit="batch",
														
 
															+                leave=False,
														
 
															+            )
														
 
															+            for sample_i, (mri, xls, label, img_id) in enumerate(sample_iter):
														
 
															                 mri_device = mri.float().to(device)
														
 
															                 xls_device = xls.float().to(device)
														
 
															                 output = model((mri_device, xls_device))
														
--- a/analysis/noise_analysis.py
+++ b/analysis/noise_analysis.py
@@ -9,6 +9,7 @@ import numpy as np
 
															 import pandas as pd
														
 
															 import torch
														
 
															 from bayesian_torch.utils.util import predictive_entropy
														
 
															+from tqdm.auto import tqdm
														
 
															 from model.cnn import CNN3D
														
@@ -132,7 +133,14 @@ def _infer_with_noise_ensemble(
 
															     all_true: list[int] = []
														
 
															     with torch.no_grad():
														
 
															-        for mri, xls, labels, _ in test_loader:
														
 
															+        batch_iter = tqdm(
														
 
															+            test_loader,
														
 
															+            total=len(test_loader),
														
 
															+            desc=f"ensemble sigma={sigma:g}",
														
 
															+            unit="batch",
														
 
															+            leave=False,
														
 
															+        )
														
 
															+        for mri, xls, labels, _ in batch_iter:
														
 
															             mri_device = mri.float().to(device)
														
 
															             xls_device = xls.float().to(device)
														
 
															             labels_device = labels.to(device)
														
@@ -144,7 +152,7 @@ def _infer_with_noise_ensemble(
 
															             pred_mat = np.stack(preds, axis=0)
														
 
															             mean = pred_mat.mean(axis=0)
														
 
															-            confidence = np.abs(pred_mat - 0.5).mean(axis=0)
														
 
															+            confidence = mean
														
 
															             std = pred_mat.std(axis=0)
														
 
															             true = labels_device[:, class_index].detach().cpu().numpy().astype(int)
														
@@ -176,7 +184,14 @@ def _infer_with_noise_bayesian(
 
															     all_true: list[int] = []
														
 
															     with torch.no_grad():
														
 
															-        for mri, xls, labels, _ in test_loader:
														
 
															+        batch_iter = tqdm(
														
 
															+            test_loader,
														
 
															+            total=len(test_loader),
														
 
															+            desc=f"bayesian sigma={sigma:g}",
														
 
															+            unit="batch",
														
 
															+            leave=False,
														
 
															+        )
														
 
															+        for mri, xls, labels, _ in batch_iter:
														
 
															             mri_device = mri.float().to(device)
														
 
															             xls_device = xls.float().to(device)
														
 
															             labels_device = labels.to(device)
														
@@ -188,7 +203,7 @@ def _infer_with_noise_bayesian(
 
															             draw_mat = np.stack(draws, axis=0)  # (mc_passes, batch, classes)
														
 
															             mean = draw_mat.mean(axis=0)[:, class_index]
														
 
															-            confidence = np.abs(draw_mat[:, :, class_index] - 0.5).mean(axis=0)
														
 
															+            confidence = mean
														
 
															             entropy_uncertainty = predictive_entropy(draw_mat)
														
 
															             true = labels_device[:, class_index].detach().cpu().numpy().astype(int)
														
@@ -237,7 +252,9 @@ def run_noise_analysis(
 
															     if backend == "ensemble":
														
 
															         models = _load_ensemble_models(config)
														
 
															         example_rows: list[tuple[float, torch.Tensor]] = []
														
 
															-        for sigma in noise_sigmas:
														
 
															+        sigma_iter = tqdm(noise_sigmas, desc="Noise sweep (ensemble)", unit="sigma")
														
 
															+        for sigma in sigma_iter:
														
 
															+            sigma_iter.set_postfix_str(f"sigma={sigma:g}")
														
 
															             y_true, y_prob, y_confidence, y_std = _infer_with_noise_ensemble(
														
 
															                 test_loader,
														
 
															                 models,
														
@@ -286,7 +303,9 @@ def run_noise_analysis(
 
															     elif backend == "bayesian":
														
 
															         model = _load_bayesian_model(config)
														
 
															         example_rows = []
														
 
															-        for sigma in noise_sigmas:
														
 
															+        sigma_iter = tqdm(noise_sigmas, desc="Noise sweep (bayesian)", unit="sigma")
														
 
															+        for sigma in sigma_iter:
														
 
															+            sigma_iter.set_postfix_str(f"sigma={sigma:g}")
														
 
															             y_true, y_prob, y_confidence, y_std = _infer_with_noise_bayesian(
														
 
															                 test_loader,
														
 
															                 model,
														
--- a/analysis/plotting.py
+++ b/analysis/plotting.py
@@ -10,7 +10,6 @@ import pandas as pd
 
															 import torch
														
 
															 from matplotlib.axes import Axes
														
 
															-
														
 
															 # Easily editable plot text overrides by plot key.
														
 
															 # Example:
														
 
															 # "performance_threshold": {
														
@@ -124,6 +123,56 @@ def _plot_correct_incorrect_bars(
 
															     bars_ax.grid(False)
														
 
															+def save_coverage_bar_plot(
														
 
															+    x_values: pd.Series | np.ndarray,
														
 
															+    n_correct: pd.Series | np.ndarray,
														
 
															+    n_incorrect: pd.Series | np.ndarray,
														
 
															+    x_label: str,
														
 
															+    title: str,
														
 
															+    output_path: Path,
														
 
															+) -> None:
														
 
															+    """Save a standalone bar chart showing sample counts (correct vs incorrect)."""
														
 
															+    x = np.asarray(x_values, dtype=float)
														
 
															+    correct = np.asarray(n_correct, dtype=float)
														
 
															+    incorrect = np.asarray(n_incorrect, dtype=float)
														
 
															+    if x.size == 0 or correct.size == 0 or incorrect.size == 0:
														
 
															+        return
														
 
															+
														
 
															+    width = float(np.diff(np.sort(x)).min()) * 0.8 if x.size > 1 else 0.04
														
 
															+    max_count = float(max(np.nanmax(correct), np.nanmax(incorrect), 1.0))
														
 
															+
														
 
															+    fig, ax = plt.subplots(figsize=(10, 5))
														
 
															+    ax.bar(
														
 
															+        x,
														
 
															+        correct,
														
 
															+        width=width,
														
 
															+        color="#2ca02c",
														
 
															+        alpha=0.6,
														
 
															+        label="correct",
														
 
															+        align="center",
														
 
															+    )
														
 
															+    ax.bar(
														
 
															+        x,
														
 
															+        -incorrect,
														
 
															+        width=width,
														
 
															+        color="#d62728",
														
 
															+        alpha=0.6,
														
 
															+        label="incorrect",
														
 
															+        align="center",
														
 
															+    )
														
 
															+    ax.axhline(0.0, color="gray", linewidth=0.8, alpha=0.4)
														
 
															+    ax.set_ylim(-1.15 * max_count, 1.15 * max_count)
														
 
															+    ax.set_xlabel(x_label)
														
 
															+    ax.set_ylabel("Sample Count")
														
 
															+    ax.set_title(title)
														
 
															+    ax.legend()
														
 
															+    ax.grid(True, alpha=0.3)
														
 
															+    fig.tight_layout()
														
 
															+    output_path.parent.mkdir(parents=True, exist_ok=True)
														
 
															+    fig.savefig(output_path)
														
 
															+    plt.close(fig)
														
 
															+
														
 
															+
														
 
															 def plots_dir(output_dir: Path) -> Path:
														
 
															     plots = output_dir / "plots"
														
 
															     plots.mkdir(parents=True, exist_ok=True)
														
@@ -153,7 +202,6 @@ def save_performance_threshold_plot(
 
															     )
														
 
															     fig, ax = plt.subplots(figsize=(10, 5))
														
 
															-    _plot_correct_incorrect_bars(ax, df["threshold"], n_correct, n_incorrect)
														
 
															     ax.plot(df["threshold"], df[metric_column], label=metric_label, marker="o")
														
 
															     ax.set_xlabel(x_label)
														
 
															     ax.set_ylabel(y_label)
														
@@ -165,6 +213,17 @@ def save_performance_threshold_plot(
 
															     fig.savefig(output_path)
														
 
															     plt.close(fig)
														
 
															+    # Generate separate coverage bar plot
														
 
															+    coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
														
 
															+    save_coverage_bar_plot(
														
 
															+        x_values=df["threshold"],
														
 
															+        n_correct=n_correct,
														
 
															+        n_incorrect=n_incorrect,
														
 
															+        x_label=x_label,
														
 
															+        title=f"Sample Distribution vs Decision Threshold ({backend})",
														
 
															+        output_path=coverage_path,
														
 
															+    )
														
 
															+
														
 
															 def save_performance_threshold_pair_plot(
														
 
															     df: pd.DataFrame,
														
@@ -191,7 +250,6 @@ def save_performance_threshold_pair_plot(
 
															         (axes[0], "accuracy", "Accuracy", "o"),
														
 
															         (axes[1], "f1", "F1", "s"),
														
 
															     ]:
														
 
															-        _plot_correct_incorrect_bars(ax, df["threshold"], n_correct, n_incorrect)
														
 
															         ax.plot(df["threshold"], df[metric_col], label=metric_label, marker=marker)
														
 
															         ax.set_xlabel(x_label)
														
 
															         ax.set_ylabel(metric_label)
														
@@ -205,6 +263,17 @@ def save_performance_threshold_pair_plot(
 
															     fig.savefig(output_path)
														
 
															     plt.close(fig)
														
 
															+    # Generate separate coverage bar plot
														
 
															+    coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
														
 
															+    save_coverage_bar_plot(
														
 
															+        x_values=df["threshold"],
														
 
															+        n_correct=n_correct,
														
 
															+        n_incorrect=n_incorrect,
														
 
															+        x_label=x_label,
														
 
															+        title=f"Sample Distribution vs Decision Threshold ({backend})",
														
 
															+        output_path=coverage_path,
														
 
															+    )
														
 
															+
														
 
															 def save_uncertainty_cutoff_plot(
														
 
															     cutoff_df: pd.DataFrame,
														
@@ -223,23 +292,6 @@ def save_uncertainty_cutoff_plot(
 
															     )
														
 
															     fig, ax = plt.subplots(figsize=(10, 5))
														
 
															-    first_group = (
														
 
															-        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
														
 
															-        .groupby("uncertainty_type", as_index=False)
														
 
															-        .head(1)
														
 
															-    )
														
 
															-    if not first_group.empty:
														
 
															-        # Draw count bars once; uncertainty lines are overlaid afterwards.
														
 
															-        rep_name = str(first_group.iloc[0]["uncertainty_type"])
														
 
															-        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
														
 
															-            "restriction_level"
														
 
															-        )
														
 
															-        _plot_correct_incorrect_bars(
														
 
															-            ax,
														
 
															-            rep["restriction_level"],
														
 
															-            pd.to_numeric(rep["n_correct"], errors="coerce"),
														
 
															-            pd.to_numeric(rep["n_incorrect"], errors="coerce"),
														
 
															-        )
														
 
															     for uncertainty_name, group in cutoff_df.groupby("uncertainty_type"):
														
 
															         g = group.sort_values("restriction_level")
														
@@ -260,6 +312,27 @@ def save_uncertainty_cutoff_plot(
 
															     fig.savefig(output_path)
														
 
															     plt.close(fig)
														
 
															+    # Generate separate coverage bar plot
														
 
															+    first_group = (
														
 
															+        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
														
 
															+        .groupby("uncertainty_type", as_index=False)
														
 
															+        .head(1)
														
 
															+    )
														
 
															+    if not first_group.empty:
														
 
															+        rep_name = str(first_group.iloc[0]["uncertainty_type"])
														
 
															+        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
														
 
															+            "restriction_level"
														
 
															+        )
														
 
															+        coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
														
 
															+        save_coverage_bar_plot(
														
 
															+            x_values=rep["restriction_level"],
														
 
															+            n_correct=pd.to_numeric(rep["n_correct"], errors="coerce"),
														
 
															+            n_incorrect=pd.to_numeric(rep["n_incorrect"], errors="coerce"),
														
 
															+            x_label=x_label_final,
														
 
															+            title=f"Sample Coverage vs {title_prefix}",
														
 
															+            output_path=coverage_path,
														
 
															+        )
														
 
															+
														
 
															 def save_uncertainty_cutoff_pair_plot(
														
 
															     cutoff_df: pd.DataFrame,
														
@@ -276,23 +349,6 @@ def save_uncertainty_cutoff_pair_plot(
 
															     )
														
 
															     fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharex=True)
														
 
															-    first_group = (
														
 
															-        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
														
 
															-        .groupby("uncertainty_type", as_index=False)
														
 
															-        .head(1)
														
 
															-    )
														
 
															-    if not first_group.empty:
														
 
															-        rep_name = str(first_group.iloc[0]["uncertainty_type"])
														
 
															-        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
														
 
															-            "restriction_level"
														
 
															-        )
														
 
															-        for ax in axes:
														
 
															-            _plot_correct_incorrect_bars(
														
 
															-                ax,
														
 
															-                rep["restriction_level"],
														
 
															-                pd.to_numeric(rep["n_correct"], errors="coerce"),
														
 
															-                pd.to_numeric(rep["n_incorrect"], errors="coerce"),
														
 
															-            )
														
 
															     for uncertainty_name, group in cutoff_df.groupby("uncertainty_type"):
														
 
															         g = group.sort_values("restriction_level")
														
@@ -317,6 +373,27 @@ def save_uncertainty_cutoff_pair_plot(
 
															     fig.savefig(output_path)
														
 
															     plt.close(fig)
														
 
															+    # Generate separate coverage bar plot
														
 
															+    first_group = (
														
 
															+        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
														
 
															+        .groupby("uncertainty_type", as_index=False)
														
 
															+        .head(1)
														
 
															+    )
														
 
															+    if not first_group.empty:
														
 
															+        rep_name = str(first_group.iloc[0]["uncertainty_type"])
														
 
															+        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
														
 
															+            "restriction_level"
														
 
															+        )
														
 
															+        coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
														
 
															+        save_coverage_bar_plot(
														
 
															+            x_values=rep["restriction_level"],
														
 
															+            n_correct=pd.to_numeric(rep["n_correct"], errors="coerce"),
														
 
															+            n_incorrect=pd.to_numeric(rep["n_incorrect"], errors="coerce"),
														
 
															+            x_label=x_label_final,
														
 
															+            title=f"Sample Coverage vs {title_prefix}",
														
 
															+            output_path=coverage_path,
														
 
															+        )
														
 
															+
														
 
															 def save_calibration_plot(per_bin: np.ndarray, backend: str, output_path: Path) -> None:
														
 
															     title, x_label, y_label = _resolve_plot_text(
														
--- a/analysis/regenerate_plots.py
+++ b/analysis/regenerate_plots.py
@@ -0,0 +1,327 @@
 
															+# pyright: basic
														
 
															+
														
 
															+"""Regenerate analysis plots from existing computed data (CSV files).
														
 
															+
														
 
															+This script regenerates all plots from previously computed analysis results
														
 
															+without re-running the full analysis pipeline. Useful when making changes
														
 
															+to plotting parameters or fixing visualizations.
														
 
															+
														
 
															+Usage: Run from the project root (alnn_rewrite directory):
														
 
															+    python analysis/regenerate_plots.py /path/to/run_directory/backend_name
														
 
															+
														
 
															+Example:
														
 
															+    python analysis/regenerate_plots.py analysis_output/run_20260428_120000/ensemble
														
 
															+"""
														
 
															+
														
 
															+from __future__ import annotations
														
 
															+
														
 
															+import argparse
														
 
															+import sys
														
 
															+from pathlib import Path
														
 
															+from typing import Any
														
 
															+
														
 
															+import numpy as np
														
 
															+import pandas as pd
														
 
															+
														
 
															+# Add parent directory to path for imports
														
 
															+sys.path.insert(0, str(Path(__file__).parent.parent))
														
 
															+
														
 
															+from analysis.analysis_modules import _uncertainty_cutoff_analysis
														
 
															+from analysis.defaults import (
														
 
															+    DEFAULT_CALIBRATION_BINS,
														
 
															+    DEFAULT_DECISION_THRESHOLD,
														
 
															+    uncertainty_cutoff_percentiles,
														
 
															+)
														
 
															+from analysis.plotting import (
														
 
															+    plots_dir,
														
 
															+    save_calibration_plot,
														
 
															+    save_performance_threshold_pair_plot,
														
 
															+    save_performance_threshold_plot,
														
 
															+    save_uncertainty_cutoff_pair_plot,
														
 
															+    save_uncertainty_cutoff_plot,
														
 
															+)
														
 
															+from analysis.runtime import write_json
														
 
															+
														
 
															+
														
 
															+def _plot_description(filename: str) -> str:
														
 
															+    descriptions = {
														
 
															+        "performance_threshold_accuracy.png": "Accuracy as the decision threshold varies.",
														
 
															+        "performance_threshold_f1.png": "F1 score as the decision threshold varies.",
														
 
															+        "performance_threshold_accuracy_f1.png": "Accuracy and F1 shown side-by-side as the decision threshold varies.",
														
 
															+        "performance_uncertainty_cutoff_accuracy.png": "Accuracy while progressively restricting to higher-confidence and uncertainty-metric subsets.",
														
 
															+        "performance_uncertainty_cutoff_f1.png": "F1 score while progressively restricting to higher-confidence and uncertainty-metric subsets.",
														
 
															+        "performance_uncertainty_cutoff_accuracy_f1.png": "Accuracy and F1 shown side-by-side across uncertainty-cutoff restriction levels.",
														
 
															+        "performance_uncertainty_percentile_cutoff_accuracy.png": "Accuracy from least to most restricted percentile-wise subset selection.",
														
 
															+        "performance_uncertainty_percentile_cutoff_f1.png": "F1 score from least to most restricted percentile-wise subset selection.",
														
 
															+        "performance_uncertainty_percentile_cutoff_accuracy_f1.png": "Accuracy and F1 shown side-by-side across percentile-floor restriction levels.",
														
 
															+        "calibration_reliability.png": "Reliability diagram comparing predicted probability to empirical outcome frequency.",
														
 
															+        "performance_threshold_accuracy_coverage.png": "Sample distribution (correct vs incorrect) across decision thresholds.",
														
 
															+        "performance_threshold_f1_coverage.png": "Sample distribution (correct vs incorrect) across decision thresholds.",
														
 
															+        "performance_threshold_accuracy_f1_coverage.png": "Sample distribution (correct vs incorrect) across decision thresholds.",
														
 
															+        "performance_uncertainty_cutoff_accuracy_coverage.png": "Sample coverage breakdown across restriction levels.",
														
 
															+        "performance_uncertainty_cutoff_f1_coverage.png": "Sample coverage breakdown across restriction levels.",
														
 
															+        "performance_uncertainty_cutoff_accuracy_f1_coverage.png": "Sample coverage breakdown across restriction levels.",
														
 
															+        "performance_uncertainty_percentile_cutoff_accuracy_coverage.png": "Sample coverage breakdown as percentile floor increases.",
														
 
															+        "performance_uncertainty_percentile_cutoff_f1_coverage.png": "Sample coverage breakdown as percentile floor increases.",
														
 
															+        "performance_uncertainty_percentile_cutoff_accuracy_f1_coverage.png": "Sample coverage breakdown as percentile floor increases.",
														
 
															+    }
														
 
															+    return descriptions.get(filename, "Generated analysis plot.")
														
 
															+
														
 
															+
														
 
															+def _write_backend_plot_report(backend: str, out_dir: Path) -> Path:
														
 
															+    plots = out_dir / "plots"
														
 
															+    images = sorted(plots.rglob("*.png")) if plots.exists() else []
														
 
															+
														
 
															+    report_path = out_dir / "plots_report.md"
														
 
															+    lines = [
														
 
															+        f"# {backend.title()} Analysis Plot Report (Regenerated)",
														
 
															+        "",
														
 
															+        "This document lists regenerated analysis plots with brief descriptions.",
														
 
															+        "",
														
 
															+    ]
														
 
															+    if not images:
														
 
															+        lines.append("No plot images were found for this backend run.")
														
 
															+    else:
														
 
															+        for image_path in images:
														
 
															+            rel = image_path.relative_to(out_dir).as_posix()
														
 
															+            title = image_path.stem.replace("_", " ").title()
														
 
															+            lines.append(f"## {title}")
														
 
															+            lines.append(_plot_description(image_path.name))
														
 
															+            lines.append("")
														
 
															+            lines.append(f"![{title}]({rel})")
														
 
															+            lines.append("")
														
 
															+
														
 
															+    report_path.write_text("\n".join(lines), encoding="utf-8")
														
 
															+    return report_path
														
 
															+
														
 
															+
														
 
															+def regenerate_performance_plots(backend_dir: Path) -> dict[str, Any]:
														
 
															+    """Regenerate performance threshold plots from existing CSV."""
														
 
															+    perf_csv = backend_dir / "performance_threshold_sweep.csv"
														
 
															+    if not perf_csv.exists():
														
 
															+        return {"status": "skipped", "reason": "no performance_threshold_sweep.csv"}
														
 
															+
														
 
															+    df = pd.read_csv(perf_csv)
														
 
															+    backend = backend_dir.name if backend_dir.name != "plots" else "ensemble"
														
 
															+
														
 
															+    # Get backend name from parent directory name if not found
														
 
															+    if backend_dir.parent.name not in ["ensemble", "bayesian"]:
														
 
															+        parent_name = backend_dir.name
														
 
															+        if parent_name in {"ensemble", "bayesian"}:
														
 
															+            backend = parent_name
														
 
															+
														
 
															+    accuracy_plot_path = plots_dir(backend_dir) / "performance_threshold_accuracy.png"
														
 
															+    f1_plot_path = plots_dir(backend_dir) / "performance_threshold_f1.png"
														
 
															+    pair_plot_path = plots_dir(backend_dir) / "performance_threshold_accuracy_f1.png"
														
 
															+
														
 
															+    save_performance_threshold_plot(
														
 
															+        df=df,
														
 
															+        backend=backend,
														
 
															+        output_path=accuracy_plot_path,
														
 
															+        metric_column="accuracy",
														
 
															+        metric_label="Accuracy",
														
 
															+        plot_key="performance_threshold_accuracy",
														
 
															+    )
														
 
															+    save_performance_threshold_plot(
														
 
															+        df=df,
														
 
															+        backend=backend,
														
 
															+        output_path=f1_plot_path,
														
 
															+        metric_column="f1",
														
 
															+        metric_label="F1",
														
 
															+        plot_key="performance_threshold_f1",
														
 
															+    )
														
 
															+    save_performance_threshold_pair_plot(
														
 
															+        df=df,
														
 
															+        backend=backend,
														
 
															+        output_path=pair_plot_path,
														
 
															+        plot_key="performance_threshold_accuracy_f1",
														
 
															+    )
														
 
															+
														
 
															+    return {
														
 
															+        "status": "regenerated",
														
 
															+        "performance_threshold_accuracy": str(accuracy_plot_path),
														
 
															+        "performance_threshold_f1": str(f1_plot_path),
														
 
															+        "performance_threshold_accuracy_f1": str(pair_plot_path),
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def regenerate_uncertainty_cutoff_plots(backend_dir: Path) -> dict[str, Any]:
														
 
															+    """Regenerate uncertainty cutoff plots from existing CSV."""
														
 
															+    cutoff_csv = backend_dir / "performance_uncertainty_cutoff.csv"
														
 
															+    percentile_csv = backend_dir / "performance_uncertainty_percentile_cutoff.csv"
														
 
															+
														
 
															+    results = {"status": "skipped", "reason": "no cutoff CSV files found"}
														
 
															+
														
 
															+    if cutoff_csv.exists():
														
 
															+        cutoff_df = pd.read_csv(cutoff_csv)
														
 
															+        results["status"] = "regenerated"
														
 
															+
														
 
															+        # Create plots by uncertainty type
														
 
															+        for uncertainty_name in sorted(pd.unique(cutoff_df["uncertainty_type"])):
														
 
															+            sub_df = cutoff_df[cutoff_df["uncertainty_type"] == uncertainty_name].copy()
														
 
															+            slug = uncertainty_name.lower().replace(" ", "_")
														
 
															+
														
 
															+            sub_accuracy_plot_path = (
														
 
															+                plots_dir(backend_dir)
														
 
															+                / f"performance_uncertainty_cutoff_{slug}_accuracy.png"
														
 
															+            )
														
 
															+            sub_f1_plot_path = (
														
 
															+                plots_dir(backend_dir) / f"performance_uncertainty_cutoff_{slug}_f1.png"
														
 
															+            )
														
 
															+            sub_pair_plot_path = (
														
 
															+                plots_dir(backend_dir)
														
 
															+                / f"performance_uncertainty_cutoff_{slug}_accuracy_f1.png"
														
 
															+            )
														
 
															+
														
 
															+            save_uncertainty_cutoff_plot(
														
 
															+                cutoff_df=sub_df,
														
 
															+                title_prefix="Model Output / Uncertainty Cutoff Percentile",
														
 
															+                x_label="Restriction Level (0 = all samples, 100 = most restricted subset)",
														
 
															+                output_path=sub_accuracy_plot_path,
														
 
															+                metric_column="accuracy",
														
 
															+                metric_label="Accuracy",
														
 
															+                plot_key="performance_uncertainty_cutoff_accuracy",
														
 
															+            )
														
 
															+            save_uncertainty_cutoff_plot(
														
 
															+                cutoff_df=sub_df,
														
 
															+                title_prefix="Model Output / Uncertainty Cutoff Percentile",
														
 
															+                x_label="Restriction Level (0 = all samples, 100 = most restricted subset)",
														
 
															+                output_path=sub_f1_plot_path,
														
 
															+                metric_column="f1",
														
 
															+                metric_label="F1",
														
 
															+                plot_key="performance_uncertainty_cutoff_f1",
														
 
															+            )
														
 
															+            save_uncertainty_cutoff_pair_plot(
														
 
															+                cutoff_df=sub_df,
														
 
															+                title_prefix="Model Output / Uncertainty Cutoff Percentile",
														
 
															+                x_label="Restriction Level (0 = all samples, 100 = most restricted subset)",
														
 
															+                output_path=sub_pair_plot_path,
														
 
															+                plot_key="performance_uncertainty_cutoff_accuracy_f1",
														
 
															+            )
														
 
															+
														
 
															+    if percentile_csv.exists():
														
 
															+        percentile_df = pd.read_csv(percentile_csv)
														
 
															+        results["status"] = "regenerated"
														
 
															+
														
 
															+        # Create plots by uncertainty type
														
 
															+        for uncertainty_name in sorted(pd.unique(percentile_df["uncertainty_type"])):
														
 
															+            sub_df = percentile_df[
														
 
															+                percentile_df["uncertainty_type"] == uncertainty_name
														
 
															+            ].copy()
														
 
															+            slug = uncertainty_name.lower().replace(" ", "_")
														
 
															+
														
 
															+            sub_accuracy_plot_path = (
														
 
															+                plots_dir(backend_dir)
														
 
															+                / f"performance_uncertainty_percentile_cutoff_{slug}_accuracy.png"
														
 
															+            )
														
 
															+            sub_f1_plot_path = (
														
 
															+                plots_dir(backend_dir)
														
 
															+                / f"performance_uncertainty_percentile_cutoff_{slug}_f1.png"
														
 
															+            )
														
 
															+            sub_pair_plot_path = (
														
 
															+                plots_dir(backend_dir)
														
 
															+                / f"performance_uncertainty_percentile_cutoff_{slug}_accuracy_f1.png"
														
 
															+            )
														
 
															+
														
 
															+            save_uncertainty_cutoff_plot(
														
 
															+                cutoff_df=sub_df,
														
 
															+                title_prefix="Model Output / Uncertainty Percentile Floor",
														
 
															+                x_label="Percentile Floor (0 = all samples, 100 = top percentile subset)",
														
 
															+                output_path=sub_accuracy_plot_path,
														
 
															+                metric_column="accuracy",
														
 
															+                metric_label="Accuracy",
														
 
															+                plot_key="performance_uncertainty_percentile_cutoff_accuracy",
														
 
															+            )
														
 
															+            save_uncertainty_cutoff_plot(
														
 
															+                cutoff_df=sub_df,
														
 
															+                title_prefix="Model Output / Uncertainty Percentile Floor",
														
 
															+                x_label="Percentile Floor (0 = all samples, 100 = top percentile subset)",
														
 
															+                output_path=sub_f1_plot_path,
														
 
															+                metric_column="f1",
														
 
															+                metric_label="F1",
														
 
															+                plot_key="performance_uncertainty_percentile_cutoff_f1",
														
 
															+            )
														
 
															+            save_uncertainty_cutoff_pair_plot(
														
 
															+                cutoff_df=sub_df,
														
 
															+                title_prefix="Model Output / Uncertainty Percentile Floor",
														
 
															+                x_label="Percentile Floor (0 = all samples, 100 = top percentile subset)",
														
 
															+                output_path=sub_pair_plot_path,
														
 
															+                plot_key="performance_uncertainty_percentile_cutoff_accuracy_f1",
														
 
															+            )
														
 
															+
														
 
															+    return results
														
 
															+
														
 
															+
														
 
															+def regenerate_calibration_plots(backend_dir: Path) -> dict[str, Any]:
														
 
															+    """Regenerate calibration plots from existing calibration data."""
														
 
															+    calib_path = backend_dir / "calibration_per_bin.npy"
														
 
															+    if not calib_path.exists():
														
 
															+        return {"status": "skipped", "reason": "no calibration_per_bin.npy"}
														
 
															+
														
 
															+    per_bin = np.load(calib_path)
														
 
															+    backend = backend_dir.name if backend_dir.name != "plots" else "ensemble"
														
 
															+
														
 
															+    # Get backend name from parent directory name if not found
														
 
															+    if backend_dir.parent.name not in ["ensemble", "bayesian"]:
														
 
															+        parent_name = backend_dir.name
														
 
															+        if parent_name in {"ensemble", "bayesian"}:
														
 
															+            backend = parent_name
														
 
															+
														
 
															+    plot_path = plots_dir(backend_dir) / "calibration_reliability.png"
														
 
															+    save_calibration_plot(per_bin=per_bin, backend=backend, output_path=plot_path)
														
 
															+
														
 
															+    return {
														
 
															+        "status": "regenerated",
														
 
															+        "calibration_reliability": str(plot_path),
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def main() -> None:
														
 
															+    parser = argparse.ArgumentParser(
														
 
															+        description="Regenerate analysis plots from existing computed data CSV files."
														
 
															+    )
														
 
															+    parser.add_argument(
														
 
															+        "backend_dir",
														
 
															+        type=Path,
														
 
															+        help="Path to backend-specific analysis output directory "
														
 
															+        "(e.g., analysis_output/run_xxx/ensemble)",
														
 
															+    )
														
 
															+
														
 
															+    args = parser.parse_args()
														
 
															+    backend_dir = args.backend_dir.resolve()
														
 
															+
														
 
															+    if not backend_dir.exists():
														
 
															+        print(
														
 
															+            f"Error: Backend directory does not exist: {backend_dir}", file=sys.stderr
														
 
															+        )
														
 
															+        sys.exit(1)
														
 
															+
														
 
															+    print(f"Regenerating plots from: {backend_dir}")
														
 
															+
														
 
															+    results: dict[str, Any] = {
														
 
															+        "backend_dir": str(backend_dir),
														
 
															+        "performance": regenerate_performance_plots(backend_dir),
														
 
															+        "uncertainty_cutoff": regenerate_uncertainty_cutoff_plots(backend_dir),
														
 
															+        "calibration": regenerate_calibration_plots(backend_dir),
														
 
															+    }
														
 
															+
														
 
															+    # Write updated report
														
 
															+    report_path = _write_backend_plot_report(
														
 
															+        backend=backend_dir.name, out_dir=backend_dir
														
 
															+    )
														
 
															+    results["plots_report"] = str(report_path)
														
 
															+
														
 
															+    print(f"\nPlot regeneration complete!")
														
 
															+    print(f"Results summary:")
														
 
															+    print(f"  Performance plots: {results['performance'].get('status', 'unknown')}")
														
 
															+    print(
														
 
															+        f"  Uncertainty cutoff plots: {results['uncertainty_cutoff'].get('status', 'unknown')}"
														
 
															+    )
														
 
															+    print(f"  Calibration plots: {results['calibration'].get('status', 'unknown')}")
														
 
															+    print(f"  Report written to: {report_path}")
														
 
															+
														
 
															+    write_json(backend_dir / "plot_regeneration_log.json", results)
														
 
															+
														
 
															+
														
 
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()