6 giorni fa · e79e7f50c2
--- a/analysis/data_access.py
+++ b/analysis/data_access.py
@@ -51,14 +51,11 @@ def _positive_probability(
 
				     if "model" in predictions.dims:
			
 
				         class_probs = predictions.isel(img_class=class_index)
			
 
				         prob_mean = class_probs.mean(dim="model").values
			
 
				-        # Confidence is defined as distance from 0.5 and averaged across models.
			
 
				-        class_prob_arr = np.asarray(class_probs.values, dtype=float)
			
 
				-        model_axis = class_probs.dims.index("model")
			
 
				-        conf_mean = np.abs(class_prob_arr - 0.5).mean(axis=model_axis)
			
 
				+        # Confidence is the model-averaged probability of the selected class (same values as prob_mean).
			
 
				         prob_std = class_probs.std(dim="model").values
			
 
				         return (
			
 
				             np.asarray(prob_mean, dtype=float),
			
 
				-            np.asarray(conf_mean, dtype=float),
			
 
				+            np.asarray(prob_mean, dtype=float),
			
 
				             np.asarray(prob_std, dtype=float),
			
 
				             "std",
			
 
				         )
			
@@ -68,9 +65,6 @@ def _positive_probability(
 
				         dim = str(sample_like[0])
			
 
				         class_probs = predictions.isel(img_class=class_index)
			
 
				         prob_mean = class_probs.mean(dim=dim).values
			
 
				-        class_prob_arr = np.asarray(class_probs.values, dtype=float)
			
 
				-        sample_axis = class_probs.dims.index(dim)
			
 
				-        conf_mean = np.abs(class_prob_arr - 0.5).mean(axis=sample_axis)
			
 
				 
			
 
				         # For Bayesian MC predictions, uncertainty should come from predictive
			
 
				         # entropy of the predictive distribution rather than classwise std.
			
@@ -78,16 +72,15 @@ def _positive_probability(
 
				         entropy_uncertainty = predictive_entropy(np.asarray(mc_preds, dtype=float))
			
 
				         return (
			
 
				             np.asarray(prob_mean, dtype=float),
			
 
				-            np.asarray(conf_mean, dtype=float),
			
 
				+            np.asarray(prob_mean, dtype=float),
			
 
				             np.asarray(entropy_uncertainty, dtype=float),
			
 
				             "predictive_entropy",
			
 
				         )
			
 
				 
			
 
				     prob = predictions.isel(img_class=class_index).values
			
 
				-    conf = np.abs(np.asarray(prob, dtype=float) - 0.5)
			
 
				     return (
			
 
				         np.asarray(prob, dtype=float),
			
 
				-        np.asarray(conf, dtype=float),
			
 
				+        np.asarray(prob, dtype=float),
			
 
				         np.full_like(np.asarray(prob, dtype=float), np.nan),
			
 
				         "unknown",
			
 
				     )
			
--- a/analysis/evaluate_models.py
+++ b/analysis/evaluate_models.py
@@ -7,6 +7,7 @@ from pathlib import Path
 
				 from typing import Any
			
 
				 
			
 
				 import pandas as pd
			
 
				+from tqdm.auto import tqdm
			
 
				 
			
 
				 from analysis.analysis_modules import (
			
 
				     run_calibration,
			
@@ -229,65 +230,91 @@ def _run_backend(
 
				         "uncertainty_metric": evaluation.uncertainty_metric,
			
 
				     }
			
 
				 
			
 
				-    summary["performance"] = run_performance(
			
 
				-        evaluation=evaluation,
			
 
				-        output_dir=out_dir,
			
 
				-        thresholds=thresholds,
			
 
				-    )
			
 
				-    summary["calibration"] = run_calibration(
			
 
				-        evaluation=evaluation,
			
 
				-        output_dir=out_dir,
			
 
				-        bins=DEFAULT_CALIBRATION_BINS,
			
 
				-    )
			
 
				-    summary["physician"] = run_physician(
			
 
				-        evaluation=evaluation,
			
 
				-        clinical_df=clinical_df,
			
 
				-        output_dir=out_dir,
			
 
				-    )
			
 
				-    summary["longitudinal"] = run_longitudinal(
			
 
				-        evaluation=evaluation,
			
 
				-        clinical_df=clinical_df,
			
 
				-        output_dir=out_dir,
			
 
				+    n_stages = 4 + (0 if skip_noise else 2)
			
 
				+    stage_bar = tqdm(
			
 
				+        total=n_stages,
			
 
				+        desc=f"[{backend}] analysis stages",
			
 
				+        unit="stage",
			
 
				+        leave=False,
			
 
				     )
			
 
				+    try:
			
 
				+        stage_bar.set_postfix_str("performance")
			
 
				+        summary["performance"] = run_performance(
			
 
				+            evaluation=evaluation,
			
 
				+            output_dir=out_dir,
			
 
				+            thresholds=thresholds,
			
 
				+        )
			
 
				+        stage_bar.update(1)
			
 
				 
			
 
				-    if skip_noise:
			
 
				-        summary["noise"] = {"skipped": True, "reason": "--skip-noise supplied"}
			
 
				-        summary["noise_accuracy_uncertainty"] = {
			
 
				-            "skipped": True,
			
 
				-            "reason": "Noise analysis skipped, so no noise table available.",
			
 
				-        }
			
 
				-    else:
			
 
				-        try:
			
 
				-            summary["noise"] = run_noise_analysis(
			
 
				-                config=config,
			
 
				-                root_dir=root_dir,
			
 
				-                backend=backend,
			
 
				-                output_dir=out_dir,
			
 
				-                class_index=DEFAULT_POSITIVE_CLASS_INDEX,
			
 
				-                noise_sigmas=noise_factors,
			
 
				-                threshold=DEFAULT_DECISION_THRESHOLD,
			
 
				-                calibration_bins=DEFAULT_CALIBRATION_BINS,
			
 
				-                bayesian_mc_passes=DEFAULT_BAYESIAN_MC_PASSES,
			
 
				-            )
			
 
				+        stage_bar.set_postfix_str("calibration")
			
 
				+        summary["calibration"] = run_calibration(
			
 
				+            evaluation=evaluation,
			
 
				+            output_dir=out_dir,
			
 
				+            bins=DEFAULT_CALIBRATION_BINS,
			
 
				+        )
			
 
				+        stage_bar.update(1)
			
 
				 
			
 
				-            noise_table_path = Path(str(summary["noise"]["table"]))
			
 
				-            noise_df = pd.read_csv(noise_table_path)
			
 
				-            summary["noise_accuracy_uncertainty"] = (
			
 
				-                run_noise_accuracy_uncertainty_analysis(
			
 
				-                    noise_df=noise_df,
			
 
				-                    backend=backend,
			
 
				-                    output_dir=out_dir,
			
 
				-                )
			
 
				-            )
			
 
				-        except Exception as exc:
			
 
				-            summary["noise"] = {
			
 
				-                "skipped": True,
			
 
				-                "reason": f"Noise analysis failed: {exc}",
			
 
				-            }
			
 
				+        stage_bar.set_postfix_str("physician")
			
 
				+        summary["physician"] = run_physician(
			
 
				+            evaluation=evaluation,
			
 
				+            clinical_df=clinical_df,
			
 
				+            output_dir=out_dir,
			
 
				+        )
			
 
				+        stage_bar.update(1)
			
 
				+
			
 
				+        stage_bar.set_postfix_str("longitudinal")
			
 
				+        summary["longitudinal"] = run_longitudinal(
			
 
				+            evaluation=evaluation,
			
 
				+            clinical_df=clinical_df,
			
 
				+            output_dir=out_dir,
			
 
				+        )
			
 
				+        stage_bar.update(1)
			
 
				+
			
 
				+        if skip_noise:
			
 
				+            summary["noise"] = {"skipped": True, "reason": "--skip-noise supplied"}
			
 
				             summary["noise_accuracy_uncertainty"] = {
			
 
				                 "skipped": True,
			
 
				-                "reason": f"Noise relationship analysis failed: {exc}",
			
 
				+                "reason": "Noise analysis skipped, so no noise table available.",
			
 
				             }
			
 
				+        else:
			
 
				+            try:
			
 
				+                stage_bar.set_postfix_str("noise")
			
 
				+                summary["noise"] = run_noise_analysis(
			
 
				+                    config=config,
			
 
				+                    root_dir=root_dir,
			
 
				+                    backend=backend,
			
 
				+                    output_dir=out_dir,
			
 
				+                    class_index=DEFAULT_POSITIVE_CLASS_INDEX,
			
 
				+                    noise_sigmas=noise_factors,
			
 
				+                    threshold=DEFAULT_DECISION_THRESHOLD,
			
 
				+                    calibration_bins=DEFAULT_CALIBRATION_BINS,
			
 
				+                    bayesian_mc_passes=DEFAULT_BAYESIAN_MC_PASSES,
			
 
				+                )
			
 
				+                stage_bar.update(1)
			
 
				+
			
 
				+                stage_bar.set_postfix_str("noise-correlation")
			
 
				+                noise_table_path = Path(str(summary["noise"]["table"]))
			
 
				+                noise_df = pd.read_csv(noise_table_path)
			
 
				+                summary["noise_accuracy_uncertainty"] = (
			
 
				+                    run_noise_accuracy_uncertainty_analysis(
			
 
				+                        noise_df=noise_df,
			
 
				+                        backend=backend,
			
 
				+                        output_dir=out_dir,
			
 
				+                    )
			
 
				+                )
			
 
				+                stage_bar.update(1)
			
 
				+            except Exception as exc:
			
 
				+                summary["noise"] = {
			
 
				+                    "skipped": True,
			
 
				+                    "reason": f"Noise analysis failed: {exc}",
			
 
				+                }
			
 
				+                summary["noise_accuracy_uncertainty"] = {
			
 
				+                    "skipped": True,
			
 
				+                    "reason": f"Noise relationship analysis failed: {exc}",
			
 
				+                }
			
 
				+                stage_bar.update(2)
			
 
				+    finally:
			
 
				+        stage_bar.close()
			
 
				 
			
 
				     report_path = _write_backend_plot_report(backend=backend, out_dir=out_dir)
			
 
				     summary["plots_report"] = str(report_path)
			
@@ -341,8 +368,10 @@ def main() -> None:
 
				         print(f"Dataset summary complete. Results saved to {paths.run_dir}")
			
 
				         return
			
 
				 
			
 
				-    for backend in args.backend:
			
 
				+    backend_iter = tqdm(args.backend, desc="Backends", unit="backend")
			
 
				+    for backend in backend_iter:
			
 
				         out_dir = backend_dir(paths, backend)
			
 
				+        backend_iter.set_postfix_str(backend)
			
 
				         if args.longitudinal_breakdown_only:
			
 
				             manifest["backends"][backend] = _run_longitudinal_breakdown_only(
			
 
				                 config=config,
			
--- a/analysis/holdout_evaluation.py
+++ b/analysis/holdout_evaluation.py
@@ -9,6 +9,7 @@ from typing import Any
 
				 import numpy as np
			
 
				 import torch
			
 
				 from torch.utils.data import DataLoader
			
 
				+from tqdm.auto import tqdm
			
 
				 import xarray as xr
			
 
				 
			
 
				 from model.cnn import CNN3D
			
@@ -66,7 +67,9 @@ def _evaluate_ensemble(
 
				     labels = np.zeros((n_samples, n_classes), dtype=np.float32)
			
 
				     image_ids = np.zeros((n_samples,), dtype=int)
			
 
				 
			
 
				-    for model_i, model_file in enumerate(model_files):
			
 
				+    model_iter = tqdm(model_files, desc="Ensemble checkpoints", unit="model")
			
 
				+    for model_i, model_file in enumerate(model_iter):
			
 
				+        model_iter.set_postfix_str(model_file.name)
			
 
				         model = _init_cnn(config)
			
 
				         model.load_state_dict(
			
 
				             torch.load(model_file, map_location=device),
			
@@ -76,7 +79,14 @@ def _evaluate_ensemble(
 
				         model.eval()
			
 
				 
			
 
				         with torch.no_grad():
			
 
				-            for sample_i, (mri, xls, label, img_id) in enumerate(holdout_loader):
			
 
				+            sample_iter = tqdm(
			
 
				+                holdout_loader,
			
 
				+                total=n_samples,
			
 
				+                desc=f"{model_file.name}",
			
 
				+                unit="batch",
			
 
				+                leave=False,
			
 
				+            )
			
 
				+            for sample_i, (mri, xls, label, img_id) in enumerate(sample_iter):
			
 
				                 mri_device = mri.float().to(device)
			
 
				                 xls_device = xls.float().to(device)
			
 
				                 output = model((mri_device, xls_device))
			
@@ -156,8 +166,17 @@ def _evaluate_bayesian(
 
				     image_ids = np.zeros((n_samples,), dtype=int)
			
 
				 
			
 
				     with torch.no_grad():
			
 
				-        for pass_i in range(mc_passes):
			
 
				-            for sample_i, (mri, xls, label, img_id) in enumerate(holdout_loader):
			
 
				+        pass_iter = tqdm(range(mc_passes), desc="Bayesian MC passes", unit="pass")
			
 
				+        for pass_i in pass_iter:
			
 
				+            pass_iter.set_postfix_str(f"pass={pass_i + 1}/{mc_passes}")
			
 
				+            sample_iter = tqdm(
			
 
				+                holdout_loader,
			
 
				+                total=n_samples,
			
 
				+                desc=f"MC pass {pass_i + 1}",
			
 
				+                unit="batch",
			
 
				+                leave=False,
			
 
				+            )
			
 
				+            for sample_i, (mri, xls, label, img_id) in enumerate(sample_iter):
			
 
				                 mri_device = mri.float().to(device)
			
 
				                 xls_device = xls.float().to(device)
			
 
				                 output = model((mri_device, xls_device))
			
--- a/analysis/noise_analysis.py
+++ b/analysis/noise_analysis.py
@@ -9,6 +9,7 @@ import numpy as np
 
				 import pandas as pd
			
 
				 import torch
			
 
				 from bayesian_torch.utils.util import predictive_entropy
			
 
				+from tqdm.auto import tqdm
			
 
				 
			
 
				 from model.cnn import CNN3D
			
 
				 
			
@@ -132,7 +133,14 @@ def _infer_with_noise_ensemble(
 
				     all_true: list[int] = []
			
 
				 
			
 
				     with torch.no_grad():
			
 
				-        for mri, xls, labels, _ in test_loader:
			
 
				+        batch_iter = tqdm(
			
 
				+            test_loader,
			
 
				+            total=len(test_loader),
			
 
				+            desc=f"ensemble sigma={sigma:g}",
			
 
				+            unit="batch",
			
 
				+            leave=False,
			
 
				+        )
			
 
				+        for mri, xls, labels, _ in batch_iter:
			
 
				             mri_device = mri.float().to(device)
			
 
				             xls_device = xls.float().to(device)
			
 
				             labels_device = labels.to(device)
			
@@ -144,7 +152,7 @@ def _infer_with_noise_ensemble(
 
				 
			
 
				             pred_mat = np.stack(preds, axis=0)
			
 
				             mean = pred_mat.mean(axis=0)
			
 
				-            confidence = np.abs(pred_mat - 0.5).mean(axis=0)
			
 
				+            confidence = mean
			
 
				             std = pred_mat.std(axis=0)
			
 
				             true = labels_device[:, class_index].detach().cpu().numpy().astype(int)
			
 
				 
			
@@ -176,7 +184,14 @@ def _infer_with_noise_bayesian(
 
				     all_true: list[int] = []
			
 
				 
			
 
				     with torch.no_grad():
			
 
				-        for mri, xls, labels, _ in test_loader:
			
 
				+        batch_iter = tqdm(
			
 
				+            test_loader,
			
 
				+            total=len(test_loader),
			
 
				+            desc=f"bayesian sigma={sigma:g}",
			
 
				+            unit="batch",
			
 
				+            leave=False,
			
 
				+        )
			
 
				+        for mri, xls, labels, _ in batch_iter:
			
 
				             mri_device = mri.float().to(device)
			
 
				             xls_device = xls.float().to(device)
			
 
				             labels_device = labels.to(device)
			
@@ -188,7 +203,7 @@ def _infer_with_noise_bayesian(
 
				 
			
 
				             draw_mat = np.stack(draws, axis=0)  # (mc_passes, batch, classes)
			
 
				             mean = draw_mat.mean(axis=0)[:, class_index]
			
 
				-            confidence = np.abs(draw_mat[:, :, class_index] - 0.5).mean(axis=0)
			
 
				+            confidence = mean
			
 
				             entropy_uncertainty = predictive_entropy(draw_mat)
			
 
				             true = labels_device[:, class_index].detach().cpu().numpy().astype(int)
			
 
				 
			
@@ -237,7 +252,9 @@ def run_noise_analysis(
 
				     if backend == "ensemble":
			
 
				         models = _load_ensemble_models(config)
			
 
				         example_rows: list[tuple[float, torch.Tensor]] = []
			
 
				-        for sigma in noise_sigmas:
			
 
				+        sigma_iter = tqdm(noise_sigmas, desc="Noise sweep (ensemble)", unit="sigma")
			
 
				+        for sigma in sigma_iter:
			
 
				+            sigma_iter.set_postfix_str(f"sigma={sigma:g}")
			
 
				             y_true, y_prob, y_confidence, y_std = _infer_with_noise_ensemble(
			
 
				                 test_loader,
			
 
				                 models,
			
@@ -286,7 +303,9 @@ def run_noise_analysis(
 
				     elif backend == "bayesian":
			
 
				         model = _load_bayesian_model(config)
			
 
				         example_rows = []
			
 
				-        for sigma in noise_sigmas:
			
 
				+        sigma_iter = tqdm(noise_sigmas, desc="Noise sweep (bayesian)", unit="sigma")
			
 
				+        for sigma in sigma_iter:
			
 
				+            sigma_iter.set_postfix_str(f"sigma={sigma:g}")
			
 
				             y_true, y_prob, y_confidence, y_std = _infer_with_noise_bayesian(
			
 
				                 test_loader,
			
 
				                 model,
			
--- a/analysis/plotting.py
+++ b/analysis/plotting.py
@@ -10,7 +10,6 @@ import pandas as pd
 
				 import torch
			
 
				 from matplotlib.axes import Axes
			
 
				 
			
 
				-
			
 
				 # Easily editable plot text overrides by plot key.
			
 
				 # Example:
			
 
				 # "performance_threshold": {
			
@@ -124,6 +123,56 @@ def _plot_correct_incorrect_bars(
 
				     bars_ax.grid(False)
			
 
				 
			
 
				 
			
 
				+def save_coverage_bar_plot(
			
 
				+    x_values: pd.Series | np.ndarray,
			
 
				+    n_correct: pd.Series | np.ndarray,
			
 
				+    n_incorrect: pd.Series | np.ndarray,
			
 
				+    x_label: str,
			
 
				+    title: str,
			
 
				+    output_path: Path,
			
 
				+) -> None:
			
 
				+    """Save a standalone bar chart showing sample counts (correct vs incorrect)."""
			
 
				+    x = np.asarray(x_values, dtype=float)
			
 
				+    correct = np.asarray(n_correct, dtype=float)
			
 
				+    incorrect = np.asarray(n_incorrect, dtype=float)
			
 
				+    if x.size == 0 or correct.size == 0 or incorrect.size == 0:
			
 
				+        return
			
 
				+
			
 
				+    width = float(np.diff(np.sort(x)).min()) * 0.8 if x.size > 1 else 0.04
			
 
				+    max_count = float(max(np.nanmax(correct), np.nanmax(incorrect), 1.0))
			
 
				+
			
 
				+    fig, ax = plt.subplots(figsize=(10, 5))
			
 
				+    ax.bar(
			
 
				+        x,
			
 
				+        correct,
			
 
				+        width=width,
			
 
				+        color="#2ca02c",
			
 
				+        alpha=0.6,
			
 
				+        label="correct",
			
 
				+        align="center",
			
 
				+    )
			
 
				+    ax.bar(
			
 
				+        x,
			
 
				+        -incorrect,
			
 
				+        width=width,
			
 
				+        color="#d62728",
			
 
				+        alpha=0.6,
			
 
				+        label="incorrect",
			
 
				+        align="center",
			
 
				+    )
			
 
				+    ax.axhline(0.0, color="gray", linewidth=0.8, alpha=0.4)
			
 
				+    ax.set_ylim(-1.15 * max_count, 1.15 * max_count)
			
 
				+    ax.set_xlabel(x_label)
			
 
				+    ax.set_ylabel("Sample Count")
			
 
				+    ax.set_title(title)
			
 
				+    ax.legend()
			
 
				+    ax.grid(True, alpha=0.3)
			
 
				+    fig.tight_layout()
			
 
				+    output_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+    fig.savefig(output_path)
			
 
				+    plt.close(fig)
			
 
				+
			
 
				+
			
 
				 def plots_dir(output_dir: Path) -> Path:
			
 
				     plots = output_dir / "plots"
			
 
				     plots.mkdir(parents=True, exist_ok=True)
			
@@ -153,7 +202,6 @@ def save_performance_threshold_plot(
 
				     )
			
 
				 
			
 
				     fig, ax = plt.subplots(figsize=(10, 5))
			
 
				-    _plot_correct_incorrect_bars(ax, df["threshold"], n_correct, n_incorrect)
			
 
				     ax.plot(df["threshold"], df[metric_column], label=metric_label, marker="o")
			
 
				     ax.set_xlabel(x_label)
			
 
				     ax.set_ylabel(y_label)
			
@@ -165,6 +213,17 @@ def save_performance_threshold_plot(
 
				     fig.savefig(output_path)
			
 
				     plt.close(fig)
			
 
				 
			
 
				+    # Generate separate coverage bar plot
			
 
				+    coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
			
 
				+    save_coverage_bar_plot(
			
 
				+        x_values=df["threshold"],
			
 
				+        n_correct=n_correct,
			
 
				+        n_incorrect=n_incorrect,
			
 
				+        x_label=x_label,
			
 
				+        title=f"Sample Distribution vs Decision Threshold ({backend})",
			
 
				+        output_path=coverage_path,
			
 
				+    )
			
 
				+
			
 
				 
			
 
				 def save_performance_threshold_pair_plot(
			
 
				     df: pd.DataFrame,
			
@@ -191,7 +250,6 @@ def save_performance_threshold_pair_plot(
 
				         (axes[0], "accuracy", "Accuracy", "o"),
			
 
				         (axes[1], "f1", "F1", "s"),
			
 
				     ]:
			
 
				-        _plot_correct_incorrect_bars(ax, df["threshold"], n_correct, n_incorrect)
			
 
				         ax.plot(df["threshold"], df[metric_col], label=metric_label, marker=marker)
			
 
				         ax.set_xlabel(x_label)
			
 
				         ax.set_ylabel(metric_label)
			
@@ -205,6 +263,17 @@ def save_performance_threshold_pair_plot(
 
				     fig.savefig(output_path)
			
 
				     plt.close(fig)
			
 
				 
			
 
				+    # Generate separate coverage bar plot
			
 
				+    coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
			
 
				+    save_coverage_bar_plot(
			
 
				+        x_values=df["threshold"],
			
 
				+        n_correct=n_correct,
			
 
				+        n_incorrect=n_incorrect,
			
 
				+        x_label=x_label,
			
 
				+        title=f"Sample Distribution vs Decision Threshold ({backend})",
			
 
				+        output_path=coverage_path,
			
 
				+    )
			
 
				+
			
 
				 
			
 
				 def save_uncertainty_cutoff_plot(
			
 
				     cutoff_df: pd.DataFrame,
			
@@ -223,23 +292,6 @@ def save_uncertainty_cutoff_plot(
 
				     )
			
 
				 
			
 
				     fig, ax = plt.subplots(figsize=(10, 5))
			
 
				-    first_group = (
			
 
				-        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
			
 
				-        .groupby("uncertainty_type", as_index=False)
			
 
				-        .head(1)
			
 
				-    )
			
 
				-    if not first_group.empty:
			
 
				-        # Draw count bars once; uncertainty lines are overlaid afterwards.
			
 
				-        rep_name = str(first_group.iloc[0]["uncertainty_type"])
			
 
				-        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
			
 
				-            "restriction_level"
			
 
				-        )
			
 
				-        _plot_correct_incorrect_bars(
			
 
				-            ax,
			
 
				-            rep["restriction_level"],
			
 
				-            pd.to_numeric(rep["n_correct"], errors="coerce"),
			
 
				-            pd.to_numeric(rep["n_incorrect"], errors="coerce"),
			
 
				-        )
			
 
				 
			
 
				     for uncertainty_name, group in cutoff_df.groupby("uncertainty_type"):
			
 
				         g = group.sort_values("restriction_level")
			
@@ -260,6 +312,27 @@ def save_uncertainty_cutoff_plot(
 
				     fig.savefig(output_path)
			
 
				     plt.close(fig)
			
 
				 
			
 
				+    # Generate separate coverage bar plot
			
 
				+    first_group = (
			
 
				+        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
			
 
				+        .groupby("uncertainty_type", as_index=False)
			
 
				+        .head(1)
			
 
				+    )
			
 
				+    if not first_group.empty:
			
 
				+        rep_name = str(first_group.iloc[0]["uncertainty_type"])
			
 
				+        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
			
 
				+            "restriction_level"
			
 
				+        )
			
 
				+        coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
			
 
				+        save_coverage_bar_plot(
			
 
				+            x_values=rep["restriction_level"],
			
 
				+            n_correct=pd.to_numeric(rep["n_correct"], errors="coerce"),
			
 
				+            n_incorrect=pd.to_numeric(rep["n_incorrect"], errors="coerce"),
			
 
				+            x_label=x_label_final,
			
 
				+            title=f"Sample Coverage vs {title_prefix}",
			
 
				+            output_path=coverage_path,
			
 
				+        )
			
 
				+
			
 
				 
			
 
				 def save_uncertainty_cutoff_pair_plot(
			
 
				     cutoff_df: pd.DataFrame,
			
@@ -276,23 +349,6 @@ def save_uncertainty_cutoff_pair_plot(
 
				     )
			
 
				 
			
 
				     fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharex=True)
			
 
				-    first_group = (
			
 
				-        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
			
 
				-        .groupby("uncertainty_type", as_index=False)
			
 
				-        .head(1)
			
 
				-    )
			
 
				-    if not first_group.empty:
			
 
				-        rep_name = str(first_group.iloc[0]["uncertainty_type"])
			
 
				-        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
			
 
				-            "restriction_level"
			
 
				-        )
			
 
				-        for ax in axes:
			
 
				-            _plot_correct_incorrect_bars(
			
 
				-                ax,
			
 
				-                rep["restriction_level"],
			
 
				-                pd.to_numeric(rep["n_correct"], errors="coerce"),
			
 
				-                pd.to_numeric(rep["n_incorrect"], errors="coerce"),
			
 
				-            )
			
 
				 
			
 
				     for uncertainty_name, group in cutoff_df.groupby("uncertainty_type"):
			
 
				         g = group.sort_values("restriction_level")
			
@@ -317,6 +373,27 @@ def save_uncertainty_cutoff_pair_plot(
 
				     fig.savefig(output_path)
			
 
				     plt.close(fig)
			
 
				 
			
 
				+    # Generate separate coverage bar plot
			
 
				+    first_group = (
			
 
				+        cutoff_df.sort_values(["uncertainty_type", "restriction_level"])
			
 
				+        .groupby("uncertainty_type", as_index=False)
			
 
				+        .head(1)
			
 
				+    )
			
 
				+    if not first_group.empty:
			
 
				+        rep_name = str(first_group.iloc[0]["uncertainty_type"])
			
 
				+        rep = cutoff_df[cutoff_df["uncertainty_type"] == rep_name].sort_values(
			
 
				+            "restriction_level"
			
 
				+        )
			
 
				+        coverage_path = output_path.parent / f"{output_path.stem}_coverage.png"
			
 
				+        save_coverage_bar_plot(
			
 
				+            x_values=rep["restriction_level"],
			
 
				+            n_correct=pd.to_numeric(rep["n_correct"], errors="coerce"),
			
 
				+            n_incorrect=pd.to_numeric(rep["n_incorrect"], errors="coerce"),
			
 
				+            x_label=x_label_final,
			
 
				+            title=f"Sample Coverage vs {title_prefix}",
			
 
				+            output_path=coverage_path,
			
 
				+        )
			
 
				+
			
 
				 
			
 
				 def save_calibration_plot(per_bin: np.ndarray, backend: str, output_path: Path) -> None:
			
 
				     title, x_label, y_label = _resolve_plot_text(
			
--- a/analysis/regenerate_plots.py
+++ b/analysis/regenerate_plots.py
@@ -0,0 +1,327 @@
 
				+# pyright: basic
			
 
				+
			
 
				+"""Regenerate analysis plots from existing computed data (CSV files).
			
 
				+
			
 
				+This script regenerates all plots from previously computed analysis results
			
 
				+without re-running the full analysis pipeline. Useful when making changes
			
 
				+to plotting parameters or fixing visualizations.
			
 
				+
			
 
				+Usage: Run from the project root (alnn_rewrite directory):
			
 
				+    python analysis/regenerate_plots.py /path/to/run_directory/backend_name
			
 
				+
			
 
				+Example:
			
 
				+    python analysis/regenerate_plots.py analysis_output/run_20260428_120000/ensemble
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import argparse
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+from typing import Any
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+
			
 
				+# Add parent directory to path for imports
			
 
				+sys.path.insert(0, str(Path(__file__).parent.parent))
			
 
				+
			
 
				+from analysis.analysis_modules import _uncertainty_cutoff_analysis
			
 
				+from analysis.defaults import (
			
 
				+    DEFAULT_CALIBRATION_BINS,
			
 
				+    DEFAULT_DECISION_THRESHOLD,
			
 
				+    uncertainty_cutoff_percentiles,
			
 
				+)
			
 
				+from analysis.plotting import (
			
 
				+    plots_dir,
			
 
				+    save_calibration_plot,
			
 
				+    save_performance_threshold_pair_plot,
			
 
				+    save_performance_threshold_plot,
			
 
				+    save_uncertainty_cutoff_pair_plot,
			
 
				+    save_uncertainty_cutoff_plot,
			
 
				+)
			
 
				+from analysis.runtime import write_json
			
 
				+
			
 
				+
			
 
				+def _plot_description(filename: str) -> str:
			
 
				+    descriptions = {
			
 
				+        "performance_threshold_accuracy.png": "Accuracy as the decision threshold varies.",
			
 
				+        "performance_threshold_f1.png": "F1 score as the decision threshold varies.",
			
 
				+        "performance_threshold_accuracy_f1.png": "Accuracy and F1 shown side-by-side as the decision threshold varies.",
			
 
				+        "performance_uncertainty_cutoff_accuracy.png": "Accuracy while progressively restricting to higher-confidence and uncertainty-metric subsets.",
			
 
				+        "performance_uncertainty_cutoff_f1.png": "F1 score while progressively restricting to higher-confidence and uncertainty-metric subsets.",
			
 
				+        "performance_uncertainty_cutoff_accuracy_f1.png": "Accuracy and F1 shown side-by-side across uncertainty-cutoff restriction levels.",
			
 
				+        "performance_uncertainty_percentile_cutoff_accuracy.png": "Accuracy from least to most restricted percentile-wise subset selection.",
			
 
				+        "performance_uncertainty_percentile_cutoff_f1.png": "F1 score from least to most restricted percentile-wise subset selection.",
			
 
				+        "performance_uncertainty_percentile_cutoff_accuracy_f1.png": "Accuracy and F1 shown side-by-side across percentile-floor restriction levels.",
			
 
				+        "calibration_reliability.png": "Reliability diagram comparing predicted probability to empirical outcome frequency.",
			
 
				+        "performance_threshold_accuracy_coverage.png": "Sample distribution (correct vs incorrect) across decision thresholds.",
			
 
				+        "performance_threshold_f1_coverage.png": "Sample distribution (correct vs incorrect) across decision thresholds.",
			
 
				+        "performance_threshold_accuracy_f1_coverage.png": "Sample distribution (correct vs incorrect) across decision thresholds.",
			
 
				+        "performance_uncertainty_cutoff_accuracy_coverage.png": "Sample coverage breakdown across restriction levels.",
			
 
				+        "performance_uncertainty_cutoff_f1_coverage.png": "Sample coverage breakdown across restriction levels.",
			
 
				+        "performance_uncertainty_cutoff_accuracy_f1_coverage.png": "Sample coverage breakdown across restriction levels.",
			
 
				+        "performance_uncertainty_percentile_cutoff_accuracy_coverage.png": "Sample coverage breakdown as percentile floor increases.",
			
 
				+        "performance_uncertainty_percentile_cutoff_f1_coverage.png": "Sample coverage breakdown as percentile floor increases.",
			
 
				+        "performance_uncertainty_percentile_cutoff_accuracy_f1_coverage.png": "Sample coverage breakdown as percentile floor increases.",
			
 
				+    }
			
 
				+    return descriptions.get(filename, "Generated analysis plot.")
			
 
				+
			
 
				+
			
 
				+def _write_backend_plot_report(backend: str, out_dir: Path) -> Path:
			
 
				+    plots = out_dir / "plots"
			
 
				+    images = sorted(plots.rglob("*.png")) if plots.exists() else []
			
 
				+
			
 
				+    report_path = out_dir / "plots_report.md"
			
 
				+    lines = [
			
 
				+        f"# {backend.title()} Analysis Plot Report (Regenerated)",
			
 
				+        "",
			
 
				+        "This document lists regenerated analysis plots with brief descriptions.",
			
 
				+        "",
			
 
				+    ]
			
 
				+    if not images:
			
 
				+        lines.append("No plot images were found for this backend run.")
			
 
				+    else:
			
 
				+        for image_path in images:
			
 
				+            rel = image_path.relative_to(out_dir).as_posix()
			
 
				+            title = image_path.stem.replace("_", " ").title()
			
 
				+            lines.append(f"## {title}")
			
 
				+            lines.append(_plot_description(image_path.name))
			
 
				+            lines.append("")
			
 
				+            lines.append(f"![{title}]({rel})")
			
 
				+            lines.append("")
			
 
				+
			
 
				+    report_path.write_text("\n".join(lines), encoding="utf-8")
			
 
				+    return report_path
			
 
				+
			
 
				+
			
 
				+def regenerate_performance_plots(backend_dir: Path) -> dict[str, Any]:
			
 
				+    """Regenerate performance threshold plots from existing CSV."""
			
 
				+    perf_csv = backend_dir / "performance_threshold_sweep.csv"
			
 
				+    if not perf_csv.exists():
			
 
				+        return {"status": "skipped", "reason": "no performance_threshold_sweep.csv"}
			
 
				+
			
 
				+    df = pd.read_csv(perf_csv)
			
 
				+    backend = backend_dir.name if backend_dir.name != "plots" else "ensemble"
			
 
				+
			
 
				+    # Get backend name from parent directory name if not found
			
 
				+    if backend_dir.parent.name not in ["ensemble", "bayesian"]:
			
 
				+        parent_name = backend_dir.name
			
 
				+        if parent_name in {"ensemble", "bayesian"}:
			
 
				+            backend = parent_name
			
 
				+
			
 
				+    accuracy_plot_path = plots_dir(backend_dir) / "performance_threshold_accuracy.png"
			
 
				+    f1_plot_path = plots_dir(backend_dir) / "performance_threshold_f1.png"
			
 
				+    pair_plot_path = plots_dir(backend_dir) / "performance_threshold_accuracy_f1.png"
			
 
				+
			
 
				+    save_performance_threshold_plot(
			
 
				+        df=df,
			
 
				+        backend=backend,
			
 
				+        output_path=accuracy_plot_path,
			
 
				+        metric_column="accuracy",
			
 
				+        metric_label="Accuracy",
			
 
				+        plot_key="performance_threshold_accuracy",
			
 
				+    )
			
 
				+    save_performance_threshold_plot(
			
 
				+        df=df,
			
 
				+        backend=backend,
			
 
				+        output_path=f1_plot_path,
			
 
				+        metric_column="f1",
			
 
				+        metric_label="F1",
			
 
				+        plot_key="performance_threshold_f1",
			
 
				+    )
			
 
				+    save_performance_threshold_pair_plot(
			
 
				+        df=df,
			
 
				+        backend=backend,
			
 
				+        output_path=pair_plot_path,
			
 
				+        plot_key="performance_threshold_accuracy_f1",
			
 
				+    )
			
 
				+
			
 
				+    return {
			
 
				+        "status": "regenerated",
			
 
				+        "performance_threshold_accuracy": str(accuracy_plot_path),
			
 
				+        "performance_threshold_f1": str(f1_plot_path),
			
 
				+        "performance_threshold_accuracy_f1": str(pair_plot_path),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def regenerate_uncertainty_cutoff_plots(backend_dir: Path) -> dict[str, Any]:
			
 
				+    """Regenerate uncertainty cutoff plots from existing CSV."""
			
 
				+    cutoff_csv = backend_dir / "performance_uncertainty_cutoff.csv"
			
 
				+    percentile_csv = backend_dir / "performance_uncertainty_percentile_cutoff.csv"
			
 
				+
			
 
				+    results = {"status": "skipped", "reason": "no cutoff CSV files found"}
			
 
				+
			
 
				+    if cutoff_csv.exists():
			
 
				+        cutoff_df = pd.read_csv(cutoff_csv)
			
 
				+        results["status"] = "regenerated"
			
 
				+
			
 
				+        # Create plots by uncertainty type
			
 
				+        for uncertainty_name in sorted(pd.unique(cutoff_df["uncertainty_type"])):
			
 
				+            sub_df = cutoff_df[cutoff_df["uncertainty_type"] == uncertainty_name].copy()
			
 
				+            slug = uncertainty_name.lower().replace(" ", "_")
			
 
				+
			
 
				+            sub_accuracy_plot_path = (
			
 
				+                plots_dir(backend_dir)
			
 
				+                / f"performance_uncertainty_cutoff_{slug}_accuracy.png"
			
 
				+            )
			
 
				+            sub_f1_plot_path = (
			
 
				+                plots_dir(backend_dir) / f"performance_uncertainty_cutoff_{slug}_f1.png"
			
 
				+            )
			
 
				+            sub_pair_plot_path = (
			
 
				+                plots_dir(backend_dir)
			
 
				+                / f"performance_uncertainty_cutoff_{slug}_accuracy_f1.png"
			
 
				+            )
			
 
				+
			
 
				+            save_uncertainty_cutoff_plot(
			
 
				+                cutoff_df=sub_df,
			
 
				+                title_prefix="Model Output / Uncertainty Cutoff Percentile",
			
 
				+                x_label="Restriction Level (0 = all samples, 100 = most restricted subset)",
			
 
				+                output_path=sub_accuracy_plot_path,
			
 
				+                metric_column="accuracy",
			
 
				+                metric_label="Accuracy",
			
 
				+                plot_key="performance_uncertainty_cutoff_accuracy",
			
 
				+            )
			
 
				+            save_uncertainty_cutoff_plot(
			
 
				+                cutoff_df=sub_df,
			
 
				+                title_prefix="Model Output / Uncertainty Cutoff Percentile",
			
 
				+                x_label="Restriction Level (0 = all samples, 100 = most restricted subset)",
			
 
				+                output_path=sub_f1_plot_path,
			
 
				+                metric_column="f1",
			
 
				+                metric_label="F1",
			
 
				+                plot_key="performance_uncertainty_cutoff_f1",
			
 
				+            )
			
 
				+            save_uncertainty_cutoff_pair_plot(
			
 
				+                cutoff_df=sub_df,
			
 
				+                title_prefix="Model Output / Uncertainty Cutoff Percentile",
			
 
				+                x_label="Restriction Level (0 = all samples, 100 = most restricted subset)",
			
 
				+                output_path=sub_pair_plot_path,
			
 
				+                plot_key="performance_uncertainty_cutoff_accuracy_f1",
			
 
				+            )
			
 
				+
			
 
				+    if percentile_csv.exists():
			
 
				+        percentile_df = pd.read_csv(percentile_csv)
			
 
				+        results["status"] = "regenerated"
			
 
				+
			
 
				+        # Create plots by uncertainty type
			
 
				+        for uncertainty_name in sorted(pd.unique(percentile_df["uncertainty_type"])):
			
 
				+            sub_df = percentile_df[
			
 
				+                percentile_df["uncertainty_type"] == uncertainty_name
			
 
				+            ].copy()
			
 
				+            slug = uncertainty_name.lower().replace(" ", "_")
			
 
				+
			
 
				+            sub_accuracy_plot_path = (
			
 
				+                plots_dir(backend_dir)
			
 
				+                / f"performance_uncertainty_percentile_cutoff_{slug}_accuracy.png"
			
 
				+            )
			
 
				+            sub_f1_plot_path = (
			
 
				+                plots_dir(backend_dir)
			
 
				+                / f"performance_uncertainty_percentile_cutoff_{slug}_f1.png"
			
 
				+            )
			
 
				+            sub_pair_plot_path = (
			
 
				+                plots_dir(backend_dir)
			
 
				+                / f"performance_uncertainty_percentile_cutoff_{slug}_accuracy_f1.png"
			
 
				+            )
			
 
				+
			
 
				+            save_uncertainty_cutoff_plot(
			
 
				+                cutoff_df=sub_df,
			
 
				+                title_prefix="Model Output / Uncertainty Percentile Floor",
			
 
				+                x_label="Percentile Floor (0 = all samples, 100 = top percentile subset)",
			
 
				+                output_path=sub_accuracy_plot_path,
			
 
				+                metric_column="accuracy",
			
 
				+                metric_label="Accuracy",
			
 
				+                plot_key="performance_uncertainty_percentile_cutoff_accuracy",
			
 
				+            )
			
 
				+            save_uncertainty_cutoff_plot(
			
 
				+                cutoff_df=sub_df,
			
 
				+                title_prefix="Model Output / Uncertainty Percentile Floor",
			
 
				+                x_label="Percentile Floor (0 = all samples, 100 = top percentile subset)",
			
 
				+                output_path=sub_f1_plot_path,
			
 
				+                metric_column="f1",
			
 
				+                metric_label="F1",
			
 
				+                plot_key="performance_uncertainty_percentile_cutoff_f1",
			
 
				+            )
			
 
				+            save_uncertainty_cutoff_pair_plot(
			
 
				+                cutoff_df=sub_df,
			
 
				+                title_prefix="Model Output / Uncertainty Percentile Floor",
			
 
				+                x_label="Percentile Floor (0 = all samples, 100 = top percentile subset)",
			
 
				+                output_path=sub_pair_plot_path,
			
 
				+                plot_key="performance_uncertainty_percentile_cutoff_accuracy_f1",
			
 
				+            )
			
 
				+
			
 
				+    return results
			
 
				+
			
 
				+
			
 
				+def regenerate_calibration_plots(backend_dir: Path) -> dict[str, Any]:
			
 
				+    """Regenerate calibration plots from existing calibration data."""
			
 
				+    calib_path = backend_dir / "calibration_per_bin.npy"
			
 
				+    if not calib_path.exists():
			
 
				+        return {"status": "skipped", "reason": "no calibration_per_bin.npy"}
			
 
				+
			
 
				+    per_bin = np.load(calib_path)
			
 
				+    backend = backend_dir.name if backend_dir.name != "plots" else "ensemble"
			
 
				+
			
 
				+    # Get backend name from parent directory name if not found
			
 
				+    if backend_dir.parent.name not in ["ensemble", "bayesian"]:
			
 
				+        parent_name = backend_dir.name
			
 
				+        if parent_name in {"ensemble", "bayesian"}:
			
 
				+            backend = parent_name
			
 
				+
			
 
				+    plot_path = plots_dir(backend_dir) / "calibration_reliability.png"
			
 
				+    save_calibration_plot(per_bin=per_bin, backend=backend, output_path=plot_path)
			
 
				+
			
 
				+    return {
			
 
				+        "status": "regenerated",
			
 
				+        "calibration_reliability": str(plot_path),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def main() -> None:
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description="Regenerate analysis plots from existing computed data CSV files."
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        "backend_dir",
			
 
				+        type=Path,
			
 
				+        help="Path to backend-specific analysis output directory "
			
 
				+        "(e.g., analysis_output/run_xxx/ensemble)",
			
 
				+    )
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+    backend_dir = args.backend_dir.resolve()
			
 
				+
			
 
				+    if not backend_dir.exists():
			
 
				+        print(
			
 
				+            f"Error: Backend directory does not exist: {backend_dir}", file=sys.stderr
			
 
				+        )
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    print(f"Regenerating plots from: {backend_dir}")
			
 
				+
			
 
				+    results: dict[str, Any] = {
			
 
				+        "backend_dir": str(backend_dir),
			
 
				+        "performance": regenerate_performance_plots(backend_dir),
			
 
				+        "uncertainty_cutoff": regenerate_uncertainty_cutoff_plots(backend_dir),
			
 
				+        "calibration": regenerate_calibration_plots(backend_dir),
			
 
				+    }
			
 
				+
			
 
				+    # Write updated report
			
 
				+    report_path = _write_backend_plot_report(
			
 
				+        backend=backend_dir.name, out_dir=backend_dir
			
 
				+    )
			
 
				+    results["plots_report"] = str(report_path)
			
 
				+
			
 
				+    print(f"\nPlot regeneration complete!")
			
 
				+    print(f"Results summary:")
			
 
				+    print(f"  Performance plots: {results['performance'].get('status', 'unknown')}")
			
 
				+    print(
			
 
				+        f"  Uncertainty cutoff plots: {results['uncertainty_cutoff'].get('status', 'unknown')}"
			
 
				+    )
			
 
				+    print(f"  Calibration plots: {results['calibration'].get('status', 'unknown')}")
			
 
				+    print(f"  Report written to: {report_path}")
			
 
				+
			
 
				+    write_json(backend_dir / "plot_regeneration_log.json", results)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()