| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- # pyright: basic
- from __future__ import annotations
- from pathlib import Path
- from typing import Any
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- from scipy import stats
- from .plotting import annotate_stats_box, plots_dir
- from .runtime import write_json
- def _fit_line(metric: np.ndarray, accuracy: np.ndarray) -> dict[str, float]:
- x = np.asarray(metric, dtype=float)
- y = np.asarray(accuracy, dtype=float)
- if len(y) < 3:
- raise ValueError("Need at least 3 points for linear regression")
- reg = stats.linregress(x, y)
- return {
- "intercept": float(reg.intercept),
- "slope": float(reg.slope),
- "r_value": float(reg.rvalue),
- "p_value": float(reg.pvalue),
- "stderr": float(reg.stderr),
- "r_squared": float(reg.rvalue**2),
- }
- def _metric_specs_for_backend(backend: str, df: pd.DataFrame) -> list[tuple[str, str]]:
- specs: list[tuple[str, str]] = []
- if "mean_confidence" in df.columns:
- specs.append(("mean_confidence", "Confidence"))
- elif "mean_model_output_probability" in df.columns:
- # Backward-compatibility fallback for older CSV outputs.
- specs.append(("mean_model_output_probability", "Confidence"))
- if backend == "bayesian" and "mean_predictive_entropy" in df.columns:
- specs.append(("mean_predictive_entropy", "Predictive Uncertainty"))
- elif "mean_std" in df.columns:
- specs.append(("mean_std", "Standard Deviation"))
- return specs
def run_noise_accuracy_uncertainty_analysis(
    noise_df: pd.DataFrame,
    backend: str,
    output_dir: Path,
) -> dict[str, Any]:
    """Correlate uncertainty metrics with accuracy across noise levels.

    For every metric chosen by ``_metric_specs_for_backend`` the function
    drops rows with NaN/inf values, computes the Pearson correlation and a
    linear fit of accuracy against the metric, and draws one scatter panel
    (points coloured by ``noise_factor``) with the fitted line.

    A metric is skipped (its panel is left empty and no stats row is written)
    when fewer than 4 finite rows remain, or when all remaining metric values
    are identical, because a regression line is undefined in that case.

    Files written:
        * ``noise_accuracy_uncertainty_2d.png`` in ``plots_dir(output_dir)``
        * ``noise_accuracy_uncertainty_stats.csv`` in ``output_dir``
        * ``noise_accuracy_uncertainty_summary.md`` in ``output_dir``
        * ``noise_accuracy_uncertainty_summary.json`` in ``output_dir``

    Args:
        noise_df: Must contain ``noise_factor`` and ``accuracy`` columns plus
            at least one metric column recognised by
            ``_metric_specs_for_backend``.
        backend: Backend name; used for metric selection, plot titles and the
            ``backend`` field of every output.
        output_dir: Directory that receives the outputs; created if missing.

    Returns:
        A dict with the backend name, the paths (as strings) of the plot, the
        CSV table and the markdown summary, and the number of stats rows.

    Raises:
        KeyError: If ``noise_factor`` or ``accuracy`` is missing.
        ValueError: If no uncertainty metric column is available.
    """
    required_cols = ["noise_factor", "accuracy"]
    missing = [c for c in required_cols if c not in noise_df.columns]
    if missing:
        raise KeyError(f"Missing required columns in noise dataframe: {missing}")

    metric_specs = _metric_specs_for_backend(backend, noise_df)
    if not metric_specs:
        raise ValueError("No uncertainty metrics available for correlation analysis")

    # The CSV/markdown/JSON outputs are written directly under output_dir,
    # so make sure it exists before anything is saved.
    output_dir.mkdir(parents=True, exist_ok=True)
    plot_path = plots_dir(output_dir) / "noise_accuracy_uncertainty_2d.png"
    stats_rows: list[dict[str, Any]] = []

    # squeeze=False always yields a 2D axes array, so the single-metric case
    # needs no special handling.
    fig, axes = plt.subplots(
        1,
        len(metric_specs),
        figsize=(8 * len(metric_specs), 6),
        squeeze=False,
    )
    try:
        for ax, (metric_col, metric_label) in zip(axes[0], metric_specs):
            df = noise_df[["noise_factor", metric_col, "accuracy"]].copy()
            df = df.replace([np.inf, -np.inf], np.nan).dropna()
            if len(df) < 4:
                continue

            noise = np.asarray(df["noise_factor"], dtype=float)
            metric = np.asarray(df[metric_col], dtype=float)
            accuracy = np.asarray(df["accuracy"], dtype=float)

            # linregress raises ValueError when all x values are identical;
            # treat a constant metric like any other unusable metric and skip
            # it instead of aborting the whole analysis.
            if float(np.min(metric)) == float(np.max(metric)):
                continue

            pearson = stats.pearsonr(metric, accuracy)
            fit = _fit_line(metric=metric, accuracy=accuracy)

            stats_rows.append(
                {
                    "backend": backend,
                    "metric_column": metric_col,
                    "metric_label": metric_label,
                    "n_points": int(len(df)),
                    "pearson_r_metric_vs_accuracy": float(pearson.statistic),
                    "p_value_metric_vs_accuracy": float(pearson.pvalue),
                    "regression_intercept": float(fit["intercept"]),
                    "regression_slope": float(fit["slope"]),
                    "regression_slope_stderr": float(fit["stderr"]),
                    "regression_r_squared": float(fit["r_squared"]),
                }
            )

            scatter = ax.scatter(
                metric,
                accuracy,
                c=noise,
                cmap="viridis",
                s=42,
                edgecolors="none",
            )

            # Draw the fitted line across the observed metric range.
            x_line = np.linspace(float(np.min(metric)), float(np.max(metric)), num=200)
            y_line = fit["intercept"] + fit["slope"] * x_line
            ax.plot(x_line, y_line, color="#1f77b4", linewidth=2.0, label="Linear fit")

            ax.set_xlabel(metric_label)
            ax.set_ylabel("Accuracy")
            ax.set_title(f"{backend.title()} - {metric_label} vs Accuracy")
            ax.grid(True, alpha=0.3)
            ax.legend()

            annotate_stats_box(
                ax,
                lines=[
                    f"Pearson r = {pearson.statistic:.3f}",
                    f"p-value = {pearson.pvalue:.3g}",
                    f"R^2 = {fit['r_squared']:.3f}",
                ],
                location="upper left",
            )

            cbar = fig.colorbar(scatter, ax=ax)
            cbar.set_label("Noise Factor")

        fig.tight_layout()
        plot_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if a statistic or plot call fails,
        # so pyplot does not keep it alive.
        plt.close(fig)

    stats_df = pd.DataFrame(stats_rows)
    stats_csv = output_dir / "noise_accuracy_uncertainty_stats.csv"
    stats_df.to_csv(stats_csv, index=False)

    summary_md = output_dir / "noise_accuracy_uncertainty_summary.md"
    lines = [
        f"# Noise Accuracy-Uncertainty Analysis ({backend})",
        "",
        "This analysis collapses the noise axis and fits a 2D linear relationship between uncertainty metric and accuracy.",
        "Noise factor is encoded as point color in the plot.",
        "",
        "## Metrics",
    ]
    if not stats_rows:
        lines.append("- No valid metric rows were available for regression.")
    else:
        # Iterate the row dicts directly; they hold the same values as the
        # DataFrame rows without the per-row Series construction.
        for row in stats_rows:
            lines.extend(
                [
                    f"- {row['metric_label']}: Pearson r={row['pearson_r_metric_vs_accuracy']:.4f}, p={row['p_value_metric_vs_accuracy']:.4g}",
                    f"  - Regression R^2: {row['regression_r_squared']:.4f}",
                ]
            )
    summary_md.write_text("\n".join(lines), encoding="utf-8")

    payload = {
        "backend": backend,
        "plot": str(plot_path),
        "table": str(stats_csv),
        "summary_markdown": str(summary_md),
        "rows": int(len(stats_df)),
    }
    write_json(output_dir / "noise_accuracy_uncertainty_summary.json", payload)
    return payload
|