# pyright: basic

from __future__ import annotations

import argparse
from pathlib import Path
from typing import Any

import pandas as pd
from tqdm.auto import tqdm

from analysis.analysis_modules import (
    run_calibration,
    run_longitudinal,
    run_performance,
    run_physician,
)
from analysis.dataset_summary import run_dataset_summary
from analysis.data_access import load_backend_evaluation, load_clinical_table
from analysis.defaults import (
    DEFAULT_BACKENDS,
    DEFAULT_BAYESIAN_MC_PASSES,
    DEFAULT_CALIBRATION_BINS,
    DEFAULT_DECISION_THRESHOLD,
    DEFAULT_POSITIVE_CLASS_INDEX,
    noise_factor_grid,
    threshold_grid,
)
from analysis.holdout_evaluation import ensure_backend_netcdf
from analysis.longitudinal_audit import run_longitudinal_breakdown_audit
from analysis.noise_correlation import run_noise_accuracy_uncertainty_analysis
from analysis.noise_analysis import run_noise_analysis
from analysis.runtime import backend_dir, init_runtime_paths, load_config, write_json


def _plot_description(filename: str) -> str:
    """Return the one-line report description for a generated plot file.

    Args:
        filename: Bare file name of the plot (e.g. ``"noise_confidence.png"``),
            not a full path.

    Returns:
        The matching description, or a generic fallback sentence when the
        file name is not one of the known plots.
    """
    catalog = {
        # Threshold sweep plots.
        "performance_threshold_accuracy.png": (
            "Accuracy as the decision threshold varies."
        ),
        "performance_threshold_f1.png": (
            "F1 score as the decision threshold varies."
        ),
        "performance_threshold_accuracy_f1.png": (
            "Accuracy and F1 shown side-by-side as the decision threshold varies."
        ),
        # Uncertainty cutoff plots.
        "performance_uncertainty_cutoff_accuracy.png": (
            "Accuracy while progressively restricting to higher-confidence and uncertainty-metric subsets."
        ),
        "performance_uncertainty_cutoff_f1.png": (
            "F1 score while progressively restricting to higher-confidence and uncertainty-metric subsets."
        ),
        "performance_uncertainty_cutoff_accuracy_f1.png": (
            "Accuracy and F1 shown side-by-side across uncertainty-cutoff restriction levels."
        ),
        # Percentile-wise uncertainty cutoff plots.
        "performance_uncertainty_percentile_cutoff_accuracy.png": (
            "Accuracy from least to most restricted percentile-wise subset selection."
        ),
        "performance_uncertainty_percentile_cutoff_f1.png": (
            "F1 score from least to most restricted percentile-wise subset selection."
        ),
        "performance_uncertainty_percentile_cutoff_accuracy_f1.png": (
            "Accuracy and F1 shown side-by-side across percentile-floor restriction levels."
        ),
        # Calibration.
        "calibration_reliability.png": (
            "Reliability diagram comparing predicted probability to empirical outcome frequency."
        ),
        # Physician confidence comparisons.
        "physician_confidence_boxplot.png": (
            "Confidence grouped by physician confidence ratings."
        ),
        "physician_std_boxplot.png": (
            "Standard deviation grouped by physician confidence ratings."
        ),
        "physician_predictive_entropy_boxplot.png": (
            "Predictive uncertainty grouped by physician confidence ratings."
        ),
        # Longitudinal cohort comparisons.
        "longitudinal_cohort_confidence.png": (
            "Longitudinal cohort comparison using confidence."
        ),
        "longitudinal_cohort_std.png": (
            "Longitudinal cohort comparison using standard deviation."
        ),
        "longitudinal_cohort_predictive_entropy.png": (
            "Longitudinal cohort comparison using predictive uncertainty."
        ),
        # Gaussian noise sensitivity.
        "noise_sensitivity_accuracy.png": (
            "Accuracy trend across increasing Gaussian noise factors."
        ),
        "noise_sensitivity_f1.png": (
            "F1 trend across increasing Gaussian noise factors."
        ),
        "noise_sensitivity_accuracy_f1.png": (
            "Accuracy and F1 shown side-by-side across increasing Gaussian noise factors."
        ),
        "noise_confidence.png": (
            "Confidence trend across increasing Gaussian noise factors."
        ),
        "noise_standard_deviation.png": (
            "Standard deviation trend across increasing Gaussian noise factors."
        ),
        "noise_confidence_standard_deviation.png": (
            "Confidence and standard deviation shown side-by-side across increasing Gaussian noise factors."
        ),
        "noise_predictive_uncertainty.png": (
            "Predictive uncertainty trend across increasing Gaussian noise factors."
        ),
        "noise_confidence_predictive_uncertainty.png": (
            "Confidence and predictive uncertainty shown side-by-side across increasing Gaussian noise factors."
        ),
        "noise_accuracy_uncertainty_2d.png": (
            "2D uncertainty-vs-accuracy relationship with linear fit (noise factor encoded by color)."
        ),
        # Example image panels (one file name per backend).
        "ensemble_noise_examples.png": (
            "Representative noisy image slices across selected Gaussian noise factors."
        ),
        "bayesian_noise_examples.png": (
            "Representative noisy image slices across selected Gaussian noise factors."
        ),
        "ensemble_clean_scan_example.png": (
            "Example clean scan image with no added noise."
        ),
        "bayesian_clean_scan_example.png": (
            "Example clean scan image with no added noise."
        ),
    }
    if filename in catalog:
        return catalog[filename]
    # Unknown plots still get a caption so the report never has a blank line.
    return "Generated analysis plot."


def _write_backend_plot_report(backend: str, out_dir: Path) -> Path:
    """Write ``plots_report.md`` listing every PNG found under ``out_dir/plots``.

    Args:
        backend: Backend name; title-cased for the report heading.
        out_dir: Backend output directory. The report is written directly
            inside it and image links are relative to it.

    Returns:
        Path of the written Markdown report.
    """
    plots_root = out_dir / "plots"
    png_files: list[Path] = []
    if plots_root.exists():
        # Sorted so the report order is stable between runs.
        png_files = sorted(plots_root.rglob("*.png"))

    sections: list[str] = [
        f"# {backend.title()} Analysis Plot Report",
        "",
        "This document lists generated analysis plots with brief descriptions.",
        "",
    ]

    if png_files:
        for png in png_files:
            heading = png.stem.replace("_", " ").title()
            # POSIX-style relative link so the Markdown works on any platform.
            link = png.relative_to(out_dir).as_posix()
            sections.extend(
                [
                    f"## {heading}",
                    _plot_description(png.name),
                    "",
                    f"![{heading}]({link})",
                    "",
                ]
            )
    else:
        sections.append("No plot images were generated for this backend run.")

    destination = out_dir / "plots_report.md"
    destination.write_text("\n".join(sections), encoding="utf-8")
    return destination


def _parse_args() -> argparse.Namespace:
    """Parse command-line options for the analysis runner.

    Returns:
        The parsed namespace with ``backend``, ``run_name``, ``skip_noise``,
        ``longitudinal_breakdown_only``, ``noise_correlation_only`` and
        ``dataset_summary_only``.

    The three ``--*-only`` flags are mutually exclusive; supplying more than
    one ends the program through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Run modular evaluation analyses for ensemble and bayesian models. "
            "All outputs are written to alnn_rewrite/analysis_output."
        )
    )
    parser.add_argument(
        "--backend",
        nargs="+",
        choices=["ensemble", "bayesian"],
        default=DEFAULT_BACKENDS,
        help="Backends to evaluate.",
    )
    parser.add_argument(
        "--run-name",
        default=None,
        help="Optional run directory name under analysis_output.",
    )

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        (
            "--skip-noise",
            "Skip Gaussian noise sensitivity analysis.",
        ),
        (
            "--longitudinal-breakdown-only",
            (
                "Run only longitudinal cohort breakdown audit from existing model "
                "evaluation outputs (no full analysis rerun)."
            ),
        ),
        (
            "--noise-correlation-only",
            (
                "Run only the noise uncertainty-vs-accuracy correlation/regression "
                "analysis from an existing noise_sensitivity.csv per backend."
            ),
        ),
        (
            "--dataset-summary-only",
            (
                "Generate only dataset composition summary documentation "
                "(overall and train/validation/test class breakdown)."
            ),
        ),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)

    args = parser.parse_args()

    exclusive_selected = sum(
        1
        for chosen in (
            args.longitudinal_breakdown_only,
            args.noise_correlation_only,
            args.dataset_summary_only,
        )
        if chosen
    )
    if exclusive_selected > 1:
        parser.error(
            "Only one of --longitudinal-breakdown-only, "
            "--noise-correlation-only, and --dataset-summary-only may be used at once."
        )

    return args


def _run_longitudinal_breakdown_only(
    config: dict[str, Any],
    backend: str,
    clinical_df: pd.DataFrame,
    out_dir: Path,
) -> dict[str, Any]:
    """Run only the longitudinal breakdown audit for one backend.

    Loads the backend evaluation with ``load_backend_evaluation``, passes it
    with the clinical table to ``run_longitudinal_breakdown_audit``, and
    writes the result to ``longitudinal_breakdown_backend_summary.json`` in
    ``out_dir``.

    Args:
        config: Loaded project configuration.
        backend: Backend name to audit.
        clinical_df: Clinical table forwarded to the audit.
        out_dir: Backend output directory.

    Returns:
        The audit summary that was written to disk.
    """
    audit_summary = run_longitudinal_breakdown_audit(
        evaluation=load_backend_evaluation(
            config=config,
            backend=backend,
            class_index=DEFAULT_POSITIVE_CLASS_INDEX,
        ),
        clinical_df=clinical_df,
        output_dir=out_dir,
    )
    summary_file = out_dir / "longitudinal_breakdown_backend_summary.json"
    write_json(summary_file, audit_summary)
    return audit_summary


def _run_noise_correlation_only(
    backend: str,
    out_dir: Path,
) -> dict[str, Any]:
    """Run only the noise uncertainty-vs-accuracy analysis for one backend.

    Reads ``noise_sensitivity.csv`` from ``out_dir``, passes it to
    ``run_noise_accuracy_uncertainty_analysis``, and writes the result to
    ``noise_accuracy_uncertainty_backend_summary.json``.

    Args:
        backend: Backend name the noise table belongs to.
        out_dir: Backend output directory holding the existing noise table.

    Returns:
        The analysis summary that was written to disk.

    Raises:
        FileNotFoundError: If ``noise_sensitivity.csv`` does not exist in
            ``out_dir``.
    """
    csv_path = out_dir / "noise_sensitivity.csv"
    if csv_path.exists():
        result = run_noise_accuracy_uncertainty_analysis(
            noise_df=pd.read_csv(csv_path),
            backend=backend,
            output_dir=out_dir,
        )
        write_json(
            out_dir / "noise_accuracy_uncertainty_backend_summary.json",
            result,
        )
        return result

    # This mode never regenerates the table; it must come from an earlier run.
    raise FileNotFoundError(
        f"Expected existing noise table for --noise-correlation-only: {csv_path}"
    )


def _run_backend(
    config: dict[str, Any],
    root_dir: Path,
    backend: str,
    clinical_df: pd.DataFrame,
    skip_noise: bool,
    out_dir: Path,
) -> dict[str, Any]:
    """Run every analysis stage for one backend and write its summary files.

    Stages run in this order: performance, calibration, physician,
    longitudinal, then (unless ``skip_noise``) noise analysis followed by the
    noise uncertainty-vs-accuracy analysis. Afterwards the plot report and
    ``backend_summary.json`` are written into ``out_dir``.

    The two noise stages are best-effort: an exception in either is recorded
    in the summary as ``{"skipped": True, "reason": ...}`` instead of being
    raised. Each stage has its own handler, so a failure in the correlation
    stage does not discard the results of a successful noise analysis.
    Exceptions from the four core stages propagate to the caller.

    Args:
        config: Loaded project configuration.
        root_dir: Project root directory forwarded to the evaluation helpers.
        backend: Backend name to analyse.
        clinical_df: Clinical table used by the physician and longitudinal
            stages.
        skip_noise: When true, both noise stages are skipped and marked as
            such in the summary.
        out_dir: Backend output directory for all generated artifacts.

    Returns:
        The backend summary dictionary that was written to
        ``backend_summary.json``.
    """
    netcdf_path = ensure_backend_netcdf(
        config=config,
        root_dir=root_dir,
        backend=backend,
        bayesian_mc_passes=DEFAULT_BAYESIAN_MC_PASSES,
    )

    evaluation = load_backend_evaluation(
        config=config,
        backend=backend,
        class_index=DEFAULT_POSITIVE_CLASS_INDEX,
    )

    thresholds = threshold_grid()
    noise_factors = noise_factor_grid()

    summary: dict[str, Any] = {
        "backend": backend,
        "netcdf": str(netcdf_path),
        "source_file": str(evaluation.source_file),
        "uncertainty_metric": evaluation.uncertainty_metric,
    }

    # Four core stages, plus two noise stages unless they are skipped.
    n_stages = 4 + (0 if skip_noise else 2)
    stage_bar = tqdm(
        total=n_stages,
        desc=f"[{backend}] analysis stages",
        unit="stage",
        leave=False,
    )
    try:
        stage_bar.set_postfix_str("performance")
        summary["performance"] = run_performance(
            evaluation=evaluation,
            output_dir=out_dir,
            thresholds=thresholds,
        )
        stage_bar.update(1)

        stage_bar.set_postfix_str("calibration")
        summary["calibration"] = run_calibration(
            evaluation=evaluation,
            output_dir=out_dir,
            bins=DEFAULT_CALIBRATION_BINS,
        )
        stage_bar.update(1)

        stage_bar.set_postfix_str("physician")
        summary["physician"] = run_physician(
            evaluation=evaluation,
            clinical_df=clinical_df,
            output_dir=out_dir,
        )
        stage_bar.update(1)

        stage_bar.set_postfix_str("longitudinal")
        summary["longitudinal"] = run_longitudinal(
            evaluation=evaluation,
            clinical_df=clinical_df,
            output_dir=out_dir,
        )
        stage_bar.update(1)

        if skip_noise:
            summary["noise"] = {"skipped": True, "reason": "--skip-noise supplied"}
            summary["noise_accuracy_uncertainty"] = {
                "skipped": True,
                "reason": "Noise analysis skipped, so no noise table available.",
            }
        else:
            stage_bar.set_postfix_str("noise")
            try:
                summary["noise"] = run_noise_analysis(
                    config=config,
                    root_dir=root_dir,
                    backend=backend,
                    output_dir=out_dir,
                    class_index=DEFAULT_POSITIVE_CLASS_INDEX,
                    noise_sigmas=noise_factors,
                    threshold=DEFAULT_DECISION_THRESHOLD,
                    calibration_bins=DEFAULT_CALIBRATION_BINS,
                    bayesian_mc_passes=DEFAULT_BAYESIAN_MC_PASSES,
                )
            except Exception as exc:
                # Deliberately broad: noise analysis is best-effort and must
                # not abort the backend run. With no noise table, the
                # dependent correlation stage cannot run either.
                summary["noise"] = {
                    "skipped": True,
                    "reason": f"Noise analysis failed: {exc}",
                }
                summary["noise_accuracy_uncertainty"] = {
                    "skipped": True,
                    "reason": f"Noise relationship analysis failed: {exc}",
                }
                # Account for both noise stages in one step.
                stage_bar.update(2)
            else:
                stage_bar.update(1)

                stage_bar.set_postfix_str("noise-correlation")
                try:
                    noise_table_path = Path(str(summary["noise"]["table"]))
                    noise_df = pd.read_csv(noise_table_path)
                    summary["noise_accuracy_uncertainty"] = (
                        run_noise_accuracy_uncertainty_analysis(
                            noise_df=noise_df,
                            backend=backend,
                            output_dir=out_dir,
                        )
                    )
                except Exception as exc:
                    # Only this stage failed: keep the successful
                    # summary["noise"] and record the failure here alone.
                    summary["noise_accuracy_uncertainty"] = {
                        "skipped": True,
                        "reason": f"Noise relationship analysis failed: {exc}",
                    }
                # Exactly one tick for this stage, success or failure, so the
                # bar never exceeds its total.
                stage_bar.update(1)
    finally:
        stage_bar.close()

    report_path = _write_backend_plot_report(backend=backend, out_dir=out_dir)
    summary["plots_report"] = str(report_path)
    write_json(out_dir / "backend_summary.json", summary)
    return summary


def main() -> None:
    """Entry point: parse arguments, run the selected mode, write the manifest.

    Runtime paths, the configuration and the clinical table are loaded first.
    In ``--dataset-summary-only`` mode only the dataset summary is produced.
    Otherwise each requested backend is processed in turn with the
    longitudinal-breakdown, noise-correlation or full pipeline, depending on
    the flags. In every mode ``run_manifest.json`` is written to the run
    directory at the end.
    """
    args = _parse_args()

    paths = init_runtime_paths(
        analysis_dir=Path(__file__).resolve().parent,
        run_name=args.run_name,
    )
    config = load_config(paths.root_dir)
    clinical_df = load_clinical_table(config=config, root_dir=paths.root_dir)

    # Same precedence as the flag checks further down.
    if args.dataset_summary_only:
        mode = "dataset_summary_only"
    elif args.longitudinal_breakdown_only:
        mode = "longitudinal_breakdown_only"
    elif args.noise_correlation_only:
        mode = "noise_correlation_only"
    else:
        mode = "full"

    manifest: dict[str, Any] = {
        "run_dir": str(paths.run_dir),
        "output_root": str(paths.output_root),
        "mode": mode,
        "positive_class_index": DEFAULT_POSITIVE_CLASS_INDEX,
        "threshold_sweep": {
            "values": [float(value) for value in threshold_grid().tolist()],
        },
        "calibration_bins": DEFAULT_CALIBRATION_BINS,
        "noise_factors": noise_factor_grid(),
        "bayesian_mc_passes": DEFAULT_BAYESIAN_MC_PASSES,
        "decision_threshold": DEFAULT_DECISION_THRESHOLD,
        "backends": {},
    }

    if args.dataset_summary_only:
        manifest["dataset_summary"] = run_dataset_summary(
            config=config,
            root_dir=paths.root_dir,
            output_dir=paths.run_dir,
            positive_class_index=DEFAULT_POSITIVE_CLASS_INDEX,
        )
        write_json(paths.run_dir / "run_manifest.json", manifest)
        print(f"Dataset summary complete. Results saved to {paths.run_dir}")
        return

    per_backend = manifest["backends"]
    progress = tqdm(args.backend, desc="Backends", unit="backend")
    for backend in progress:
        out_dir = backend_dir(paths, backend)
        progress.set_postfix_str(backend)

        if args.longitudinal_breakdown_only:
            result = _run_longitudinal_breakdown_only(
                config=config,
                backend=backend,
                clinical_df=clinical_df,
                out_dir=out_dir,
            )
        elif args.noise_correlation_only:
            result = _run_noise_correlation_only(
                backend=backend,
                out_dir=out_dir,
            )
        else:
            result = _run_backend(
                config=config,
                root_dir=paths.root_dir,
                backend=backend,
                clinical_df=clinical_df,
                skip_noise=bool(args.skip_noise),
                out_dir=out_dir,
            )
        per_backend[backend] = result

    write_json(paths.run_dir / "run_manifest.json", manifest)
    print(f"Analysis complete. Results saved to {paths.run_dir}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()