# pyright: basic

from __future__ import annotations

import numpy as np


def binary_confusion(
    y_true: np.ndarray, y_prob: np.ndarray, threshold: float
) -> dict[str, int]:
    """Count confusion-matrix cells for scores cut at ``threshold``.

    A sample is predicted positive when its score is ``>= threshold``.
    Labels are cast to ``int`` and compared against 1 (positive) and
    0 (negative); a label with any other integer value is counted in no
    cell.

    Args:
        y_true: Ground-truth labels (array or array-like).
        y_prob: Predicted scores, one per label (array or array-like).
        threshold: Decision cut-off applied to ``y_prob``.

    Returns:
        Dict with plain-``int`` counts under the keys ``"tp"``, ``"fp"``,
        ``"tn"`` and ``"fn"``.
    """
    # asarray lets plain sequences through. Flattening keeps an (n, 1) column
    # and an (n,) vector from broadcasting into an n-by-n comparison, which
    # would silently inflate every count.
    truth = np.asarray(y_true).astype(int).ravel()
    predicted_pos = np.asarray(y_prob).ravel() >= threshold
    predicted_neg = ~predicted_pos

    is_pos = truth == 1
    is_neg = truth == 0

    return {
        "tp": int((predicted_pos & is_pos).sum()),
        "fp": int((predicted_pos & is_neg).sum()),
        "tn": int((predicted_neg & is_neg).sum()),
        "fn": int((predicted_neg & is_pos).sum()),
    }


def _safe_div(num: float, den: float) -> float:
    """Return ``num / den``, or ``0.0`` when the denominator equals zero."""
    return num / den if den != 0 else 0.0


def performance_at_threshold(
    y_true: np.ndarray,
    y_prob: np.ndarray,
    threshold: float,
) -> dict[str, float]:
    """Summarise classification quality at a single decision threshold.

    The confusion counts come from ``binary_confusion``; every ratio goes
    through ``_safe_div``, so a ratio with a zero denominator is reported
    as ``0.0``.

    Returns:
        Dict of floats keyed by ``"threshold"``, ``"accuracy"``,
        ``"precision"``, ``"recall"``, ``"f1"``, ``"tp"``, ``"fp"``,
        ``"tn"`` and ``"fn"`` (the counts are converted to float as well).
    """
    counts = binary_confusion(y_true, y_prob, threshold)
    tp, fp, tn, fn = (counts[key] for key in ("tp", "fp", "tn", "fn"))

    precision = _safe_div(tp, tp + fp)
    recall = _safe_div(tp, tp + fn)

    report = {
        "threshold": threshold,
        "accuracy": _safe_div(tp + tn, tp + fp + tn + fn),
        "precision": precision,
        "recall": recall,
        # Harmonic mean of precision and recall.
        "f1": _safe_div(2 * precision * recall, precision + recall),
        "tp": tp,
        "fp": fp,
        "tn": tn,
        "fn": fn,
    }
    return {name: float(value) for name, value in report.items()}


def threshold_sweep(
    y_true: np.ndarray,
    y_prob: np.ndarray,
    thresholds: np.ndarray,
) -> list[dict[str, float]]:
    """Evaluate ``performance_at_threshold`` at each value in ``thresholds``.

    Each threshold is converted to ``float`` before use; the results are
    returned in the same order as ``thresholds``.
    """
    results: list[dict[str, float]] = []
    for cutoff in thresholds:
        results.append(performance_at_threshold(y_true, y_prob, float(cutoff)))
    return results


def calibration_stats(
    y_true: np.ndarray,
    y_prob: np.ndarray,
    bins: int = 10,
) -> tuple[dict[str, float], np.ndarray]:
    """Compute binned calibration statistics and the Brier score.

    Scores are grouped into ``bins`` equal-width intervals spanning
    [0, 1]. Every interval is half-open ``[lo, hi)`` except the last one,
    which is closed so that a score of exactly 1.0 is included. Scores
    outside [0, 1] land in no bin but still contribute to the Brier score.

    Args:
        y_true: Ground-truth labels (array or array-like); cast to ``int``.
        y_prob: Predicted scores (array or array-like); cast to ``float``.
        bins: Number of equal-width bins.

    Returns:
        A ``(summary, per_bin)`` tuple.

        ``summary`` has the keys ``"mce"`` (largest absolute gap between
        the positive fraction and the mean score over the non-empty bins),
        ``"brier"`` (mean squared difference between score and label, NaN
        when there are no samples) and ``"bins"``.

        ``per_bin`` is a float array of shape ``(bins, 3)`` whose rows are
        ``(mean score, positive fraction, count)``. An empty bin is stored
        as ``(bin midpoint, nan, 0)``.
    """
    # Coerce and flatten so sequences and column vectors index consistently.
    labels = np.asarray(y_true).astype(int).ravel()
    scores = np.asarray(y_prob).astype(float).ravel()

    edges = np.linspace(0.0, 1.0, bins + 1)
    last_bin = bins - 1
    rows: list[tuple[float, float, int]] = []
    mce = 0.0

    for i, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
        in_bin = scores >= lo
        # Only the final bin includes its upper edge.
        in_bin &= (scores <= hi) if i == last_bin else (scores < hi)

        count = int(in_bin.sum())
        if count == 0:
            rows.append((float((lo + hi) / 2.0), np.nan, 0))
            continue

        mean_conf = float(scores[in_bin].mean())
        frac_pos = float(labels[in_bin].mean())
        mce = max(mce, abs(frac_pos - mean_conf))
        rows.append((mean_conf, frac_pos, count))

    # Taking the mean of an empty array yields NaN plus a RuntimeWarning;
    # return the NaN directly instead.
    if scores.size:
        brier = float(np.mean((scores - labels) ** 2))
    else:
        brier = float("nan")

    summary = {
        "mce": float(mce),
        "brier": brier,
        "bins": float(bins),
    }

    # reshape keeps the (bins, 3) layout even when there are no rows.
    per_bin = np.array(rows, dtype=float).reshape(-1, 3)
    return summary, per_bin