import os
import pathlib as pl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

from utils.config import config

# Compare the confidence of the model outputs with the physician confidence
# ratings recorded in the ADNI metadata CSV, and plot the relationship.

# Anchor the working directory to this script's location *before* any
# relative path is used. Previously the chdir happened after
# plots_dir.mkdir(), so when launched from another directory the plots
# folder was created in one place and written to in another.
# NOTE(review): assumes config paths (e.g. adni_path) are absolute or
# relative to this script's directory — confirm against the config file.
os.chdir(pl.Path(__file__).parent)

adni_data = pd.read_csv(config["analysis"]["adni_path"])

plots_dir = (
    pl.Path("../output") / pl.Path(config["analysis"]["evaluation_name"]) / "plots"
)
plots_dir.mkdir(parents=True, exist_ok=True)

# The ADNI column name embeds the rating legend (and has a trailing space).
physician_ratings = adni_data[
    "DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence) "
].to_numpy(dtype=str)
image_ids = adni_data["Image Data ID"].to_numpy(dtype=str)

physician_ratings = np.strings.strip(physician_ratings)
image_ids = np.strings.strip(image_ids)

# Keep only rows with a usable numeric rating: drop "na" (any letter case,
# so "NA"/"Na" cannot crash the int conversion) and empty entries.
lowered = np.strings.lower(physician_ratings)
valid_indices = np.where((lowered != "na") & (lowered != ""))[0]
physician_ratings = physician_ratings[valid_indices].astype(int)
csv_img_ids = image_ids[valid_indices].astype(int)

# Load the evaluation results (NetCDF written by the evaluation pipeline).
model_dataset_path = pl.Path("../model_evaluations") / pl.Path(
    config["analysis"]["evaluation_name"].strip()
).with_suffix(".nc")
print(f"Loading evaluation results from {model_dataset_path}")
array = xr.open_dataset(model_dataset_path)  # type: ignore

# The dataset holds two dataarrays, 'img_id' and 'predictions', used to
# determine the model confidence for each image ID.
eval_img_ids = array["img_id"]
predictions = array["predictions"]

# Average across models, then take the higher of the two class confidences
# as the model's confidence for each image.
model_confidences = predictions.mean(dim="model").max(dim="img_class").values

# Image IDs present in both the model evaluation and the physician ratings;
# the returned index arrays align the two data sources.
shared_img_ids, model_indices, csv_indices = np.intersect1d(
    eval_img_ids.values, csv_img_ids, return_indices=True
)
shared_physician_ratings = physician_ratings[csv_indices]
shared_model_confidences = model_confidences[model_indices]

# Report how the shared samples are distributed over the four rating levels.
print("Distribution of Physician Ratings for Shared Samples:")
unique, counts = np.unique(shared_physician_ratings, return_counts=True)
distribution = dict(zip(unique, counts))
for rating in range(1, 5):
    count = distribution.get(rating, 0)
    print(f" Rating {rating}: {count} samples")

# Box plot of model confidence grouped by physician confidence rating.
plt.figure(figsize=(10, 6))
plt.boxplot(
    [
        shared_model_confidences[shared_physician_ratings == rating]
        for rating in range(1, 5)
    ],
    positions=[1, 2, 3, 4],
    widths=0.6,
)
plt.xticks([1, 2, 3, 4], ["1 (Uncertain)", "2 (Mild)", "3 (Moderate)", "4 (High)"])
plt.xlabel("Physician Confidence Rating")
plt.ylabel("Model Confidence")
plt.title("Model Confidence vs Physician Confidence Ratings")
plt.grid(axis="y")
plt.savefig(plots_dir / "model_confidence_vs_physician_ratings_boxplot.png")
plt.close()