import os
import pathlib as pl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

from utils.config import config

# Compare the confidence of the model outputs with the physician confidence
# ratings recorded in the ADNI metadata CSV, and plot the relationship.

# Anchor the working directory to this script's location *before* any
# relative path is used. Previously the chdir happened after
# plots_dir.mkdir(), so when launched from another directory the plots
# folder was created in one place and written to in another.
# NOTE(review): assumes config paths (e.g. adni_path) are absolute or
# relative to this script's directory — confirm against the config file.
os.chdir(pl.Path(__file__).parent)

adni_data = pd.read_csv(config["analysis"]["adni_path"])

plots_dir = (
    pl.Path("../output") / pl.Path(config["analysis"]["evaluation_name"]) / "plots"
)
plots_dir.mkdir(parents=True, exist_ok=True)

# The ADNI column name embeds the rating legend (and has a trailing space).
physician_ratings = adni_data[
    "DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence) "
].to_numpy(dtype=str)
image_ids = adni_data["Image Data ID"].to_numpy(dtype=str)

physician_ratings = np.strings.strip(physician_ratings)
image_ids = np.strings.strip(image_ids)

# Keep only rows with a usable numeric rating: drop "na" (any letter case,
# so "NA"/"Na" cannot crash the int conversion) and empty entries.
lowered = np.strings.lower(physician_ratings)
valid_indices = np.where((lowered != "na") & (lowered != ""))[0]
physician_ratings = physician_ratings[valid_indices].astype(int)
csv_img_ids = image_ids[valid_indices].astype(int)

# Load the evaluation results (NetCDF written by the evaluation pipeline).
model_dataset_path = pl.Path("../model_evaluations") / pl.Path(
    config["analysis"]["evaluation_name"].strip()
).with_suffix(".nc")
print(f"Loading evaluation results from {model_dataset_path}")
array = xr.open_dataset(model_dataset_path)  # type: ignore

# The dataset holds two dataarrays, 'img_id' and 'predictions', used to
# determine the model confidence for each image ID.
eval_img_ids = array["img_id"]
predictions = array["predictions"]

# Average across models, then take the higher of the two class confidences
# as the model's confidence for each image.
model_confidences = predictions.mean(dim="model").max(dim="img_class").values

# Image IDs present in both the model evaluation and the physician ratings;
# the returned index arrays align the two data sources.
shared_img_ids, model_indices, csv_indices = np.intersect1d(
    eval_img_ids.values, csv_img_ids, return_indices=True
)
shared_physician_ratings = physician_ratings[csv_indices]
shared_model_confidences = model_confidences[model_indices]

# Report how the shared samples are distributed over the four rating levels.
print("Distribution of Physician Ratings for Shared Samples:")
unique, counts = np.unique(shared_physician_ratings, return_counts=True)
distribution = dict(zip(unique, counts))
for rating in range(1, 5):
    count = distribution.get(rating, 0)
    print(f" Rating {rating}: {count} samples")

# Box plot of model confidence grouped by physician confidence rating.
plt.figure(figsize=(10, 6))
plt.boxplot(
    [
        shared_model_confidences[shared_physician_ratings == rating]
        for rating in range(1, 5)
    ],
    positions=[1, 2, 3, 4],
    widths=0.6,
)
plt.xticks([1, 2, 3, 4], ["1 (Uncertain)", "2 (Mild)", "3 (Moderate)", "4 (High)"])
plt.xlabel("Physician Confidence Rating")
plt.ylabel("Model Confidence")
plt.title("Model Confidence vs Physician Confidence Ratings")
plt.grid(axis="y")
plt.savefig(plots_dir / "model_confidence_vs_physician_ratings_boxplot.png")
plt.close()