| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
"""Compare model confidence with physician diagnostic-confidence ratings.

The script reads the ADNI CSV named in the project config, keeps the rows
that carry a usable physician confidence rating (1-4), matches them by image
ID against a saved model-evaluation dataset, prints how many matched samples
fall under each rating, and saves a box plot of model confidence grouped by
physician rating.
"""

import os
import pathlib as pl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

from utils.config import config

# Column headers exactly as they appear in the ADNI CSV (the trailing space in
# the rating header is part of the header).
RATING_COLUMN = "DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence) "
IMAGE_ID_COLUMN = "Image Data ID"

# Stripped cell contents that mean "no rating". "nan" is what a blank CSV cell
# turns into once pandas has parsed it as NaN and it is converted to str.
MISSING_RATING_TOKENS = ["", "na", "nan"]

# Physician rating value -> tick label used on the plot.
RATING_LABELS = {
    1: "1 (Uncertain)",
    2: "2 (Mild)",
    3: "3 (Moderate)",
    4: "4 (High)",
}

# Loaded before the working directory changes below, so a relative
# `adni_path` keeps resolving against the directory the script was started in.
adni_data = pd.read_csv(config["analysis"]["adni_path"])

# Strip once so the plots directory and the dataset file agree on the name.
evaluation_name = config["analysis"]["evaluation_name"].strip()

# Anchor the plots directory to the script's own location. A relative path
# would be created against the launch directory here but written to against
# the script directory after os.chdir(), so the two could differ.
script_dir = pl.Path(__file__).resolve().parent
plots_dir = script_dir.parent / "output" / evaluation_name / "plots"
plots_dir.mkdir(parents=True, exist_ok=True)

# Work on whitespace-stripped strings so padded cells compare cleanly.
raw_ratings = np.strings.strip(adni_data[RATING_COLUMN].to_numpy(dtype=str))
raw_image_ids = np.strings.strip(adni_data[IMAGE_ID_COLUMN].to_numpy(dtype=str))

# Keep only rows that actually carry a rating.
has_rating = ~np.isin(raw_ratings, MISSING_RATING_TOKENS)
physician_ratings = raw_ratings[has_rating].astype(int)
# NOTE(review): assumes image IDs in the CSV are purely numeric - confirm.
csv_img_ids = raw_image_ids[has_rating].astype(int)

# Load the evaluation results (path is relative to the script directory).
os.chdir(pl.Path(__file__).parent)
model_dataset_path = (pl.Path("../model_evaluations") / evaluation_name).with_suffix(
    ".nc"
)
print(f"Loading evaluation results from {model_dataset_path}")
with xr.open_dataset(model_dataset_path) as evaluation:  # type: ignore
    # The dataset provides 'img_id' and 'predictions'. Both are pulled into
    # memory here so the file can be closed straight away.
    eval_img_ids = evaluation["img_id"].values
    # Average across models, then take the larger class score as the
    # confidence for each image.
    # NOTE(review): assumes the remaining axis lines up with 'img_id' - confirm.
    model_confidences = (
        evaluation["predictions"].mean(dim="model").max(dim="img_class").values
    )

# Image IDs present in both sources, with their positions in each array.
shared_img_ids, model_indices, csv_indices = np.intersect1d(
    eval_img_ids, csv_img_ids, return_indices=True
)
shared_physician_ratings = physician_ratings[csv_indices]
shared_model_confidences = model_confidences[model_indices]

# Print distribution of ratings for shared samples.
print("Distribution of Physician Ratings for Shared Samples:")
for rating in RATING_LABELS:
    count = int(np.count_nonzero(shared_physician_ratings == rating))
    print(f" Rating {rating}: {count} samples")

# Graph the model confidence vs physician ratings using a box plot.
rating_values = list(RATING_LABELS)
plt.figure(figsize=(10, 6))
plt.boxplot(
    [
        shared_model_confidences[shared_physician_ratings == rating]
        for rating in rating_values
    ],
    positions=rating_values,
    widths=0.6,
)
plt.xticks(rating_values, list(RATING_LABELS.values()))
plt.xlabel("Physician Confidence Rating")
plt.ylabel("Model Confidence")
plt.title("Model Confidence vs Physician Confidence Ratings")
plt.grid(axis="y")
plt.savefig(plots_dir / "model_confidence_vs_physician_ratings_boxplot.png")
plt.close()
|