1 개월 전 · 2f1139aae6
--- a/data/dataset.py
+++ b/data/dataset.py
@@ -22,6 +22,7 @@ class ADNIDataset(data.Dataset):  # type: ignore
 
				         mri_data: Float[torch.Tensor, "n_samples channels width height depth"],
			
 
				         xls_data: Float[torch.Tensor, "n_samples features"],
			
 
				         expected_classes: Float[torch.Tensor, "classes"],
			
 
				+        filename_ids: List[int],
			
 
				         device: str = "cuda",
			
 
				     ):
			
 
				         """
			
@@ -32,6 +33,7 @@ class ADNIDataset(data.Dataset):  # type: ignore
 
				         self.mri_data = mri_data.float().to(device)
			
 
				         self.xls_data = xls_data.float().to(device)
			
 
				         self.expected_classes = expected_classes.float().to(device)
			
 
				+        self.filename_ids = filename_ids
			
 
				 
			
 
				     def __len__(self) -> int:
			
 
				         """
			
@@ -43,6 +45,7 @@ class ADNIDataset(data.Dataset):  # type: ignore
 
				         Float[torch.Tensor, "channels width height depth"],
			
 
				         Float[torch.Tensor, "features"],
			
 
				         Float[torch.Tensor, "classes"],
			
 
				+        int,
			
 
				     ]:
			
 
				         """
			
 
				         Returns a sample from the dataset at the given index.
			
@@ -59,8 +62,9 @@ class ADNIDataset(data.Dataset):  # type: ignore
 
				         xls_sample = self.xls_data[idx]
			
 
				         # Assuming expected_classes is a tensor of classes, we return it as well
			
 
				         expected_classes = self.expected_classes[idx]
			
 
				+        filename_id = self.filename_ids[idx]
			
 
				 
			
 
				-        return mri_sample, xls_sample, expected_classes
			
 
				+        return mri_sample, xls_sample, expected_classes, filename_id
			
 
				 
			
 
				 
			
 
				 def load_adni_data_from_file(
			
@@ -77,7 +81,7 @@ def load_adni_data_from_file(
 
				         xls_file (pl.Path): Path to the Excel file.
			
 
				 
			
 
				     Returns:
			
 
				-        Result[ADNIDataset, str]: A Result object containing the ADNIDataset or an error message.
			
 
				+        ADNIDataset: The loaded dataset.
			
 
				     """
			
 
				     # Load the Excel data
			
 
				     xls_values = xls_preprocessor(pd.read_csv(xls_file))  # type: ignore
			
@@ -141,7 +145,7 @@ def load_adni_data_from_file(
 
				         expected_classes_unstacked
			
 
				     )  # Stack the list of expected classes into a single tensor
			
 
				 
			
 
				-    return ADNIDataset(mri_data, xls_data, expected_classes, device=device)
			
 
				+    return ADNIDataset(mri_data, xls_data, expected_classes, img_ids, device=device)
			
 
				 
			
 
				 
			
 
				 def divide_dataset(
			
--- a/evaluate_models.py
+++ b/evaluate_models.py
@@ -82,16 +82,34 @@ placeholder = np.zeros(
 
				     dtype=np.float32,
			
 
				 )  # Placeholder for results
			
 
				 
			
 
				+# Get the total list of image_ids
			
 
				+img_ids = [img_id for _, _, _, img_id in combined_loader.dataset]
			
 
				+
			
 
				+
			
 
				 placeholder[:] = np.nan  # Fill with NaNs for easier identification of missing data
			
 
				-dimensions = ["model", "batch", "img_class"]
			
 
				+dimensions = ["model", "img_id", "img_class"]
			
 
				 coords = {
			
 
				     "model": [int(mf.stem.split("_")[2]) for mf in model_files],
			
 
				-    "batch": list(range(len(combined_loader))),
			
 
				+    "img_id": img_ids,
			
 
				     "img_class": list(range(config["data"]["num_classes"])),
			
 
				 }
			
 
				-
			
 
				 results = xr.DataArray(placeholder, coords=coords, dims=dimensions)
			
 
				 
			
 
				+# Now initialize an additional dataarray to hold the labels per image
			
 
				+labels_placeholder = np.zeros(
			
 
				+    (len(combined_loader), config["data"]["num_classes"]), dtype=np.float32
			
 
				+)
			
 
				+labels_placeholder[:] = np.nan
			
 
				+labels_coords = {
			
 
				+    "img_id": img_ids,
			
 
				+    "label": list(range(config["data"]["num_classes"])),
			
 
				+}  # type: ignore
			
 
				+
			
 
				+labels = xr.DataArray(
			
 
				+    labels_placeholder, coords=labels_coords, dims=["img_id", "label"]
			
 
				+)
			
 
				+
			
 
				+
			
 
				 for model_file in model_files:
			
 
				     model_num = int(model_file.stem.split("_")[2])
			
 
				     print(f"Evaluating model {model_num}...")
			
@@ -114,15 +132,17 @@ for model_file in model_files:
 
				     model.eval()
			
 
				 
			
 
				     with torch.no_grad():
			
 
				-        for batch_idx, (mri_batch, xls_batch, labels_batch) in enumerate(
			
 
				-            combined_loader
			
 
				-        ):
			
 
				-            outputs = model((mri_batch.float(), xls_batch.float()))
			
 
				+        for batch_idx, (mri, xls, label, img_id) in enumerate(combined_loader):
			
 
				+            outputs = model((mri.float(), xls.float()))
			
 
				             probabilities = outputs.cpu().numpy()[0, :]  # type: ignore
			
 
				 
			
 
				-            results.loc[model_num, batch_idx, :] = probabilities  # type: ignore
			
 
				+            results.loc[model_num, img_id, :] = probabilities  # type: ignore
			
 
				+            labels.loc[int(img_id.cpu()), :] = label.cpu().numpy()[0, :]  # type: ignore
			
 
				+
			
 
				+# Combine results and labels into a single Dataset
			
 
				+output_set = xr.Dataset({"predictions": results, "labels": labels})
			
 
				 
			
 
				 # Save results to netcdf file
			
 
				 output_path = pl.Path(config["output"]["path"]) / "model_evaluation_results.nc"
			
 
				-results.to_netcdf(output_path, mode="w")  # type: ignore
			
 
				+output_set.to_netcdf(output_path, mode="w")  # type: ignore
			
 
				 print(f"Results saved to {output_path}")
			
--- a/generate_statistics.py
+++ b/generate_statistics.py
@@ -1,2 +1,43 @@
 
				 import xarray as xr
			
 
				 from utils.config import config
			
 
				+import pathlib as pl
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+# Load the evaluation results
			
 
				+array = xr.open_dataset(pl.Path(config["output"]["path"]) / "model_evaluation_results.nc")  # type: ignore
			
 
				+
			
 
				+
			
 
				+# This dataset includes two dataarrays: 'predictions' and 'labels'
			
 
				+
			
 
				+# For the first analysis, the goal is to average the predictions across all models for each image, then to determine the accuracy of these averaged predictions against the true labels, graphing accuracy vs confidence threshold.
			
 
				+
			
 
				+predictions: xr.DataArray = array["predictions"]
			
 
				+labels: xr.DataArray = array["labels"]
			
 
				+
			
 
				+# Average predictions across models
			
 
				+avg_predictions = predictions.mean(dim="model")
			
 
				+
			
 
				+# Loop through different confidence thresholds and calculate accuracy
			
 
				+thresholds = np.linspace(0.5, 1.0, num=10)  # From 0.5 to 1.0
			
 
				+accuracies = []
			
 
				+
			
 
				+for i, threshold in enumerate(thresholds):
			
 
				+    # Pick the positive class for the labels and predictions
			
 
				+    predicted_positive = avg_predictions.sel(img_class=1) >= threshold
			
 
				+    true_positive = labels.sel(label=1) == 1
			
 
				+
			
 
				+
			
 
				+    # Calculate accuracy
			
 
				+    correct_predictions = (predicted_positive == true_positive).sum().item()
			
 
				+
			
 
				+    # For debugging, print list of predictions, labels and correctness
			
 
				+
			
 
				+    total_predictions = len(avg_predictions.img_id)
			
 
				+    accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0.0
			
 
				+    accuracies.append(accuracy)
			
 
				+
			
 
				+# Print the accuracies for each threshold
			
 
				+for threshold, accuracy in zip(thresholds, accuracies):
			
 
				+    print(f"Threshold: {threshold:.2f}, Accuracy: {accuracy:.4f}")
			
--- a/utils/training.py
+++ b/utils/training.py
@@ -38,7 +38,7 @@ def test_model(
 
				     total = 0
			
 
				 
			
 
				     with torch.no_grad():
			
 
				-        for _, (mri, xls, targets) in tqdm(
			
 
				+        for _, (mri, xls, targets, _) in tqdm(
			
 
				             enumerate(test_loader), desc="Testing", total=len(test_loader), unit="batch"
			
 
				         ):
			
 
				             outputs = model((mri, xls))
			
@@ -79,7 +79,7 @@ def train_epoch(
 
				     train_loss = 0.0
			
 
				 
			
 
				     # Training loop
			
 
				-    for _, (mri, xls, targets) in tqdm(
			
 
				+    for _, (mri, xls, targets, _) in tqdm(
			
 
				         enumerate(train_loader), desc="Training", total=len(train_loader), unit="batch"
			
 
				     ):
			
 
				         optimizer.zero_grad()
			
@@ -96,7 +96,7 @@ def train_epoch(
 
				     total = 0
			
 
				 
			
 
				     with torch.no_grad():
			
 
				-        for _, (mri, xls, targets) in tqdm(
			
 
				+        for _, (mri, xls, targets, _) in tqdm(
			
 
				             enumerate(val_loader),
			
 
				             desc="Validation",
			
 
				             total=len(val_loader),