
completed initial version of training

Nicholas Schense 4 weeks ago
parent
commit
a52bd0e038
4 files changed, with 365 additions and 72 deletions
  1. data/dataset.py (+31 −9)
  2. train_model.py (+136 −7)
  3. utils/config.py (+18 −56)
  4. utils/training.py (+180 −0)

+ 31 - 9
data/dataset.py

@@ -3,10 +3,11 @@ import torch
 import torch.utils.data as data
 import pathlib as pl
 import pandas as pd
+from torch.utils.data import Subset, DataLoader
 
 
 from jaxtyping import Float
-from typing import Tuple, List, Callable
+from typing import Tuple, Iterator, Callable, List
 from result import Ok, Err, Result
 
 
@@ -18,16 +19,17 @@ class ADNIDataset(data.Dataset):  # type: ignore
 
     def __init__(
         self,
-        mri_data: Float[torch.Tensor, "n_samples, width, height, depth"],
-        xls_data: Float[torch.Tensor, "n_samples, features"],
+        mri_data: Float[torch.Tensor, "n_samples width height depth"],
+        xls_data: Float[torch.Tensor, "n_samples features"],
+        device: str = "cuda",
     ):
         """
         Args:
             mri_data (torch.Tensor): 4D tensor of MRI data with shape (n_samples, width, height, depth).
             xls_data (torch.Tensor): 2D tensor of Excel data with shape (n_samples, features).
         """
-        self.mri_data = mri_data
-        self.xls_data = xls_data
+        self.mri_data = mri_data.float().to(device)
+        self.xls_data = xls_data.float().to(device)
 
     def __len__(self) -> int:
         """
@@ -36,7 +38,7 @@ class ADNIDataset(data.Dataset):  # type: ignore
         return self.mri_data.shape[0]  # 0th dimension is the number of samples
 
     def __getitem__(self, idx: int) -> Tuple[
-        Float[torch.Tensor, "width, height, depth"],
+        Float[torch.Tensor, "width height depth"],
         Float[torch.Tensor, "features"],
     ]:
         """
@@ -56,8 +58,9 @@ class ADNIDataset(data.Dataset):  # type: ignore
 
 
 def load_adni_data_from_file(
-    mri_files: List[pl.Path],  # List of nibablel files
+    mri_files: Iterator[pl.Path],  # Iterator of nibabel-readable NIfTI file paths
     xls_file: pl.Path,  # Path to the Excel file
+    device: str = "cuda",
     xls_preprocessor: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
 ) -> Result[ADNIDataset, str]:
     """
@@ -85,7 +88,7 @@ def load_adni_data_from_file(
 
     # Check if the number of samples in MRI and Excel data match
     if mri_data.shape[0] == xls_data.shape[0]:
-        return Ok(ADNIDataset(mri_data, xls_data))
+        return Ok(ADNIDataset(mri_data, xls_data, device=device))
     else:
         return Err("Loading MRI data failed")
 
@@ -93,7 +96,7 @@ def load_adni_data_from_file(
 def divide_dataset(
     dataset: ADNIDataset,
     ratios: Tuple[float, float, float],
-    seed: int = 0,
+    seed: int,
 ) -> Result[List[data.Subset[ADNIDataset]], str]:
     """
     Divides the dataset into training, validation, and test sets.
@@ -113,3 +116,22 @@ def divide_dataset(
     # Set the random seed for reproducibility
     gen = torch.Generator().manual_seed(seed)
     return Ok(data.random_split(dataset, ratios, generator=gen))
+
+
+def initialize_dataloaders(
+    datasets: List[Subset[ADNIDataset]],
+    batch_size: int = 64,
+) -> List[DataLoader[ADNIDataset]]:
+    """
+    Initializes the DataLoader for the given datasets.
+
+    Args:
+        datasets (List[Subset[ADNIDataset]]): List of datasets to create DataLoaders for.
+        batch_size (int): The batch size for the DataLoader.
+
+    Returns:
+        List[DataLoader[ADNIDataset]]: A list of DataLoaders for the datasets.
+    """
+    return [
+        DataLoader(dataset, batch_size=batch_size, shuffle=True) for dataset in datasets
+    ]
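
A minimal usage sketch of the new loading pipeline (the paths, the 70/20/10 split, and the batch size are hypothetical; device="cpu" keeps the example runnable without a GPU):

    import pathlib as pl
    from result import Ok, Err
    from data.dataset import (
        load_adni_data_from_file,
        divide_dataset,
        initialize_dataloaders,
    )

    mri_files = pl.Path("data/mri").glob("*.nii")  # hypothetical data directory
    xls_file = pl.Path("data/clinical.xlsx")  # hypothetical clinical sheet

    match load_adni_data_from_file(mri_files, xls_file, device="cpu"):
        case Ok(dataset):
            pass
        case Err(msg):
            raise SystemExit(f"Loading failed: {msg}")

    # random_split accepts fractional ratios summing to 1.0
    match divide_dataset(dataset, (0.7, 0.2, 0.1), seed=42):
        case Ok(subsets):
            train_loader, val_loader, test_loader = initialize_dataloaders(
                subsets, batch_size=32
            )
        case Err(msg):
            raise SystemExit(f"Split failed: {msg}")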

+ 136 - 7
train_model.py

@@ -1,20 +1,149 @@
 # Torch
-import torch
 import torch.nn as nn
+import torch
 import torch.optim as optim
 
 # Config
-from utils.config import Config
+from utils.config import config
 import pathlib as pl
+from result import Ok, Err
+import json
 
 
 # Custom modules
 from model.cnn import CNN3D
-from data.dataset import ADNIDataset, load_adni_data_from_file, divide_dataset
+from utils.training import train_model, test_model
+from data.dataset import (
+    load_adni_data_from_file,
+    divide_dataset,
+    initialize_dataloaders,
+)
 
+# Load data
+mri_files = pl.Path(config["data"]["mri_files_path"]).glob("*.nii")
+xls_file = pl.Path(config["data"]["xls_file_path"])
 
-# Load config
-conf = Config()
 
-# Load data
-mri_files = pl.Path(conf["data"]["mri_files"]).glob("*.nii")
+# Load the data
+
+match load_adni_data_from_file(
+    mri_files, xls_file, device=config["training"]["device"]
+):
+    case Ok(d):
+        dataset = d
+        print("Data loaded successfully")
+    case Err(e):
+        print(f"Error loading data: {e}")
+        exit(-1)
+
+
+# Divide the dataset into training and validation sets
+# tomllib never produces None, so probe for a missing key with .get
+if config["data"].get("seed") is None:
+    print("Warning: No seed provided for dataset division, using default seed 0")
+    config["data"]["seed"] = 0
+
+match divide_dataset(
+    dataset, config["data"]["train_val_split"], seed=config["data"]["seed"]
+):
+    case Ok(s):
+        if len(s) != 3:
+            print(f"Error: Expected 3 subsets (train, val, test), got {len(s)}")
+            exit(-1)
+        datasets = s
+        print("Dataset divided successfully")
+    case Err(e):
+        print(f"Error dividing dataset: {e}")
+        exit(-1)
+
+
+# Initialize the dataloaders
+train_loader, val_loader, test_loader = initialize_dataloaders(
+    datasets, batch_size=config["training"]["batch_size"]
+)
+
+# Save seed to output config file
+output_config_path = pl.Path(config["output"]["path"]) / "config.json"
+if not output_config_path.parent.exists():
+    output_config_path.parent.mkdir(parents=True, exist_ok=True)
+
+
+with open(output_config_path, "w") as f:
+    # Save as JSON
+    json.dump(config, f, indent=4)
+print(f"Configuration saved to {output_config_path}")
+
+# Set up the ensemble training loop
+
+for run_num in range(config["training"]["ensemble_runs"]):
+    print(f"Starting run {run_num + 1}/{config['training']['ensemble_runs']}")
+
+    # Initialize the model
+    model = (
+        CNN3D(
+            image_channels=config["data"]["image_channels"],
+            clin_data_channels=config["data"]["clin_data_channels"],
+            num_classes=config["data"]["num_classes"],
+            droprate=config["training"]["drop_rate"],
+        )
+        .float()
+        .to(config["training"]["device"])
+    )
+
+    # Set up the optimizer and loss function
+    optimizer = optim.Adam(model.parameters(), lr=config["training"]["learning_rate"])
+    criterion = nn.BCELoss()
+
+    # Train model
+    model, history = train_model(
+        model=model,
+        train_loader=train_loader,
+        val_loader=val_loader,
+        optimizer=optimizer,
+        criterion=criterion,
+        num_epochs=config["training"]["num_epochs"],
+        learning_rate=config["training"]["learning_rate"],
+    )
+
+    # Test model
+    test_loss, test_acc = test_model(
+        model=model,
+        test_loader=test_loader,
+        criterion=criterion,
+    )
+
+    print(
+        f"Run {run_num + 1}/{config['training']['ensemble_runs']} - "
+        f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}"
+    )
+
+    # Save the model
+    model_save_path = pl.Path(config["output"]["path"]) / f"model_run_{run_num + 1}.pt"
+    torch.save(model.state_dict(), model_save_path)
+    print(f"Model saved to {model_save_path}")
+
+    # Save the training history
+    history_save_path = (
+        pl.Path(config["output"]["path"]) / f"history_run_{run_num + 1}.nc"
+    )
+
+    history.to_netcdf(history_save_path, mode="w")  # type: ignore
+    print(f"Training history saved to {history_save_path}")
+
+    # Save test results
+    test_results_save_path = (
+        pl.Path(config["output"]["path"]) / f"test_results_run_{run_num + 1}.json"
+    )
+    with open(test_results_save_path, "w") as f:
+        json.dump(
+            {
+                "test_loss": test_loss,
+                "test_accuracy": test_acc,
+            },
+            f,
+            indent=4,
+        )
+    print(f"Test results saved to {test_results_save_path}")
+    print(f"Run {run_num + 1}/{config['training']['ensemble_runs']} completed\n")
+
+# Completion message
+print(f"All runs completed. Models and results saved to {config['output']['path']}")

+ 18 - 56
utils/config.py

@@ -1,65 +1,27 @@
-import typing
-from pathlib import Path
-import tomli
+# This file serves as a singleton for the configuration settings of the project
+
+import tomllib
 import os
+import pathlib as pl
+from typing import Any
 
 
-@typing.no_type_check
-class SingletonMeta(type):
+def get_config() -> dict[str, Any]:
     """
-    Singleton metaclass to ensure only one instance of a class exists.
+    Load the configuration file and return the settings as a dictionary.
     """
+    match os.getenv("ANN_CONFIG_PATH"):
+        case None:
+            config_path = pl.Path(__file__).parent.parent / "config.toml"
+        case str(path):
+            config_path = pl.Path(path)
 
-    _instances = {}
-
-    @typing.no_type_check
-    def __call__(cls, *args, **kwargs):
-        if cls not in cls._instances:
-            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
-        return cls._instances[cls]
-
-
-class Config(metaclass=SingletonMeta):
-    def __init__(
-        self,
-        config_path: Path | None = Path(__file__).parent.parent / "config.toml",
-    ):
-        """
-        Initialize the configuration object.
-
-        Args:
-            config_path (Path): Path to the configuration file.
-        """
-
-        if config_path is None and "ADL_CONFIG_PATH" in os.environ:
-            self.config_path = Path(os.environ["ADL_CONFIG_PATH"])
-        elif config_path is not None:
-            self.config_path = config_path
-        else:
-            raise ValueError("Either config_path or ADL_CONFIG_PATH must be provided")
-
-        self.loaded_config_path = None
-        self._load_config()
-
-    def _load_config(self):
-        """
-        Load the configuration from the specified file.
-        """
-        with open(self.config_path, "rb") as f:
-            config = tomli.load(f)
-            self.loaded_config_path = str(self.config_path)
-            print(f"Loaded config from {self.config_path}")
-
-        self._config_dict = config
+    if not config_path.exists():
+        raise FileNotFoundError(f"Config file not found at {config_path}")
+    with open(config_path, "rb") as f:
+        config = tomllib.load(f)
 
-    def __getitem__(self, key: str):
-        """
-        Get a configuration value by key.
+    return config
 
-        Args:
-            key (str): The key of the configuration value.
 
-        Returns:
-            The configuration value.
-        """
-        return self._config_dict.get(key, None)
+config = get_config()
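
Because config is built at import time, the module acts as a read-only singleton: every importer sees the same dict, so ANN_CONFIG_PATH must be set before the first import of utils.config. A small sketch (the path and the key looked up are hypothetical):

    import os

    # Must happen before utils.config is imported anywhere
    os.environ["ANN_CONFIG_PATH"] = "/tmp/experiment_a/config.toml"

    from utils.config import config

    print(config["training"]["batch_size"])  # assumes this key exists in the TOML file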

+ 180 - 0
utils/training.py

@@ -0,0 +1,180 @@
+import torch
+import torch.nn as nn
+import numpy as np
+from torch.utils.data import DataLoader
+import xarray as xr
+from data.dataset import ADNIDataset
+from typing import Tuple
+from tqdm import tqdm
+
+type TrainMetrics = Tuple[
+    float, float, float, float
+]  # (train_loss, val_loss, train_acc, val_acc)
+
+type TestMetrics = Tuple[float, float]  # (test_loss, test_acc)
+
+
+def test_model(
+    model: nn.Module,
+    test_loader: DataLoader[ADNIDataset],
+    criterion: nn.Module,
+) -> TestMetrics:
+    """
+    Tests the model on the test dataset.
+
+    Args:
+        model (nn.Module): The model to test.
+        test_loader (DataLoader[ADNIDataset]): DataLoader for the test dataset.
+        criterion (nn.Module): Loss function to compute the loss.
+
+    Returns:
+        TestMetrics: A tuple containing the test loss and test accuracy.
+    """
+    model.eval()
+    test_loss = 0.0
+    correct = 0
+    total = 0
+
+    with torch.no_grad():
+        for _, (inputs, targets) in tqdm(
+            enumerate(test_loader), desc="Testing", total=len(test_loader), unit="batch"
+        ):
+            outputs = model(inputs)
+            loss = criterion(outputs, targets)
+            test_loss += loss.item() * inputs.size(0)
+
+            # Calculate accuracy
+            predicted = (outputs > 0.5).float()
+            correct += (predicted == targets).sum().item()
+            total += targets.numel()
+
+    # Normalize by sample count: the loss was accumulated weighted by batch size
+    test_loss /= len(test_loader.dataset)
+    test_acc = correct / total if total > 0 else 0.0
+    return test_loss, test_acc
+
+
+def train_epoch(
+    model: nn.Module,
+    train_loader: DataLoader[ADNIDataset],
+    val_loader: DataLoader[ADNIDataset],
+    optimizer: torch.optim.Optimizer,
+    criterion: nn.Module,
+) -> TrainMetrics:
+    """
+    Trains the model for one epoch and evaluates it on the validation set.
+
+    Args:
+        model (nn.Module): The model to train.
+        train_loader (DataLoader[ADNIDataset]): DataLoader for the training dataset.
+        val_loader (DataLoader[ADNIDataset]): DataLoader for the validation dataset.
+        optimizer (torch.optim.Optimizer): Optimizer for updating model parameters.
+        criterion (nn.Module): Loss function to compute the loss.
+
+    Returns:
+        TrainMetrics: A tuple of (train_loss, val_loss, train_acc, val_acc).
+    """
+    model.train()
+    train_loss = 0.0
+    train_correct = 0
+    train_total = 0
+
+    # Training loop
+    for _, (inputs, targets) in tqdm(
+        enumerate(train_loader), desc="Training", total=len(train_loader), unit="batch"
+    ):
+        optimizer.zero_grad()
+        outputs = model(inputs)
+        loss = criterion(outputs, targets)
+        loss.backward()
+        optimizer.step()
+        train_loss += loss.item() * inputs.size(0)
+
+        # Track training accuracy alongside the loss
+        predicted = (outputs > 0.5).float()
+        train_correct += (predicted == targets).sum().item()
+        train_total += targets.numel()
+
+    # Normalize by sample count: the loss was accumulated weighted by batch size
+    train_loss /= len(train_loader.dataset)
+    train_acc = train_correct / train_total if train_total > 0 else 0.0
+
+    model.eval()
+    val_loss = 0.0
+    correct = 0
+    total = 0
+
+    with torch.no_grad():
+        for _, (inputs, targets) in tqdm(
+            enumerate(val_loader),
+            desc="Validation",
+            total=len(val_loader),
+            unit="batch",
+        ):
+            outputs = model(inputs)
+            loss = criterion(outputs, targets)
+            val_loss += loss.item() * inputs.size(0)
+
+            # Calculate accuracy
+            predicted = (outputs > 0.5).float()
+            correct += (predicted == targets).sum().item()
+            total += targets.numel()
+
+    val_loss /= len(val_loader.dataset)
+    val_acc = correct / total if total > 0 else 0.0
+    return train_loss, val_loss, train_acc, val_acc
+
+
+def train_model(
+    model: nn.Module,
+    train_loader: DataLoader[ADNIDataset],
+    val_loader: DataLoader[ADNIDataset],
+    optimizer: torch.optim.Optimizer,
+    criterion: nn.Module,
+    num_epochs: int,
+    learning_rate: float,
+) -> Tuple[nn.Module, xr.DataArray]:
+    """
+    Trains the model using the provided training and validation data loaders.
+
+    Args:
+        model (nn.Module): The model to train.
+        train_loader (DataLoader[ADNIDataset]): DataLoader for the training dataset.
+        val_loader (DataLoader[ADNIDataset]): DataLoader for the validation dataset.
+        optimizer (torch.optim.Optimizer): Optimizer for updating model parameters.
+        criterion (nn.Module): Loss function to compute the loss.
+        num_epochs (int): Number of epochs to train the model.
+        learning_rate (float): Learning rate (unused here; the optimizer is configured by the caller).
+
+    Returns:
+        Tuple[nn.Module, xr.DataArray]: The trained model and the training history.
+    """
+
+    # Record the training history
+    # We record the Epoch, Training Loss, Validation Loss, Training Accuracy, and Validation Accuracy
+    history = xr.DataArray(
+        data=np.zeros((num_epochs, 4)),  # filled in epoch by epoch
+        dims=["epoch", "metric"],
+        coords={
+            "epoch": range(num_epochs),
+            "metric": ["train_loss", "val_loss", "train_acc", "val_acc"],
+        },
+    )
+
+    for epoch in range(num_epochs):
+        train_loss, val_loss, train_acc, val_acc = train_epoch(
+            model,
+            train_loader,
+            val_loader,
+            optimizer,
+            criterion,
+        )
+
+        # Update the history (label-based assignment goes through .loc)
+        history.loc[{"epoch": epoch}] = [train_loss, val_loss, train_acc, val_acc]
+
+        print(
+            f"Epoch [{epoch + 1}/{num_epochs}], "
+            f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
+            f"Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}"
+        )
+
+        # Save a checkpoint at each quarter of the run (25, 50, 75, and 100% of the epochs)
+        if num_epochs >= 4 and (epoch + 1) % (num_epochs // 4) == 0:
+            torch.save(model.state_dict(), f"model_epoch_{epoch + 1}.pth")
+            print(f"Model saved at epoch {epoch + 1}")
+
+    # Return the trained model and the training history
+    return model, history
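
The per-run history files written by train_model.py can be reloaded with xarray for later analysis. A sketch assuming a single-variable .nc file, as produced by DataArray.to_netcdf above:

    import xarray as xr

    history = xr.open_dataarray("history_run_1.nc")  # dims: (epoch, metric)

    # Label-based selection along the metric dimension
    val_loss = history.sel(metric="val_loss")
    best_epoch = int(val_loss.argmin("epoch"))
    print(f"Best val loss {float(val_loss.min()):.4f} at epoch {best_epoch + 1}")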