Browse Source

Completed move and added initial ensemble code

Nicholas Schense 5 months ago
parent
commit
11d8cc1327
11 changed files with 268 additions and 78 deletions
  1. .gitignore (+4 -0)
  2. config.toml (+8 -5)
  3. ensemble_predict.py (+82 -0)
  4. train_cnn.py (+46 -13)
  5. utils/data/datasets.py (+47 -38)
  6. utils/ensemble.py (+55 -0)
  7. utils/models/cnn.py (+0 -6)
  8. utils/models/layers.py (+0 -6)
  9. utils/system.py (+10 -0)
  10. utils/testing.py (+7 -0)
  11. utils/training.py (+9 -10)

+ 4 - 0
.gitignore

@@ -1,3 +1,7 @@
+#Custom gitignore
+saved_models/
+
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

+ 8 - 5
config.toml

@@ -7,21 +7,24 @@ model_output = '/export/home/nschense/alzheimers/alzheimers_nn/saved_models/'
 
 [training]
 device = 'cuda:1'
-runs = 1
-max_epochs = 30
+runs = 10
+max_epochs = 10
 
 [dataset]
 validation_split = 0.3
 
 [model]
-name = 'cnn-ensemble1'
+name = 'cnn-ensemble10x10'
 image_channels = 1
 clin_data_channels = 2
 
 [hyperparameters]
-batch_size = 64
+batch_size = 32
 learning_rate = 0.0001
 droprate = 0.5
 
 [operation]
-silent = false
+silent = false
+
+[ensemble]
+name = 'cnn-ensemble1'

+ 82 - 0
ensemble_predict.py

@@ -0,0 +1,82 @@
+import utils.ensemble as ens
+import os
+import tomli as toml
+from utils.system import force_init_cudnn
+from utils.data.datasets import prepare_datasets
+import math
+import torch
+
+# CONFIGURATION
+if os.getenv("ADL_CONFIG_PATH") is None:
+    with open("config.toml", "rb") as f:
+        config = toml.load(f)
+else:
+    with open(os.getenv("ADL_CONFIG_PATH"), "rb") as f:
+        config = toml.load(f)
+
+# Force cuDNN initialization
+force_init_cudnn(config["training"]["device"])
+
+
+ensemble_folder = config["paths"]["model_output"] + config["ensemble"]["name"] + "/"
+models, model_descs = ens.load_models(ensemble_folder, config["training"]["device"])
+
+# Load test data
+test_dataset = prepare_datasets(
+    config["paths"]["mri_data"],
+    config["paths"]["xls_data"],
+    config["dataset"]["validation_split"],
+    0,
+    config["training"]["device"],
+)[2]
+
+# Evaluate ensemble accuracy and uncertainty on the test set
+correct = 0
+total = 0
+predictions = []
+actual = []
+stdevs = []
+yes_votes = []
+no_votes = []
+
+for data, target in test_dataset:
+    mri, xls = data
+    mri = mri.unsqueeze(0)
+    xls = xls.unsqueeze(0)
+    data = (mri, xls)
+    mean, variance = ens.ensemble_predict(models, data)
+    _, yes, no = ens.ensemble_predict_strict_classes(models, data)
+    yes_votes.append(yes)
+    no_votes.append(no)
+    stdevs.append(math.sqrt(variance.item()))
+    predicted = torch.round(mean)
+    expected = target[1]
+
+    total += 1
+    correct += (predicted == expected).item()
+
+    out = mean.tolist()
+    predictions.append(out)
+
+    act = target[1].tolist()
+    actual.append(act)
+
+
+accuracy = correct / total
+
+with open(ensemble_folder + "ensemble_test_results.txt", "w") as f:
+    f.write("Accuracy: " + str(accuracy) + "\n")
+    f.write("Correct: " + str(correct) + "\n")
+    f.write("Total: " + str(total) + "\n")
+
+    for exp, pred, stdev, yes, no in zip(
+        actual, predictions, stdevs, yes_votes, no_votes
+    ):
+        f.write(
+            str(exp)
+            + ", "
+            + str(pred)
+            + ", "
+            + str(stdev)
+            + ", "
+            + str(yes.item())
+            + ", "
+            + str(no.item())
+            + "\n"
+        )
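For intuition: the per-scan uncertainty written above is the standard deviation of the positive-class probabilities across the ensemble members, so members that agree yield a small value and disagreeing members a large one. A minimal sketch with made-up probabilities for a single scan:

    import torch

    # Hypothetical positive-class outputs from three ensemble members
    member_probs = torch.tensor([0.82, 0.74, 0.91])
    mean = member_probs.mean()          # ensemble prediction, ~0.823
    stdev = member_probs.var().sqrt()   # disagreement, ~0.085 (unbiased variance)
    predicted = torch.round(mean)       # 1.0, i.e. the positive class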

+ 46 - 13
train_cnn.py

@@ -9,14 +9,17 @@ import random as rand
 # SYSTEM
 import tomli as toml
 import os
+import warnings
 
 # DATA PROCESSING
-from sklearn.model_selection import train_test_split
 
 # CUSTOM MODULES
 import utils.models.cnn as cnn
 from utils.data.datasets import prepare_datasets, initalize_dataloaders
 import utils.training as train
+import utils.testing as testn
+from utils.system import force_init_cudnn
+
 
 # CONFIGURATION
 if os.getenv("ADL_CONFIG_PATH") is None:
@@ -26,13 +29,20 @@ else:
     with open(os.getenv("ADL_CONFIG_PATH"), "rb") as f:
         config = toml.load(f)
 
+# Force cuDNN initialization
+force_init_cudnn(config["training"]["device"])
+
 for i in range(config["training"]["runs"]):
     # Set up the model
-    model = cnn.CNN(
-        config["model"]["image_channels"],
-        config["model"]["clin_data_channels"],
-        config["hyperparameters"]["droprate"],
-    ).float()
+    model = (
+        cnn.CNN(
+            config["model"]["image_channels"],
+            config["model"]["clin_data_channels"],
+            config["hyperparameters"]["droprate"],
+        )
+        .float()
+        .to(config["training"]["device"])
+    )
     criterion = nn.BCELoss()
     optimizer = optim.Adam(
         model.parameters(), lr=config["hyperparameters"]["learning_rate"]
@@ -47,6 +57,7 @@ for i in range(config["training"]["runs"]):
         config["paths"]["xls_data"],
         config["dataset"]["validation_split"],
         seed,
+        config["training"]["device"],
     )
     train_dataloader, val_dataloader, test_dataloader = initalize_dataloaders(
         train_dataset,
@@ -55,10 +66,21 @@ for i in range(config["training"]["runs"]):
         config["hyperparameters"]["batch_size"],
     )
 
+    if not config["operation"]["silent"]:
+        print(
+            f"Training model {i + 1} / {config["training"]["runs"]} with seed {seed}..."
+        )
+
     # Train the model
-    history = train.train_model(
-        model, train_dataloader, val_dataloader, criterion, optimizer, config
-    )
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+
+        history = train.train_model(
+            model, train_dataloader, val_dataloader, criterion, optimizer, config
+        )
+
+    # Test the model
+    test_acc = testn.test_model(model, test_dataloader, config)
 
     # Save model
     if not os.path.exists(
@@ -68,14 +90,25 @@ for i in range(config["training"]["runs"]):
             config["paths"]["model_output"] + "/" + str(config["model"]["name"])
         )
 
-    torch.save(
-        model,
+    model_save_path = (
         config["paths"]["model_output"]
         + "/"
         + str(config["model"]["name"])
         + "/"
-        + str(i)
+        + str(i + 1)
         + "_s-"
         + str(seed)
-        + ".pt",
     )
+
+    torch.save(
+        model,
+        model_save_path + ".pt",
+    )
+
+    history.to_csv(
+        model_save_path + "_history.csv",
+        index=True,
+    )
+
+    with open(model_save_path + "_test_acc.txt", "w") as f:
+        f.write(str(test_acc))
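For illustration, after a full 10-run session the model folder that ensemble_predict.py later globs would look roughly like this (the seed values below are placeholders; each run draws its own):

    saved_models/cnn-ensemble10x10/
        1_s-287.pt
        1_s-287_history.csv
        1_s-287_test_acc.txt
        2_s-941.pt
        ...
        10_s-63_test_acc.txt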

+ 47 - 38
utils/data/datasets.py

@@ -3,22 +3,23 @@
 # TODO ENSURE ITERATION WORKS
 import glob
 import nibabel as nib
-import numpy as np
 import random
 import torch
 from torch.utils.data import Dataset
-import pandas as pd 
+import pandas as pd
 from torch.utils.data import DataLoader
 
 
-
-'''
+"""
 Prepares CustomDatasets for training, validating, and testing CNN
-'''
-def prepare_datasets(mri_dir, xls_file, val_split=0.2, seed=50):
+"""
+
 
+def prepare_datasets(
+    mri_dir, xls_file, val_split=0.2, seed=50, device=torch.device("cpu")
+):
     rndm = random.Random(seed)
-    xls_data = pd.read_csv(xls_file).set_index('Image Data ID')
+    xls_data = pd.read_csv(xls_file).set_index("Image Data ID")
     raw_data = glob.glob(mri_dir + "*")
     AD_list = []
     NL_list = []
@@ -39,19 +40,21 @@ def prepare_datasets(mri_dir, xls_file, val_split=0.2, seed=50):
     rndm.shuffle(val_list)
     rndm.shuffle(test_list)
 
-    train_dataset = ADNIDataset(train_list, xls_data)
-    val_dataset = ADNIDataset(val_list, xls_data)
-    test_dataset = ADNIDataset(test_list, xls_data)
+    train_dataset = ADNIDataset(train_list, xls_data, device=device)
+    val_dataset = ADNIDataset(val_list, xls_data, device=device)
+    test_dataset = ADNIDataset(test_list, xls_data, device=device)
 
     return train_dataset, val_dataset, test_dataset
 
     # TODO  Normalize data? Add later / Extract clinical data? Which data?
 
-'''
+
+"""
 Returns train_list, val_list and test_list in format [(image, id), ...] each
-'''
-def get_train_val_test(AD_list, NL_list, val_split):
+"""
 
+
+def get_train_val_test(AD_list, NL_list, val_split):
     train_list, val_list, test_list = [], [], []
 
     num_test_ad = int(len(AD_list) * val_split)
@@ -84,53 +87,59 @@ def get_train_val_test(AD_list, NL_list, val_split):
 
 
 class ADNIDataset(Dataset):
-    def __init__(self, mri, xls: pd.DataFrame):
-        self.mri_data = mri        # DATA IS A LIST WITH TUPLES (image_dir, class_id)
+    def __init__(self, mri, xls: pd.DataFrame, device=torch.device("cpu")):
+        self.mri_data = mri  # DATA IS A LIST WITH TUPLES (image_dir, class_id)
         self.xls_data = xls
-
+        self.device = device
 
     def __len__(self):
         return len(self.mri_data)
-    
+
     def _xls_to_tensor(self, xls_data: pd.Series):
-        #Get used data
+        # Get used data
+
+        # data = xls_data.loc[['Sex', 'Age (current)', 'PTID', 'DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence)', 'Alz_csf']]
+        data = xls_data.loc[["Sex", "Age (current)"]]
 
-        #data = xls_data.loc[['Sex', 'Age (current)', 'PTID', 'DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence)', 'Alz_csf']]
-        data = xls_data.loc[['Sex', 'Age (current)']]
-        
-        data.replace({'M': 0, 'F': 1}, inplace=True)
-        
+        data.replace({"M": 0, "F": 1}, inplace=True)
 
-        #Convert to tensor
+        # Convert to tensor
         xls_tensor = torch.tensor(data.values.astype(float))
-        
+
         return xls_tensor
 
-    def __getitem__(self, idx):     # RETURNS TUPLE WITH IMAGE AND CLASS_ID, BASED ON INDEX IDX
+    def __getitem__(
+        self, idx
+    ):  # RETURNS TUPLE WITH IMAGE AND CLASS_ID, BASED ON INDEX IDX
         mri_path, class_id = self.mri_data[idx]
         mri = nib.load(mri_path)
         mri_data = mri.get_fdata()
 
         xls = self.xls_data.iloc[idx]
 
-        #Convert xls data to tensor
+        # Convert xls data to tensor
         xls_tensor = self._xls_to_tensor(xls)
         mri_tensor = torch.from_numpy(mri_data).unsqueeze(0)
-        
+
         class_id = torch.tensor([class_id])
-        #Convert to one-hot and squeeze
+        # Convert to one-hot and squeeze
         class_id = torch.nn.functional.one_hot(class_id, num_classes=2).squeeze(0)
-        
-        #Convert to float
-        mri_tensor = mri_tensor.float()
-        xls_tensor = xls_tensor.float()
-        class_id = class_id.float()
+
+        # Convert to float
+        mri_tensor = mri_tensor.float().to(self.device)
+        xls_tensor = xls_tensor.float().to(self.device)
+        class_id = class_id.float().to(self.device)
 
         return (mri_tensor, xls_tensor), class_id
-    
-    
-def initalize_dataloaders(training_data, val_data, test_data, cuda_device=torch.device('cuda:0'), batch_size=64):
+
+
+def initalize_dataloaders(
+    training_data,
+    val_data,
+    test_data,
+    batch_size=64,
+):
     train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
     test_dataloader = DataLoader(test_data, batch_size=(batch_size // 4), shuffle=True)
     val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
-    return train_dataloader, val_dataloader, test_dataloader
+    return train_dataloader, val_dataloader, test_dataloader
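As a quick orientation, a consumer of these dataloaders receives batches shaped as follows (the channel and batch dimensions follow from the unsqueeze and DataLoader calls above; the spatial size depends on the NIfTI volumes):

    (mri, xls), label = next(iter(train_dataloader))
    # mri:   float32 on `device`, shape (batch, 1, D, H, W)
    # xls:   float32 on `device`, shape (batch, 2)   - Sex, Age (current)
    # label: float32 on `device`, shape (batch, 2)   - one-hot over the two classes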

+ 55 - 0
utils/ensemble.py

@@ -0,0 +1,55 @@
+import torch
+import os
+from glob import glob
+
+
+# Functions to ensemble a folder of saved models and evaluate them on a test set, with uncertainty estimation.
+
+
+def load_models(folder, device):
+    glob_path = os.path.join(folder, "*.pt")
+    model_files = glob(glob_path)
+
+    models = []
+    model_descs = []
+
+    for model_file in model_files:
+        model = torch.load(model_file, map_location=device)
+        models.append(model)
+        model_descs.append(os.path.basename(model_file))
+
+    return models, model_descs
+
+
+def ensemble_predict(models, input):
+    predictions = []
+    for model in models:
+        model.eval()
+        with torch.no_grad():
+            # Apply model and extract positive class predictions
+            output = model(input)[:, 1]
+            predictions.append(output)
+
+    # Calculate mean and variance of predictions
+    predictions = torch.stack(predictions)
+    mean = predictions.mean()
+    variance = predictions.var()
+
+    return mean, variance
+
+
+def ensemble_predict_strict_classes(models, input):
+    predictions = []
+    for model in models:
+        model.eval()
+        with torch.no_grad():
+            # Apply model and extract prediction
+            output = model(input)
+            _, predicted = torch.max(output.data, 1)
+            predictions.append(predicted)
+
+    predictions = torch.stack(predictions)
+    pos_votes = (predictions == 1).sum()
+    neg_votes = (predictions == 0).sum()
+
+    return pos_votes / len(models), pos_votes, neg_votes
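A short usage sketch, assuming models came from load_models and data is a (mri, xls) pair with a leading batch dimension of 1, as prepared in ensemble_predict.py:

    mean, variance = ensemble_predict(models, data)   # soft ensemble: averaged positive-class probability
    frac, pos, neg = ensemble_predict_strict_classes(models, data)   # hard majority vote
    # e.g. with member votes [1, 1, 0]: frac = 2/3, pos = 2, neg = 1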

+ 0 - 6
utils/models/cnn.py

@@ -1,12 +1,6 @@
 from torch import nn
-from torchvision.transforms import ToTensor
-import os
-import pandas as pd
-import numpy as np
 import utils.models.layers as ly
-
 import torch
-import torchvision
 
 
 class Parameters:

+ 0 - 6
utils/models/layers.py

@@ -1,11 +1,5 @@
 from torch import nn
-from torchvision.transforms import ToTensor
-import os
-import pandas as pd
-import numpy as np
-
 import torch
-import torchvision
 
 
 class SepConv3d(nn.Module):

+ 10 - 0
utils/system.py

@@ -0,0 +1,10 @@
+import torch
+
+
+# Forces torch to initialize cuDNN
+# From StackOverflow https://stackoverflow.com/questions/66588715
+def force_init_cudnn(dev=torch.device("cuda:0")):
+    s = 32
+    torch.nn.functional.conv2d(
+        torch.zeros(s, s, s, s, device=dev), torch.zeros(s, s, s, s, device=dev)
+    )
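cuDNN initializes lazily, so without this the first real convolution pays the initialization cost and is where any cuDNN setup error would surface. Calling the helper once at startup, as train_cnn.py and ensemble_predict.py now do, moves both to a predictable point:

    force_init_cudnn(config["training"]["device"])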

+ 7 - 0
utils/testing.py

@@ -0,0 +1,7 @@
+from utils.training import evaluate_accuracy
+
+
+def test_model(model, test_loader, config):
+    accuracy, predictions, actual = evaluate_accuracy(model, test_loader)
+
+    return accuracy

+ 9 - 10
utils/training.py

@@ -1,8 +1,4 @@
 import torch
-import torch.nn as nn
-import torch.optim as optim
-import torchvision
-from torch.utils.data import DataLoader
 import pandas as pd
 from tqdm import tqdm
 
@@ -14,7 +10,7 @@ def train_epoch(model, train_loader, val_loader, criterion, optimizer, config, e
     for i, (data, target) in tqdm(
         enumerate(train_loader),
         total=len(train_loader),
-        desc="Epoch " + str(epoch + 1) + "/" + str(config["training"]["max_epochs"]),
+        desc=" - Epoch " + str(epoch + 1) + "/" + str(config["training"]["max_epochs"]),
         unit="batch",
         disable=config["operation"]["silent"],
     ):
@@ -51,8 +47,10 @@ def evaluate_accuracy(model, loader):
         for data, target in loader:
             output = model(data)
             _, predicted = torch.max(output.data, 1)
+            _, expected = torch.max(target.data, 1)
+
             total += target.size(0)
-            correct += (predicted == target).sum().item()
+            correct += (predicted == expected).sum().item()
 
             out = output[:, 1].tolist()
             predictions.extend(out)
@@ -72,14 +70,15 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, config):
         train_loss, val_loss = train_epoch(
             model, train_loader, val_loader, criterion, optimizer, config, epoch
         )
-        if config["operation"]["silent"] is False:
-            print(
-                f"Epoch {epoch + 1} - Train Loss: {train_loss} - Val Loss: {val_loss}"
-            )
 
         train_acc, _, _ = evaluate_accuracy(model, train_loader)
         val_acc, _, _ = evaluate_accuracy(model, val_loader)
 
+        if config["operation"]["silent"] is False:
+            print(
+                f" --- Epoch {epoch + 1} - Train Loss: {round(train_loss, 3)}, Val Loss: {round(val_loss, 3)}, Train Acc: {round(train_acc, 3) * 100}%, Val Acc: {round(val_acc, 3) * 100}%"
+            )
+
         history.loc[epoch] = [train_loss, val_loss, train_acc, val_acc]
 
     return history