Selaa lähdekoodia

Refactored code and added graphing functions

Nicholas Schense 10 kuukautta sitten
vanhempi
commit
29da377f25
3 muutettua tiedostoa jossa 196 lisäystä ja 117 poistoa
  1. 2 1
      .gitignore
  2. 23 116
      main.py
  3. 171 0
      utils/training.py

+ 2 - 1
.gitignore

@@ -1,3 +1,4 @@
 .venv
 __pycache__
-utils/__pycache__
+utils/__pycache__
+saved_models/

+ 23 - 116
main.py

@@ -1,34 +1,17 @@
 import torch
-import torchvision
-
-# FOR DATA
-from utils.preprocess import prepare_datasets
-from utils.show_image import show_image
-from torch.utils.data import DataLoader
-from torchvision import datasets
-
 from torch import nn
-import torch.nn.functional as F
-from torchvision.transforms import ToTensor
-
-# import nonechucks as nc     # Used to load data in pytorch even when images are corrupted / unavailable (skips them)
 
-# FOR IMAGE VISUALIZATION
-import nibabel as nib
 
 # GENERAL PURPOSE
-import os
-import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
-import glob
 from datetime import datetime
+import pandas as pd
 
 # FOR TRAINING
 import torch.optim as optim
 import utils.models as models
-import utils.layers as ly
-from tqdm import tqdm
+from utils.training import train_model, test_model, initalize_dataloaders, plot_results
+
 
 #Set Default GPU
 cuda_device = torch.device('cuda:1')
@@ -41,116 +24,40 @@ print("Pytorch Version: " + torch. __version__)
 val_split = 0.2     # % of val and test, rest will be train
 runs = 1
 epochs = 5
-time_stamp = timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
 seeds = [np.random.randint(0, 1000) for _ in range(runs)]
 
 #Data Path
-mri_datapath = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'
+mri_path = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'
 
 #Local Path
 local_path = '/export/home/nschense/alzheimers/Pytorch_CNN-RNN'
 
 xls_path = local_path + '/LP_ADNIMERGE.csv'
-saved_model_path = local_path + 'saved_models/'
+saved_model_path = local_path + '/saved_models/'
+plot_path = local_path + '/plots/'
+training_record_path = local_path + '/training_records/'
 
 DEBUG = False
 
-# TODO: Datasets include multiple labels, such as medical info
-
-
-def evaluate_model(seed):
-    training_data, val_data, test_data = prepare_datasets(mri_datapath, xls_path, val_split, seed)
-    
-
-    batch_size = 64
-
-    # Create data loaders
-    train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=cuda_device))
-    test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=cuda_device))
-    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=cuda_device))
-
-    #Print Shape of Image Data
-    print("Shape of MRI Data: ", training_data[0][0].shape)
-    print("Shape of XLS Data: ", training_data[0][1].shape)
-
-    #Print Training Data Length
-    print("Length of Training Data: ", len(train_dataloader))
-
-
-    print("--- INITIALIZING MODEL ---")
-    model_CNN = models.CNN_Net(1, 2, 0.5).to(cuda_device)
-    criterion = nn.BCELoss()
-    optimizer = optim.Adam(model_CNN.parameters(), lr=0.001)
-    print("Seed: ", seed)
-    epoch_number = 0
-
-    print("--- TRAINING MODEL ---")
-    for epoch in range(epochs):
-        running_loss = 0.0
-        length = len(train_dataloader)
-        for i, data in tqdm(enumerate(train_dataloader, 0), total=length, desc="Epoch " + str(epoch), unit="batch"):
-            mri, xls, label = data
-
-            optimizer.zero_grad()
-
-            mri = mri.to(cuda_device).float()
-            xls = xls.to(cuda_device).float()
-            label = label.to(cuda_device).float()
-
-
-            outputs = model_CNN((mri, xls))
-
-            if DEBUG:
-                print(outputs.shape, label.shape)
-
-            loss = criterion(outputs, label)
-            loss.backward()
-            optimizer.step()
-
-            running_loss += loss.item()
-            if i % 1000 == 999:
-                print("Epoch: ", epoch_number, "Batch: ", i+1, "Loss: ", running_loss / 1000, "Accuracy: ", )
-                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 1000))
-                running_loss = 0.0
-        epoch_number += 1
-
-
-    print("--- TESTING MODEL ---")
-    #Test model
-    correct = 0
-    total = 0
-
-    with torch.no_grad():
-        length = len(test_dataloader)
-        for i, data in tqdm(enumerate(test_dataloader, 0), total=length, desc="Testing", unit="batch"):
-            mri, xls, label = data
-
-            mri = mri.to(cuda_device).float()
-            xls = xls.to(cuda_device).float()
-            label = label.to(cuda_device).float()
-
-
-            outputs = model_CNN((mri, xls))
-
-            if DEBUG:
-                print(outputs.shape, label.shape)
-
-            _, predicted = torch.max(outputs.data, 1)
-            _, labels = torch.max(label.data, 1)
-
-            if DEBUG:
-                print("Predicted: ", predicted)
-                print("Labels: ", labels)
-
-
-            total += labels.size(0)
-            correct += (predicted == labels).sum().item()
-
-    print("Model Accuracy: ", 100 * correct / total)
# Build the model, loss, and optimizer once, up front.
model_CNN = models.CNN_Net(1, 2, 0.5).to(cuda_device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model_CNN.parameters(), lr=0.001)

# NOTE(review): the model and optimizer are created once and reused across
# seeds, so any run after the first continues from already-trained weights —
# confirm this is intended when runs > 1.

for seed in seeds:
    # Fresh timestamp per run so saved models/plots/records don't collide.
    time_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    train_loader, val_loader, test_loader = initalize_dataloaders(mri_path, xls_path, val_split, seed, cuda_device=cuda_device)
    train_results = train_model(model_CNN, seed, time_stamp, epochs, train_loader, val_loader, saved_model_path, "CNN", optimizer, criterion, cuda_device=cuda_device)
    test_model(model_CNN, test_loader, cuda_device=cuda_device)

    # Plot results
    plot_results(train_results["train_acc"], train_results["train_loss"], train_results["val_acc"], train_results["val_loss"], "CNN", time_stamp, plot_path)

    # Save training results. Create the directory first: nothing else makes
    # training_records/, so to_csv would otherwise raise FileNotFoundError
    # on a fresh checkout.
    import os
    os.makedirs(training_record_path, exist_ok=True)
    train_results.to_csv(training_record_path + "CNN_t-" + time_stamp + "_s-" + str(seed) + "_e-" + str(epochs) + ".csv")


print("--- END ---")

+ 171 - 0
utils/training.py

@@ -0,0 +1,171 @@
+import torch
+from tqdm import tqdm
+import os
+from utils.preprocess import prepare_datasets
+from torch.utils.data import DataLoader
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+
+
+
def train_model(model, seed, timestamp, epochs, train_loader, val_loader, saved_model_path, model_name, optimizer, criterion, cuda_device=torch.device('cuda:0')):
    """Train *model* for *epochs* epochs, validating after each epoch.

    After training, the model's state_dict is saved under
    ``saved_model_path`` (created if missing) with a name encoding
    *model_name*, *timestamp*, *seed*, and *epochs*.

    Args:
        model: nn.Module taking a ``(mri, xls)`` tuple and returning
            per-class scores compatible with *criterion*.
        seed: RNG seed for this run (printed and used in the filename).
        timestamp: timestamp string used in the saved-model filename.
        epochs: number of training epochs.
        train_loader: DataLoader yielding ``(mri, xls, label)`` batches.
        val_loader: DataLoader yielding ``(mri, xls, label)`` batches.
        saved_model_path: directory to write the final weights into.
        model_name: filename prefix for the saved weights.
        optimizer: optimizer stepping *model*'s parameters.
        criterion: loss function applied to (outputs, label).
        cuda_device: device every batch is moved to before the forward pass.

    Returns:
        pandas.DataFrame indexed by epoch with columns ``train_loss``,
        ``train_acc``, ``val_loss``, ``val_acc``. Losses are summed (not
        averaged) over the epoch's batches.
    """
    # Print Training Data Length
    print("Length of Training Data: ", len(train_loader))

    print("--- INITIALIZING MODEL ---")
    print("Seed: ", seed)

    print("--- TRAINING MODEL ---")
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []

    for epoch in range(epochs):
        train_loss = 0
        train_incc = 0
        train_corr = 0

        # Training: make sure dropout/batch-norm are in training mode
        # (the validation pass below switches to eval mode each epoch).
        model.train()
        length = len(train_loader)
        for _, data in tqdm(enumerate(train_loader, 0), total=length, desc="Epoch " + str(epoch), unit="batch"):
            mri, xls, label = data

            optimizer.zero_grad()

            mri = mri.to(cuda_device).float()
            xls = xls.to(cuda_device).float()
            label = label.to(cuda_device).float()

            outputs = model((mri, xls))

            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            # Summed loss over the epoch (matches validation bookkeeping).
            train_loss += loss.item()

            # Labels are one-hot, so argmax both predictions and targets.
            _, predicted = torch.max(outputs.data, 1)
            _, labels = torch.max(label.data, 1)

            train_corr += (predicted == labels).sum().item()
            train_incc += (predicted != labels).sum().item()

        train_losses.append(train_loss)
        train_accs.append(train_corr / (train_corr + train_incc))

        # Validation: disable dropout (the model is built with dropout 0.5)
        # and gradient tracking so the metrics reflect inference behaviour.
        model.eval()
        with torch.no_grad():
            val_loss = 0
            val_incc = 0
            val_corr = 0

            for _, data in enumerate(val_loader, 0):
                mri, xls, label = data

                mri = mri.to(cuda_device).float()
                xls = xls.to(cuda_device).float()
                label = label.to(cuda_device).float()

                outputs = model((mri, xls))

                loss = criterion(outputs, label)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                _, labels = torch.max(label.data, 1)

                val_corr += (predicted == labels).sum().item()
                val_incc += (predicted != labels).sum().item()

        val_losses.append(val_loss)
        val_accs.append(val_corr / (val_corr + val_incc))

    print("--- SAVING MODEL ---")
    if not os.path.exists(saved_model_path):
        os.makedirs(saved_model_path)

    torch.save(model.state_dict(), saved_model_path + model_name + "_t-" + timestamp + "_s-" + str(seed) + "_e-" + str(epochs) + ".pt")

    # Create dataframe with training and validation losses and accuracies,
    # set index to epoch.
    df = pd.DataFrame()
    df["train_loss"] = train_losses
    df["train_acc"] = train_accs
    df["val_loss"] = val_losses
    df["val_acc"] = val_accs
    df.index.name = "epoch"

    return df
+    
def test_model(model, test_loader, cuda_device=torch.device('cuda:0')):
    """Evaluate *model* on *test_loader*, print and return its accuracy.

    Args:
        model: nn.Module taking a ``(mri, xls)`` tuple.
        test_loader: DataLoader yielding ``(mri, xls, label)`` batches
            with one-hot labels.
        cuda_device: device every batch is moved to.

    Returns:
        Accuracy as a percentage (float); 0.0 if the loader is empty.
        Backward compatible: existing callers ignore the return value.
    """
    print("--- TESTING MODEL ---")
    correct = 0
    total = 0

    # Switch off dropout so the reported accuracy reflects inference
    # behaviour (the model is constructed with dropout 0.5).
    model.eval()
    with torch.no_grad():
        length = len(test_loader)
        for i, data in tqdm(enumerate(test_loader, 0), total=length, desc="Testing", unit="batch"):
            mri, xls, label = data

            mri = mri.to(cuda_device).float()
            xls = xls.to(cuda_device).float()
            label = label.to(cuda_device).float()

            outputs = model((mri, xls))

            # One-hot labels: argmax both predictions and targets.
            _, predicted = torch.max(outputs.data, 1)
            _, labels = torch.max(label.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty test set rather than dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    print("Model Accuracy: ", accuracy)
    return accuracy
+
def initalize_dataloaders(mri_path, xls_path, val_split, seed, cuda_device=torch.device('cuda:0'), batch_size=64):
    """Prepare the datasets and wrap them in shuffled DataLoaders.

    (The misspelled name is kept for compatibility with existing callers.)

    Args:
        mri_path: directory containing the MRI/PET volumes.
        xls_path: path to the ADNIMERGE csv with tabular data.
        val_split: fraction of the data used for validation and test.
        seed: RNG seed forwarded to prepare_datasets for the split.
        cuda_device: device whose generator drives the shuffle order.
        batch_size: train/val batch size; the test loader uses a quarter
            of it. Defaults to 64, preserving the previous behaviour.

    Returns:
        (train_dataloader, val_dataloader, test_dataloader) tuple.
    """
    training_data, val_data, test_data = prepare_datasets(mri_path, xls_path, val_split, seed)

    train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=cuda_device))
    # Smaller test batches; presumably to bound peak memory — TODO confirm.
    test_dataloader = DataLoader(test_data, batch_size=(batch_size // 4), shuffle=True, generator=torch.Generator(device=cuda_device))
    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=cuda_device))

    return train_dataloader, val_dataloader, test_dataloader
+
+
def plot_results(train_acc, train_loss, val_acc, val_loss, model_name, timestamp, plot_path):
    """Save accuracy and loss curves for a training run as two PNGs.

    Args:
        train_acc: per-epoch training accuracies.
        train_loss: per-epoch training losses.
        val_acc: per-epoch validation accuracies.
        val_loss: per-epoch validation losses.
        model_name: name used in plot titles and filenames.
        timestamp: timestamp string used in titles and filenames.
        plot_path: output directory (created if missing).
    """
    # Nothing else creates the plots directory; without this savefig fails.
    os.makedirs(plot_path, exist_ok=True)

    # Accuracy plot
    plt.figure()
    plt.plot(train_acc, label="Training Accuracy")
    plt.plot(val_acc, label="Validation Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title("Accuracy of " + model_name + " Model: " + timestamp)
    plt.legend()
    plt.savefig(plot_path + model_name + "_t-" + timestamp + "_acc.png")
    plt.close()  # release the figure; otherwise figures pile up across runs

    # Loss plot
    plt.figure()
    plt.plot(train_loss, label="Training Loss")
    plt.plot(val_loss, label="Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Loss of " + model_name + " Model: " + timestamp)
    plt.legend()
    plt.savefig(plot_path + model_name + "_t-" + timestamp + "_loss.png")
    plt.close()