
Attempted K-fold (unsuccessful), new graphs

Ruben, 5 months ago
parent commit e410eeef6e
8 changed files with 265 additions and 71 deletions
  1. BIN       ROC.png
  2. BIN       avgloss_epoch_curve.png
  3. +18 -14   main.py
  4. +14 -56   utils/CNN.py
  5. +0 -1     utils/CNN_Layers.py
  6. +22 -0    utils/Graphs.py
  7. +211 -0   utils/K-fold.py
  8. BIN       valloss_epoch_curve.png

BIN  ROC.png

BIN  avgloss_epoch_curve.png


+18 -14  main.py

@@ -22,11 +22,13 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import glob
+import platform
 
 
 
 print("--- RUNNING ---")
 print("Pytorch Version: " + torch. __version__)
+print("Python Version: " + platform.python_version())
 
 # LOADING DATA
 # data & training properties:
@@ -85,24 +87,26 @@ val_dataloader = DataLoader(val_data, batch_size=properties['batch_size'], shuff
 
 
 # Display 4 images and labels.
-# x = 1
-# while x < 1:
-#     train_features, train_labels = next(iter(train_dataloader))
-#     print(f"Feature batch shape: {train_features.size()}")
-#     img = train_features[0].squeeze()
-#     print(f"Feature batch shape: {img.size()}")
-#     image = img[:, :, 40]
-#     print(f"Feature batch shape: {image.size()}")
-#     label = train_labels[0]
-#     print(f"Label: {label}")
-#     plt.imshow(image, cmap="gray")
-#     plt.show()
-#     x = x+1
+x = 0
+while x < 0:
+    train_features, train_labels = next(iter(train_dataloader))
+    print(f"Feature batch shape: {train_features.size()}")
+    img = train_features[0].squeeze()
+    print(f"Feature batch shape: {img.size()}")
+    image = img[:, :, 40]
+    print(f"Feature batch shape: {image.size()}")
+    label = train_labels[0]
+    print(f"Label: {label}")
+    plt.imshow(image, cmap="gray")
+    plt.savefig(f"./Image{x}_IS:{label}.png")
+    plt.show()
+
+    x = x+1
 
 
 train = False
 predict = False
-CNN = CNN_Net(train_dataloader, prps=properties, final_layer_size=2)
+CNN = CNN_Net(prps=properties, final_layer_size=2)
 CNN.cuda()
 
 # RUN CNN
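
A note on the preview block re-enabled above: with x = 0 and while x < 0, the loop body still never executes, so the net behaviour is the same as the previously commented-out version. If a switchable preview is the intent, an explicit flag reads more clearly. A minimal sketch of that alternative, assuming main.py's train_dataloader and matplotlib import are in scope; the show_preview flag and the fixed slice index are illustrative, not part of this commit:

show_preview = False  # flip to True to display and save one sample slice (hypothetical flag)

if show_preview:
    train_features, train_labels = next(iter(train_dataloader))
    img = train_features[0].squeeze()        # drop the channel dimension
    image = img[:, :, 40]                    # one axial slice, index 40 as in the loop above
    label = int(train_labels[0])
    plt.imshow(image, cmap="gray")
    plt.savefig(f"./Image0_IS_{label}.png")  # ':' avoided in the filename for portability
    plt.show()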

+14 -56  utils/CNN.py

@@ -5,28 +5,21 @@ import torch.nn as nn
 import utils.CNN_Layers as CustomLayers
 import torch.nn.functional as F
 import torch.optim as optim
-import utils.CNN_methods as CNN
 import pandas as pd
 import matplotlib.pyplot as plt
 import time
 import numpy as np
-# from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import roc_curve, auc
 
 class CNN_Net(nn.Module):
-    def __init__(self, input, prps, final_layer_size=5):
+    def __init__(self, prps, final_layer_size=5):
         super(CNN_Net, self).__init__()
         self.final_layer_size = final_layer_size
         self.device = device('cuda:0' if cuda.is_available() else 'cpu')
         print("CNN Initialized. Using: " + str(self.device))
 
-        # GETS FIRST IMAGE FOR SIZE
-        data_iter = iter(input)
-        first_batch = next(data_iter)
-        first_features = first_batch[0]
-        image = first_features[0]
-
         # LAYERS
-        print(f"CNN Model Initialization. Input size: {image.size()}")
+        print(f"CNN Model Initialization")
         self.conv1 = CustomLayers.Conv_elu_maxpool_drop(1, 192, (11, 13, 11), stride=(4,4,4), pool=True, prps=prps)
         self.conv2 = CustomLayers.Conv_elu_maxpool_drop(192, 384, (5, 6, 5), stride=(1,1,1), pool=True, prps=prps)
         self.conv3_mid_flow = CustomLayers.Mid_flow(384, 384, prps=prps)
@@ -107,19 +100,14 @@ class CNN_Net(nn.Module):
         losses.to_csv('./cnn_net_data.csv')
 
         # MAKES EPOCH VS AVG LOSS GRAPH
-        plt.plot(losses['Epoch'], losses['Avg_loss'])
+        plt.plot(losses['Epoch'], losses['Avg_loss'], label="Loss on Training")
         plt.xlabel('Epoch')
         plt.ylabel('Average Loss')
-        plt.title('Average Loss vs Epoch On Training')
-        plt.savefig('./avgloss_epoch_curve.png')
-        plt.show()
+        plt.title('Loss vs Epoch On Training & Validation data')
 
         # MAKES EPOCH VS VALIDATION LOSS GRAPH
-        plt.plot(losses['Epoch'], losses['Val_loss'])
-        plt.xlabel('Epoch')
-        plt.ylabel('Validation Loss')
-        plt.title('Validation Loss vs Epoch On Training')
-        plt.savefig('./valloss_epoch_curve.png')
+        plt.plot(losses['Epoch'], losses['Val_loss'], label="Loss on Validation")
+        plt.savefig('./avgloss_epoch_curve.png')
         plt.show()
 
         torch.save(self.state_dict(), PATH)
@@ -161,35 +149,17 @@ class CNN_Net(nn.Module):
         else:
             # ROC
             thresholds = np.linspace(0, 1, num=50)
-            tpr = []
-            fpr = []
-            acc = []
-
-
-            true_labels = np.array(true_labels)
 
-            for threshold in thresholds:
-                # Thresholding the predictions (meaning all predictions above threshold are considered positive)
-                thresholded_predictions = (predictions >= threshold).astype(int)
+            # Calculate TPR and FPR
+            fpr, tpr, thresholds = roc_curve(true_labels, predictions)
 
-                # Calculating true positives, false positives, true negatives, false negatives
-                true_positives = np.sum((thresholded_predictions == 1) & (true_labels == 1))
-                false_positives = np.sum((thresholded_predictions == 1) & (true_labels == 0))
-                true_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 0))
-                false_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 1))
+            # Calculate AUC
+            roc_auc = auc(fpr, tpr)
 
-                accuracy  = (true_positives + true_negatives) / (true_positives + false_positives + true_negatives + false_negatives)
-
-                # Calculate TPR and FPR
-                tpr.append(true_positives / (true_positives + false_negatives))
-                fpr.append(false_positives / (false_positives + true_negatives))
-                acc.append(accuracy)
-
-
-            plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
+            plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC: {roc_auc})')
             plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-            plt.xlim([0.0, 1.0])
-            plt.ylim([0.0, 1.0])
+            plt.xlim([0.0, 1.005])
+            plt.ylim([0.0, 1.005])
 
             plt.xlabel('False Positive Rate (1 - Specificity)')
             plt.ylabel('True Positive Rate (Sensitivity)')
@@ -198,18 +168,6 @@ class CNN_Net(nn.Module):
             plt.savefig('./ROC.png')
             plt.show()
 
-            plt.plot(thresholds, acc)
-            plt.xlabel('Thresholds')
-            plt.ylabel('Accuracy')
-            plt.title('Accuracy vs thresholds')
-            plt.savefig('./acc.png')
-            plt.show()
-
-
-            # ROC ATTEMPT 2
-            # fprRoc, tprRoc = roc_curve(true_labels, predictions)
-            # plt.plot(fprRoc, tprRoc)
-
         self.train()
 
         return(loss.item())
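
The hand-rolled threshold loop in evaluate() is replaced above by scikit-learn's roc_curve and auc, which choose the operating thresholds from the scores themselves. A minimal self-contained sketch of that API on dummy arrays (the labels and scores below are illustrative, not model output):

import numpy as np
from sklearn.metrics import roc_curve, auc

# Illustrative ground-truth labels and positive-class scores.
true_labels = np.array([0, 0, 1, 1])
predictions = np.array([0.1, 0.4, 0.35, 0.8])

# roc_curve picks one threshold per distinct score and returns matched FPR/TPR arrays.
fpr, tpr, thresholds = roc_curve(true_labels, predictions)
roc_auc = auc(fpr, tpr)  # area under the ROC curve

print(f"AUC: {roc_auc:.3f}")

Because roc_curve selects its own thresholds, the thresholds = np.linspace(0, 1, num=50) line kept in the hunk above is immediately overwritten and is effectively dead code; the legend label could also format the area (e.g. {roc_auc:.3f}) so it stays readable.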

+0 -1  utils/CNN_Layers.py

@@ -4,7 +4,6 @@ from torch import add
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
-import utils.CNN_methods as CNN
 import copy
 
 class Conv_elu_maxpool_drop(nn.Module):

+22 -0  utils/Graphs.py

@@ -0,0 +1,22 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+
+df = pd.read_csv("./cnn_net_data.csv")
+
+# To re-graph the ROC, run main.py without training.
+# It will run the validation set and rebuild the curve from the latest trained model.
+
+# MAKES EPOCH VS AVG LOSS GRAPH
+plt.plot(df['Epoch'], df['Avg_loss'], label="Loss on Training")
+plt.xlabel('Epoch')
+plt.ylabel('Average Loss')
+plt.title('Loss vs Epoch On Training & Validation data')
+
+# MAKES EPOCH VS VALIDATION LOSS GRAPH
+plt.plot(df['Epoch'], df['Val_loss'], label="Loss on Validation")
+plt.savefig('./avgloss_epoch_curve.png')
+plt.legend()
+plt.show()
+
+
+# ACCURACY VS THRESHOLD GRAPH

+211 -0  utils/K-fold.py

@@ -0,0 +1,211 @@
+import os
+import torch
+from utils.CNN import CNN_Net
+from torch import nn
+from torch.utils.data import DataLoader, ConcatDataset
+from torchvision import transforms
+from sklearn.model_selection import KFold, StratifiedKFold
+from utils.preprocess import prepare_datasets, prepare_predict
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def reset_weights(m):
+    '''
+      Try resetting model weights to avoid
+      weight leakage.
+    '''
+    for layer in m.children():
+        if hasattr(layer, 'reset_parameters'):
+            print(f'Reset trainable parameters of layer = {layer}')
+            layer.reset_parameters()
+
+if __name__ == '__main__':
+
+    # Might have to replace datapaths or separate between training and testing
+    model_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN'
+    CNN_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/cnn_net.pth'  # cnn_net.pth
+    # mri_datapath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/PET_volumes_customtemplate_float32/'   # Small Test
+    mri_datapath = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'  # Real data
+    annotations_datapath = './data/data_wnx1/rschuurs/Pytorch_CNN-RNN/LP_ADNIMERGE.csv'
+
+    properties = {
+        "batch_size": 6,
+        "padding": 0,
+        "dilation": 1,
+        "groups": 1,
+        "bias": True,
+        "padding_mode": "zeros",
+        "drop_rate": 0
+    }
+
+    # Configuration options
+    k_folds = 5     # TODO
+    num_epochs = 1
+    loss_function = nn.CrossEntropyLoss()
+
+    # For fold results
+    results = {}
+
+    # Set fixed random number seed
+    torch.manual_seed(42)
+
+    training_data, val_data, test_data = prepare_datasets(mri_datapath, val_split=0.2, seed=12)
+
+    dataset = ConcatDataset([training_data, test_data])
+
+    # Define the K-fold Cross Validator
+    kfold = KFold(n_splits=k_folds, shuffle=True)
+
+    # Start print
+    print('--------------------------------')
+
+    # K-fold Cross Validation model evaluation
+    for fold, (train_ids, test_ids) in enumerate(kfold.split(training_data)):
+
+        # Print
+        print(f'FOLD {fold}')
+        print('--------------------------------')
+
+        # Sample elements randomly from a given list of ids, no replacement.
+        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
+        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
+
+        # Define data loaders for training and testing data in this fold
+        trainloader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=10, sampler=train_subsampler)
+        testloader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=10, sampler=test_subsampler)
+
+        # Init the neural network
+        network = CNN_Net(prps=properties, final_layer_size=2)
+        network.apply(reset_weights)
+
+        # Initialize optimizer
+        optimizer = torch.optim.Adam(network.parameters(), lr=1e-5)
+
+        # Run the training loop for defined number of epochs
+        for epoch in range(0, num_epochs):
+
+            # Print epoch
+            print(f'Starting epoch {epoch + 1}')
+
+            # Set current loss value
+            current_loss = 0.0
+
+            # Iterate over the DataLoader for training data
+            for i, data in enumerate(trainloader, 0):
+
+                # Get inputs
+                inputs, targets = data
+
+                # Zero the gradients
+                optimizer.zero_grad()
+
+                # Perform forward pass
+                outputs = network(inputs)
+
+                # Compute loss
+                loss = loss_function(outputs, targets)
+
+                # Perform backward pass
+                loss.backward()
+
+                # Perform optimization
+                optimizer.step()
+
+                # Print statistics
+                current_loss += loss.item()
+                if i % 500 == 499:
+                    print('Loss after mini-batch %5d: %.3f' %
+                          (i + 1, current_loss / 500))
+                    current_loss = 0.0
+
+        # Process is complete.
+        print('Training process has finished. Saving trained model.')
+
+        # Print about testing
+        print('Starting testing')
+
+        # Saving the model
+        save_path = f'./model-fold-{fold}.pth'
+        torch.save(network.state_dict(), save_path)
+
+        # Evaluation for this fold
+        correct, total = 0, 0
+        with torch.no_grad():
+
+            predictions = []
+            true_labels = []
+
+            # Iterate over the test data and generate predictions
+            for i, data in enumerate(testloader, 0):
+                # Get inputs
+                inputs, targets = data
+
+                # Generate outputs
+                outputs = network(inputs)
+
+                # Set total and correct
+                _, predicted = torch.max(outputs.data, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
+
+                predictions.extend(outputs.data[:, 1].cpu().numpy())  # Grabs probability of positive
+                true_labels.extend(targets.cpu().numpy())
+
+            # Print accuracy
+            print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
+            print('--------------------------------')
+            results[fold] = 100.0 * (correct / total)
+
+
+        # MAKES ROC CURVE
+        thresholds = np.linspace(0, 1, num=50)
+        tpr = []
+        fpr = []
+        acc = []
+
+        true_labels = np.array(true_labels)
+
+        for threshold in thresholds:
+            # Thresholding the predictions (meaning all predictions above threshold are considered positive)
+            thresholded_predictions = (predictions >= threshold).astype(int)
+
+            # Calculating true positives, false positives, true negatives, false negatives
+            true_positives = np.sum((thresholded_predictions == 1) & (true_labels == 1))
+            false_positives = np.sum((thresholded_predictions == 1) & (true_labels == 0))
+            true_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 0))
+            false_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 1))
+
+            accuracy = (true_positives + true_negatives) / (
+                        true_positives + false_positives + true_negatives + false_negatives)
+
+            # Calculate TPR and FPR
+            tpr.append(true_positives / (true_positives + false_negatives))
+            fpr.append(false_positives / (false_positives + true_negatives))
+            acc.append(accuracy)
+
+        plt.plot(fpr, tpr, lw=2, label=f'ROC Fold {fold}')
+        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.0])
+
+        plt.xlabel('False Positive Rate (1 - Specificity)')
+        plt.ylabel('True Positive Rate (Sensitivity)')
+        plt.title('Receiver Operating Characteristic (ROC) Curve')
+        plt.legend(loc="lower right")
+
+    plt.savefig(f'./ROC_{k_folds}_Folds.png')
+    plt.show()
+
+    # Print fold results
+    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
+    print('--------------------------------')
+    sum = 0.0
+    for key, value in results.items():
+        print(f'Fold {key}: {value} %')
+        sum += value
+    print(f'Average: {sum / len(results.items())} %')

BIN  valloss_epoch_curve.png