Attempted K-fold (unsuccessful), new graphs

Ruben 5 months ago
parent · commit e410eeef6e
8 changed files with 265 additions and 71 deletions
  1. ROC.png (BIN)
  2. avgloss_epoch_curve.png (BIN)
  3. main.py (+ 18 - 14)
  4. utils/CNN.py (+ 14 - 56)
  5. utils/CNN_Layers.py (+ 0 - 1)
  6. utils/Graphs.py (+ 22 - 0)
  7. utils/K-fold.py (+ 211 - 0)
  8. valloss_epoch_curve.png (BIN)

BIN
ROC.png


BIN
avgloss_epoch_curve.png


+ 18 - 14
main.py

@@ -22,11 +22,13 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import glob
+import platform



 print("--- RUNNING ---")
 print("Pytorch Version: " + torch. __version__)
+print("Python Version: " + platform.python_version())

 # LOADING DATA
 # data & training properties:
@@ -85,24 +87,26 @@ val_dataloader = DataLoader(val_data, batch_size=properties['batch_size'], shuff


 # Display 4 images and labels.
-# x = 1
-# while x < 1:
-#     train_features, train_labels = next(iter(train_dataloader))
-#     print(f"Feature batch shape: {train_features.size()}")
-#     img = train_features[0].squeeze()
-#     print(f"Feature batch shape: {img.size()}")
-#     image = img[:, :, 40]
-#     print(f"Feature batch shape: {image.size()}")
-#     label = train_labels[0]
-#     print(f"Label: {label}")
-#     plt.imshow(image, cmap="gray")
-#     plt.show()
-#     x = x+1
+x = 0
+while x < 0:
+    train_features, train_labels = next(iter(train_dataloader))
+    print(f"Feature batch shape: {train_features.size()}")
+    img = train_features[0].squeeze()
+    print(f"Feature batch shape: {img.size()}")
+    image = img[:, :, 40]
+    print(f"Feature batch shape: {image.size()}")
+    label = train_labels[0]
+    print(f"Label: {label}")
+    plt.imshow(image, cmap="gray")
+    plt.savefig(f"./Image{x}_IS:{label}.png")
+    plt.show()
+
+    x = x+1


 train = False
 predict = False
-CNN = CNN_Net(train_dataloader, prps=properties, final_layer_size=2)
+CNN = CNN_Net(prps=properties, final_layer_size=2)
 CNN.cuda()

 # RUN CNN
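
Note on the preview block added above: it is committed with `x = 0` and `while x < 0`, so it never runs as-is. A minimal sketch of the same idea as an explicit helper, assuming the dataloader yields (volume, label) batches as in this repo; `preview_samples` and `num_previews` are illustrative names, not part of the codebase:

    import matplotlib.pyplot as plt

    def preview_samples(dataloader, num_previews=0):
        """Save and show the middle slice of the first volume in each of `num_previews` batches."""
        it = iter(dataloader)
        for i in range(num_previews):
            features, labels = next(it)          # one batch of 3D volumes
            volume = features[0].squeeze()       # drop the channel dim -> (D, H, W)
            mid = volume.shape[-1] // 2          # middle slice instead of the hard-coded 40
            plt.imshow(volume[:, :, mid], cmap="gray")
            plt.title(f"Label: {labels[0].item()}")
            plt.savefig(f"./Image{i}_label{labels[0].item()}.png")
            plt.show()

    # preview_samples(train_dataloader, num_previews=4)  # 0 previews = disabled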

+ 14 - 56
utils/CNN.py

@@ -5,28 +5,21 @@ import torch.nn as nn
 import utils.CNN_Layers as CustomLayers
 import torch.nn.functional as F
 import torch.optim as optim
-import utils.CNN_methods as CNN
 import pandas as pd
 import matplotlib.pyplot as plt
 import time
 import numpy as np
-# from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import roc_curve, auc

 class CNN_Net(nn.Module):
-    def __init__(self, input, prps, final_layer_size=5):
+    def __init__(self, prps, final_layer_size=5):
         super(CNN_Net, self).__init__()
         self.final_layer_size = final_layer_size
         self.device = device('cuda:0' if cuda.is_available() else 'cpu')
         print("CNN Initialized. Using: " + str(self.device))

-        # GETS FIRST IMAGE FOR SIZE
-        data_iter = iter(input)
-        first_batch = next(data_iter)
-        first_features = first_batch[0]
-        image = first_features[0]
-
         # LAYERS
-        print(f"CNN Model Initialization. Input size: {image.size()}")
+        print(f"CNN Model Initialization")
         self.conv1 = CustomLayers.Conv_elu_maxpool_drop(1, 192, (11, 13, 11), stride=(4,4,4), pool=True, prps=prps)
         self.conv2 = CustomLayers.Conv_elu_maxpool_drop(192, 384, (5, 6, 5), stride=(1,1,1), pool=True, prps=prps)
         self.conv3_mid_flow = CustomLayers.Mid_flow(384, 384, prps=prps)
@@ -107,19 +100,14 @@ class CNN_Net(nn.Module):
         losses.to_csv('./cnn_net_data.csv')

         # MAKES EPOCH VS AVG LOSS GRAPH
-        plt.plot(losses['Epoch'], losses['Avg_loss'])
+        plt.plot(losses['Epoch'], losses['Avg_loss'], label="Loss on Training")
         plt.xlabel('Epoch')
         plt.ylabel('Average Loss')
-        plt.title('Average Loss vs Epoch On Training')
-        plt.savefig('./avgloss_epoch_curve.png')
-        plt.show()
+        plt.title('Loss vs Epoch On Training & Validation data')

         # MAKES EPOCH VS VALIDATION LOSS GRAPH
-        plt.plot(losses['Epoch'], losses['Val_loss'])
-        plt.xlabel('Epoch')
-        plt.ylabel('Validation Loss')
-        plt.title('Validation Loss vs Epoch On Training')
-        plt.savefig('./valloss_epoch_curve.png')
+        plt.plot(losses['Epoch'], losses['Val_loss'], label="Loss on Validation")
+        plt.savefig('./avgloss_epoch_curve.png')
         plt.show()

         torch.save(self.state_dict(), PATH)
@@ -161,35 +149,17 @@ class CNN_Net(nn.Module):
         else:
             # ROC
             thresholds = np.linspace(0, 1, num=50)
-            tpr = []
-            fpr = []
-            acc = []
-
-
-            true_labels = np.array(true_labels)

-            for threshold in thresholds:
-                # Thresholding the predictions (meaning all predictions above threshold are considered positive)
-                thresholded_predictions = (predictions >= threshold).astype(int)
+            # Calculate TPR and FPR
+            fpr, tpr, thresholds = roc_curve(true_labels, predictions)

-                # Calculating true positives, false positives, true negatives, false negatives
-                true_positives = np.sum((thresholded_predictions == 1) & (true_labels == 1))
-                false_positives = np.sum((thresholded_predictions == 1) & (true_labels == 0))
-                true_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 0))
-                false_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 1))
+            # Calculate AUC
+            roc_auc = auc(fpr, tpr)

-                accuracy  = (true_positives + true_negatives) / (true_positives + false_positives + true_negatives + false_negatives)
-
-                # Calculate TPR and FPR
-                tpr.append(true_positives / (true_positives + false_negatives))
-                fpr.append(false_positives / (false_positives + true_negatives))
-                acc.append(accuracy)
-
-
-            plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
+            plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC: {roc_auc})')
             plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-            plt.xlim([0.0, 1.0])
-            plt.ylim([0.0, 1.0])
+            plt.xlim([0.0, 1.005])
+            plt.ylim([0.0, 1.005])

             plt.xlabel('False Positive Rate (1 - Specificity)')
             plt.ylabel('True Positive Rate (Sensitivity)')
@@ -198,18 +168,6 @@ class CNN_Net(nn.Module):
             plt.savefig('./ROC.png')
             plt.show()

-            plt.plot(thresholds, acc)
-            plt.xlabel('Thresholds')
-            plt.ylabel('Accuracy')
-            plt.title('Accuracy vs thresholds')
-            plt.savefig('./acc.png')
-            plt.show()
-
-
-            # ROC ATTEMPT 2
-            # fprRoc, tprRoc = roc_curve(true_labels, predictions)
-            # plt.plot(fprRoc, tprRoc)
-
         self.train()

         return(loss.item())
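
This diff swaps the hand-rolled threshold sweep for scikit-learn's `roc_curve` and `auc`; the `thresholds = np.linspace(0, 1, num=50)` context line kept above is now unused, since `roc_curve` returns its own thresholds. A minimal, self-contained sketch of what the two calls produce, on toy binary labels and positive-class scores (the values are illustrative only):

    import numpy as np
    from sklearn.metrics import roc_curve, auc

    true_labels = np.array([0, 0, 1, 1])           # binary ground truth
    predictions = np.array([0.1, 0.4, 0.35, 0.8])  # positive-class scores

    fpr, tpr, thresholds = roc_curve(true_labels, predictions)
    roc_auc = auc(fpr, tpr)   # trapezoidal area under the (fpr, tpr) points

    print(roc_auc)            # 0.75 for this toy example

In the plot label, `f'ROC curve (AUC: {roc_auc:.3f})'` would keep the legend short instead of printing the full unrounded float.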

+ 0 - 1
utils/CNN_Layers.py

@@ -4,7 +4,6 @@ from torch import add
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
-import utils.CNN_methods as CNN
 import copy

 class Conv_elu_maxpool_drop(nn.Module):

+ 22 - 0
utils/Graphs.py

@@ -0,0 +1,22 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+
+df = pd.read_csv("./cnn_net_data.csv")
+
+# To regraph the ROC, run main.py without training:
+# it will run the validation set and rebuild the ROC from the latest trained model
+
+# MAKES EPOCH VS AVG LOSS GRAPH
+plt.plot(df['Epoch'], df['Avg_loss'], label="Loss on Training")
+plt.xlabel('Epoch')
+plt.ylabel('Average Loss')
+plt.title('Loss vs Epoch On Training & Validation data')
+
+# MAKES EPOCH VS VALIDATION LOSS GRAPH
+plt.plot(df['Epoch'], df['Val_loss'], label="Loss on Validation")
+plt.legend()
+plt.savefig('./avgloss_epoch_curve.png')
+plt.show()
+
+
+# ACCURACY VS THRESHOLD GRAPH
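
The trailing comment has no code behind it yet; the accuracy-vs-threshold plot it refers to is the acc.png block removed from CNN.py in this commit. A minimal sketch of how it could be rebuilt here, assuming per-sample labels and positive-class scores are available from an evaluation run (cnn_net_data.csv only holds per-epoch losses, so `true_labels` and `predictions` below are assumed inputs):

    import numpy as np
    import matplotlib.pyplot as plt

    def plot_accuracy_vs_threshold(true_labels, predictions, out_path='./acc.png'):
        true_labels = np.asarray(true_labels)      # 0/1 ground truth
        predictions = np.asarray(predictions)      # positive-class scores in [0, 1]
        thresholds = np.linspace(0, 1, num=50)
        acc = [np.mean((predictions >= t).astype(int) == true_labels) for t in thresholds]

        plt.plot(thresholds, acc)
        plt.xlabel('Thresholds')
        plt.ylabel('Accuracy')
        plt.title('Accuracy vs thresholds')
        plt.savefig(out_path)
        plt.show()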

+ 211 - 0
utils/K-fold.py

@@ -0,0 +1,211 @@
+import os
+import torch
+from utils.CNN import CNN_Net
+from torch import nn
+from torch.utils.data import DataLoader, ConcatDataset
+from torchvision import transforms
+from sklearn.model_selection import KFold, StratifiedKFold
+from utils.preprocess import prepare_datasets, prepare_predict
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def reset_weights(m):
+    '''
+      Try resetting model weights to avoid
+      weight leakage.
+    '''
+    for layer in m.children():
+        if hasattr(layer, 'reset_parameters'):
+            print(f'Reset trainable parameters of layer = {layer}')
+            layer.reset_parameters()
+
+if __name__ == '__main__':
+
+    # Might have to replace datapaths or keep training and testing data separate
+    model_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN'
+    CNN_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/cnn_net.pth'  # cnn_net.pth
+    # mri_datapath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/PET_volumes_customtemplate_float32/'   # Small Test
+    mri_datapath = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'  # Real data
+    annotations_datapath = './data/data_wnx1/rschuurs/Pytorch_CNN-RNN/LP_ADNIMERGE.csv'
+
+    properties = {
+        "batch_size": 6,
+        "padding": 0,
+        "dilation": 1,
+        "groups": 1,
+        "bias": True,
+        "padding_mode": "zeros",
+        "drop_rate": 0
+    }
+
+    # Configuration options
+    k_folds = 5     # TODO
+    num_epochs = 1
+    loss_function = nn.CrossEntropyLoss()
+
+    # For fold results
+    results = {}
+
+    # Set fixed random number seed
+    torch.manual_seed(42)
+
+    training_data, val_data, test_data = prepare_datasets(mri_datapath, val_split=0.2, seed=12)
+
+    dataset = ConcatDataset([training_data, test_data])
+
+    # Define the K-fold Cross Validator
+    kfold = KFold(n_splits=k_folds, shuffle=True)
+
+    # Start print
+    print('--------------------------------')
+
+    # K-fold Cross Validation model evaluation
+    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
+
+        # Print
+        print(f'FOLD {fold}')
+        print('--------------------------------')
+
+        # Sample elements randomly from a given list of ids, no replacement.
+        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
+        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
+
+        # Define data loaders for training and testing data in this fold
+        trainloader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=10, sampler=train_subsampler)
+        testloader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=10, sampler=test_subsampler)
+
+        # Init the neural network
+        network = CNN_Net(prps=properties, final_layer_size=2)
+        network.apply(reset_weights)
+
+        # Initialize optimizer
+        optimizer = torch.optim.Adam(network.parameters(), lr=1e-5)
+
+        # Run the training loop for defined number of epochs
+        for epoch in range(0, num_epochs):
+
+            # Print epoch
+            print(f'Starting epoch {epoch + 1}')
+
+            # Set current loss value
+            current_loss = 0.0
+
+            # Iterate over the DataLoader for training data
+            for i, data in enumerate(trainloader, 0):
+
+                # Get inputs
+                inputs, targets = data
+
+                # Zero the gradients
+                optimizer.zero_grad()
+
+                # Perform forward pass
+                outputs = network(inputs)
+
+                # Compute loss
+                loss = loss_function(outputs, targets)
+
+                # Perform backward pass
+                loss.backward()
+
+                # Perform optimization
+                optimizer.step()
+
+                # Print statistics
+                current_loss += loss.item()
+                if i % 500 == 499:
+                    print('Loss after mini-batch %5d: %.3f' %
+                          (i + 1, current_loss / 500))
+                    current_loss = 0.0
+
+        # Process is complete.
+        print('Training process has finished. Saving trained model.')
+
+        # Print about testing
+        print('Starting testing')
+
+        # Saving the model
+        save_path = f'./model-fold-{fold}.pth'
+        torch.save(network.state_dict(), save_path)
+
+        # Evaluation for this fold
+        correct, total = 0, 0
+        with torch.no_grad():
+
+            predictions = []
+            true_labels = []
+
+            # Iterate over the test data and generate predictions
+            for i, data in enumerate(testloader, 0):
+                # Get inputs
+                inputs, targets = data
+
+                # Generate outputs
+                outputs = network(inputs)
+
+                # Set total and correct
+                _, predicted = torch.max(outputs.data, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
+
+                predictions.extend(outputs.data[:, 1].cpu().numpy())  # Grabs probability of positive
+                true_labels.extend(targets.cpu().numpy())
+
+            # Print accuracy
+            print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
+            print('--------------------------------')
+            results[fold] = 100.0 * (correct / total)
+
+
+        # MAKES ROC CURVE
+        thresholds = np.linspace(0, 1, num=50)
+        tpr = []
+        fpr = []
+        acc = []
+
+        true_labels, predictions = np.array(true_labels), np.array(predictions)
+
+        for threshold in thresholds:
+            # Thresholding the predictions (meaning all predictions above threshold are considered positive)
+            thresholded_predictions = (predictions >= threshold).astype(int)
+
+            # Calculating true positives, false positives, true negatives, false negatives
+            true_positives = np.sum((thresholded_predictions == 1) & (true_labels == 1))
+            false_positives = np.sum((thresholded_predictions == 1) & (true_labels == 0))
+            true_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 0))
+            false_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 1))
+
+            accuracy = (true_positives + true_negatives) / (
+                        true_positives + false_positives + true_negatives + false_negatives)
+
+            # Calculate TPR and FPR
+            tpr.append(true_positives / (true_positives + false_negatives))
+            fpr.append(false_positives / (false_positives + true_negatives))
+            acc.append(accuracy)
+
+        plt.plot(fpr, tpr, lw=2, label=f'ROC Fold {fold}')
+        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.0])
+
+        plt.xlabel('False Positive Rate (1 - Specificity)')
+        plt.ylabel('True Positive Rate (Sensitivity)')
+        plt.title('Receiver Operating Characteristic (ROC) Curve')
+        plt.legend(loc="lower right")
+
+    plt.savefig(f'./ROC_{k_folds}_Folds.png')
+    plt.show()
+
+    # Print fold results
+    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
+    print('--------------------------------')
+    sum = 0.0
+    for key, value in results.items():
+        print(f'Fold {key}: {value} %')
+        sum += value
+    print(f'Average: {sum / len(results.items())} %')
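
Because the script samples from the ConcatDataset, the fold indices have to be generated from that same object or the folds never reach the concatenated tail. A self-contained sketch of the KFold + SubsetRandomSampler pattern used above, with toy TensorDatasets standing in for the MRI data (`part_a`/`part_b` are placeholders, not real datasets):

    import torch
    from torch.utils.data import ConcatDataset, DataLoader, SubsetRandomSampler, TensorDataset
    from sklearn.model_selection import KFold

    # Toy stand-ins for the real training/test MRI datasets
    part_a = TensorDataset(torch.randn(8, 3), torch.randint(0, 2, (8,)))
    part_b = TensorDataset(torch.randn(4, 3), torch.randint(0, 2, (4,)))
    dataset = ConcatDataset([part_a, part_b])     # 12 samples total

    kfold = KFold(n_splits=3, shuffle=True, random_state=42)

    # split() is called on the same object the samplers index into, so every
    # fold can draw samples from both concatenated parts.
    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
        trainloader = DataLoader(dataset, batch_size=4, sampler=SubsetRandomSampler(train_ids))
        testloader = DataLoader(dataset, batch_size=4, sampler=SubsetRandomSampler(test_ids))
        print(f'fold {fold}: {len(train_ids)} train / {len(test_ids)} test samples')

StratifiedKFold (imported in the script but unused) would additionally need the label array, so that each fold keeps a similar class balance.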

BIN
valloss_epoch_curve.png