
Attempted K-fold (unsuccessful), new graphs

Ruben 6 months ago
parent
commit
e410eeef6e
8 changed files with 265 additions and 71 deletions
  1. BIN      ROC.png
  2. BIN      avgloss_epoch_curve.png
  3. +18 -14  main.py
  4. +14 -56  utils/CNN.py
  5. +0 -1    utils/CNN_Layers.py
  6. +22 -0   utils/Graphs.py
  7. +211 -0  utils/K-fold.py
  8. BIN      valloss_epoch_curve.png

BIN
avgloss_epoch_curve.png


+ 18 - 14
main.py

@@ -22,11 +22,13 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import glob
+import platform
 
 
 
 print("--- RUNNING ---")
 print("Pytorch Version: " + torch. __version__)
+print("Python Version: " + platform.python_version())
 
 # LOADING DATA
 # data & training properties:
@@ -85,24 +87,26 @@ val_dataloader = DataLoader(val_data, batch_size=properties['batch_size'], shuff
 
 
 # Display 4 images and labels.
-# x = 1
-# while x < 1:
-#     train_features, train_labels = next(iter(train_dataloader))
-#     print(f"Feature batch shape: {train_features.size()}")
-#     img = train_features[0].squeeze()
-#     print(f"Feature batch shape: {img.size()}")
-#     image = img[:, :, 40]
-#     print(f"Feature batch shape: {image.size()}")
-#     label = train_labels[0]
-#     print(f"Label: {label}")
-#     plt.imshow(image, cmap="gray")
-#     plt.show()
-#     x = x+1
+x = 0
+while x < 0:
+    train_features, train_labels = next(iter(train_dataloader))
+    print(f"Feature batch shape: {train_features.size()}")
+    img = train_features[0].squeeze()
+    print(f"Feature batch shape: {img.size()}")
+    image = img[:, :, 40]
+    print(f"Feature batch shape: {image.size()}")
+    label = train_labels[0]
+    print(f"Label: {label}")
+    plt.imshow(image, cmap="gray")
+    plt.savefig(f"./Image{x}_IS:{label}.png")
+    plt.show()
+
+    x = x+1
 
 
 train = False
 predict = False
-CNN = CNN_Net(train_dataloader, prps=properties, final_layer_size=2)
+CNN = CNN_Net(prps=properties, final_layer_size=2)
 CNN.cuda()
 
 # RUN CNN
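
Note on the hunk above: the preview block never executes as committed, since `x` starts at 0 and the loop condition is `x < 0`, so it acts as a disabled switch rather than the old commented-out code. Below is a minimal sketch of the same idea with an explicit count, assuming a dataloader shaped like `train_dataloader` in main.py; the function name and `slice_idx` parameter are placeholders, not part of the repository.

# Sketch only: an explicit preview toggle, equivalent to the disabled `while x < 0` loop.
# Assumes a dataloader yielding (features, labels) batches of 3D volumes, as in main.py;
# slice index 40 is the same arbitrary mid-volume slice used there.
import matplotlib.pyplot as plt

def preview_slices(dataloader, num_images=0, slice_idx=40):
    """Save and show `num_images` example slices; 0 disables the preview."""
    data_iter = iter(dataloader)
    for x in range(num_images):
        features, labels = next(data_iter)
        image = features[0].squeeze()[:, :, slice_idx]   # first volume, one slice
        label = labels[0]
        plt.imshow(image, cmap="gray")
        plt.savefig(f"./Image{x}_IS_{label}.png")  # underscore avoids ':' in the filename
        plt.show()

# preview_slices(train_dataloader, num_images=3)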

+ 14 - 56
utils/CNN.py

@@ -5,28 +5,21 @@ import torch.nn as nn
 import utils.CNN_Layers as CustomLayers
 import torch.nn.functional as F
 import torch.optim as optim
-import utils.CNN_methods as CNN
 import pandas as pd
 import matplotlib.pyplot as plt
 import time
 import numpy as np
-# from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import roc_curve, auc
 
 class CNN_Net(nn.Module):
-    def __init__(self, input, prps, final_layer_size=5):
+    def __init__(self, prps, final_layer_size=5):
         super(CNN_Net, self).__init__()
         self.final_layer_size = final_layer_size
         self.device = device('cuda:0' if cuda.is_available() else 'cpu')
         print("CNN Initialized. Using: " + str(self.device))
 
-        # GETS FIRST IMAGE FOR SIZE
-        data_iter = iter(input)
-        first_batch = next(data_iter)
-        first_features = first_batch[0]
-        image = first_features[0]
-
         # LAYERS
-        print(f"CNN Model Initialization. Input size: {image.size()}")
+        print(f"CNN Model Initialization")
         self.conv1 = CustomLayers.Conv_elu_maxpool_drop(1, 192, (11, 13, 11), stride=(4,4,4), pool=True, prps=prps)
         self.conv2 = CustomLayers.Conv_elu_maxpool_drop(192, 384, (5, 6, 5), stride=(1,1,1), pool=True, prps=prps)
         self.conv3_mid_flow = CustomLayers.Mid_flow(384, 384, prps=prps)
@@ -107,19 +100,14 @@ class CNN_Net(nn.Module):
         losses.to_csv('./cnn_net_data.csv')
 
         # MAKES EPOCH VS AVG LOSS GRAPH
-        plt.plot(losses['Epoch'], losses['Avg_loss'])
+        plt.plot(losses['Epoch'], losses['Avg_loss'], label="Loss on Training")
         plt.xlabel('Epoch')
         plt.ylabel('Average Loss')
-        plt.title('Average Loss vs Epoch On Training')
-        plt.savefig('./avgloss_epoch_curve.png')
-        plt.show()
+        plt.title('Loss vs Epoch On Training & Validation data')
 
         # MAKES EPOCH VS VALIDATION LOSS GRAPH
-        plt.plot(losses['Epoch'], losses['Val_loss'])
-        plt.xlabel('Epoch')
-        plt.ylabel('Validation Loss')
-        plt.title('Validation Loss vs Epoch On Training')
-        plt.savefig('./valloss_epoch_curve.png')
+        plt.plot(losses['Epoch'], losses['Val_loss'], label="Loss on Validation")
+        plt.savefig('./avgloss_epoch_curve.png')
         plt.show()
 
         torch.save(self.state_dict(), PATH)
@@ -161,35 +149,17 @@ class CNN_Net(nn.Module):
         else:
             # ROC
             thresholds = np.linspace(0, 1, num=50)
-            tpr = []
-            fpr = []
-            acc = []
-
-
-            true_labels = np.array(true_labels)
 
-            for threshold in thresholds:
-                # Thresholding the predictions (meaning all predictions above threshold are considered positive)
-                thresholded_predictions = (predictions >= threshold).astype(int)
+            # Calculate TPR and FPR
+            fpr, tpr, thresholds = roc_curve(true_labels, predictions)
 
-                # Calculating true positives, false positives, true negatives, false negatives
-                true_positives = np.sum((thresholded_predictions == 1) & (true_labels == 1))
-                false_positives = np.sum((thresholded_predictions == 1) & (true_labels == 0))
-                true_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 0))
-                false_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 1))
+            # Calculate AUC
+            roc_auc = auc(fpr, tpr)
 
-                accuracy  = (true_positives + true_negatives) / (true_positives + false_positives + true_negatives + false_negatives)
-
-                # Calculate TPR and FPR
-                tpr.append(true_positives / (true_positives + false_negatives))
-                fpr.append(false_positives / (false_positives + true_negatives))
-                acc.append(accuracy)
-
-
-            plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
+            plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC: {roc_auc})')
             plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-            plt.xlim([0.0, 1.0])
-            plt.ylim([0.0, 1.0])
+            plt.xlim([0.0, 1.005])
+            plt.ylim([0.0, 1.005])
 
             plt.xlabel('False Positive Rate (1 - Specificity)')
             plt.ylabel('True Positive Rate (Sensitivity)')
@@ -198,18 +168,6 @@ class CNN_Net(nn.Module):
             plt.savefig('./ROC.png')
             plt.show()
 
-            plt.plot(thresholds, acc)
-            plt.xlabel('Thresholds')
-            plt.ylabel('Accuracy')
-            plt.title('Accuracy vs thresholds')
-            plt.savefig('./acc.png')
-            plt.show()
-
-
-            # ROC ATTEMPT 2
-            # fprRoc, tprRoc = roc_curve(true_labels, predictions)
-            # plt.plot(fprRoc, tprRoc)
-
         self.train()
 
         return(loss.item())
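
In this hunk the hand-rolled threshold sweep is replaced by scikit-learn's `roc_curve` and `auc`, and the training and validation losses are drawn on one figure. One caveat: the `label=` arguments only appear once `plt.legend()` is called, and only in the saved PNG if that call precedes `plt.savefig()`. Below is a minimal standalone sketch of the same `roc_curve`/`auc` pattern on synthetic scores (the arrays are placeholders, not model output), with the AUC rounded in the legend.

# Minimal sketch of the sklearn ROC/AUC pattern used above, on synthetic data.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

true_labels = np.array([0, 0, 1, 1, 0, 1])                 # placeholder ground truth
predictions = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])    # placeholder positive-class scores

fpr, tpr, thresholds = roc_curve(true_labels, predictions)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC: {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.legend(loc="lower right")   # called before show/savefig so the labels are drawn
plt.show()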

+ 0 - 1
utils/CNN_Layers.py

@@ -4,7 +4,6 @@ from torch import add
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
-import utils.CNN_methods as CNN
 import copy
 
 class Conv_elu_maxpool_drop(nn.Module):

+ 22 - 0
utils/Graphs.py

@@ -0,0 +1,22 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+
+df = pd.read_csv("./cnn_net_data.csv")
+
+# To Regraph ROC, run main.py without training
+# It will run validation sett and create it based on latest trained model
+
+# MAKES EPOCH VS AVG LOSS GRAPH
+plt.plot(df['Epoch'], df['Avg_loss'], label="Loss on Training")
+plt.xlabel('Epoch')
+plt.ylabel('Average Loss')
+plt.title('Loss vs Epoch On Training & Validation data')
+
+# MAKES EPOCH VS VALIDATION LOSS GRAPH
+plt.plot(df['Epoch'], df['Val_loss'], label="Loss on Validation")
+plt.savefig('./avgloss_epoch_curve.png')
+plt.legend()
+plt.show()
+
+
+# ACCURACY VS THRESHOLD GRAPH

+ 211 - 0
utils/K-fold.py

@@ -0,0 +1,211 @@
+import os
+import torch
+from utils.CNN import CNN_Net
+from torch import nn
+from torch.utils.data import DataLoader, ConcatDataset
+from torchvision import transforms
+from sklearn.model_selection import KFold, StratifiedKFold
+from utils.preprocess import prepare_datasets, prepare_predict
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def reset_weights(m):
+    '''
+      Try resetting model weights to avoid
+      weight leakage.
+    '''
+    for layer in m.children():
+        if hasattr(layer, 'reset_parameters'):
+            print(f'Reset trainable parameters of layer = {layer}')
+            layer.reset_parameters()
+
+if __name__ == '__main__':
+
+    # Might have to replace datapaths or separate between training and testing
+    model_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN'
+    CNN_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/cnn_net.pth'  # cnn_net.pth
+    # mri_datapath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/PET_volumes_customtemplate_float32/'   # Small Test
+    mri_datapath = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'  # Real data
+    annotations_datapath = './data/data_wnx1/rschuurs/Pytorch_CNN-RNN/LP_ADNIMERGE.csv'
+
+    properties = {
+        "batch_size": 6,
+        "padding": 0,
+        "dilation": 1,
+        "groups": 1,
+        "bias": True,
+        "padding_mode": "zeros",
+        "drop_rate": 0
+    }
+
+    # Configuration options
+    k_folds = 5     # TODO
+    num_epochs = 1
+    loss_function = nn.CrossEntropyLoss()
+
+    # For fold results
+    results = {}
+
+    # Set fixed random number seed
+    torch.manual_seed(42)
+
+    training_data, val_data, test_data = prepare_datasets(mri_datapath, val_split=0.2, seed=12)
+
+    dataset = ConcatDataset([training_data, test_data])
+
+    # Define the K-fold Cross Validator
+    kfold = KFold(n_splits=k_folds, shuffle=True)
+
+    # Start print
+    print('--------------------------------')
+
+    # K-fold Cross Validation model evaluation
+    for fold, (train_ids, test_ids) in enumerate(kfold.split(training_data)):
+
+        # Print
+        print(f'FOLD {fold}')
+        print('--------------------------------')
+
+        # Sample elements randomly from a given list of ids, no replacement.
+        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
+        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
+
+        # Define data loaders for training and testing data in this fold
+        trainloader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=10, sampler=train_subsampler)
+        testloader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=10, sampler=test_subsampler)
+
+        # Init the neural network
+        network = CNN_Net(prps=properties, final_layer_size=2)
+        network.apply(reset_weights)
+
+        # Initialize optimizer
+        optimizer = torch.optim.Adam(network.parameters(), lr=1e-5)
+
+        # Run the training loop for defined number of epochs
+        for epoch in range(0, num_epochs):
+
+            # Print epoch
+            print(f'Starting epoch {epoch + 1}')
+
+            # Set current loss value
+            current_loss = 0.0
+
+            # Iterate over the DataLoader for training data
+            for i, data in enumerate(trainloader, 0):
+
+                # Get inputs
+                inputs, targets = data
+
+                # Zero the gradients
+                optimizer.zero_grad()
+
+                # Perform forward pass
+                outputs = network(inputs)
+
+                # Compute loss
+                loss = loss_function(outputs, targets)
+
+                # Perform backward pass
+                loss.backward()
+
+                # Perform optimization
+                optimizer.step()
+
+                # Print statistics
+                current_loss += loss.item()
+                if i % 500 == 499:
+                    print('Loss after mini-batch %5d: %.3f' %
+                          (i + 1, current_loss / 500))
+                    current_loss = 0.0
+
+        # Process is complete.
+        print('Training process has finished. Saving trained model.')
+
+        # Print about testing
+        print('Starting testing')
+
+        # Saving the model
+        save_path = f'./model-fold-{fold}.pth'
+        torch.save(network.state_dict(), save_path)
+
+        # Evaluation for this fold
+        correct, total = 0, 0
+        with torch.no_grad():
+
+            predictions = []
+            true_labels = []
+
+            # Iterate over the test data and generate predictions
+            for i, data in enumerate(testloader, 0):
+                # Get inputs
+                inputs, targets = data
+
+                # Generate outputs
+                outputs = network(inputs)
+
+                # Set total and correct
+                _, predicted = torch.max(outputs.data, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
+
+                predictions.extend(outputs.data[:, 1].cpu().numpy())  # Grabs probability of positive
+                true_labels.extend(targets.cpu().numpy())
+
+            # Print accuracy
+            print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
+            print('--------------------------------')
+            results[fold] = 100.0 * (correct / total)
+
+
+        # MAKES ROC CURVE
+        thresholds = np.linspace(0, 1, num=50)
+        tpr = []
+        fpr = []
+        acc = []
+
+        true_labels = np.array(true_labels)
+
+        for threshold in thresholds:
+            # Thresholding the predictions (meaning all predictions above threshold are considered positive)
+            thresholded_predictions = (predictions >= threshold).astype(int)
+
+            # Calculating true positives, false positives, true negatives, false negatives
+            true_positives = np.sum((thresholded_predictions == 1) & (true_labels == 1))
+            false_positives = np.sum((thresholded_predictions == 1) & (true_labels == 0))
+            true_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 0))
+            false_negatives = np.sum((thresholded_predictions == 0) & (true_labels == 1))
+
+            accuracy = (true_positives + true_negatives) / (
+                        true_positives + false_positives + true_negatives + false_negatives)
+
+            # Calculate TPR and FPR
+            tpr.append(true_positives / (true_positives + false_negatives))
+            fpr.append(false_positives / (false_positives + true_negatives))
+            acc.append(accuracy)
+
+        plt.plot(fpr, tpr, lw=2, label=f'ROC Fold {fold}')
+        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.0])
+
+        plt.xlabel('False Positive Rate (1 - Specificity)')
+        plt.ylabel('True Positive Rate (Sensitivity)')
+        plt.title('Receiver Operating Characteristic (ROC) Curve')
+        plt.legend(loc="lower right")
+
+    plt.savefig(f'./ROC_{k_folds}_Folds.png')
+    plt.show()
+
+    # Print fold results
+    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
+    print('--------------------------------')
+    sum = 0.0
+    for key, value in results.items():
+        print(f'Fold {key}: {value} %')
+        sum += value
+    print(f'Average: {sum / len(results.items())} %')
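
The commit message already marks the K-fold attempt as unsuccessful, and two details in this script look likely to be the cause: `kfold.split(training_data)` yields indices over the training split only while both loaders sample from the larger `dataset` (the ConcatDataset), and `predictions` stays a plain Python list, so `(predictions >= threshold).astype(int)` raises a TypeError before the ROC loop can run. Below is a standalone sketch of those two corrections under the same pattern; the tiny TensorDataset and the score arrays are placeholders, not project data.

# Hedged sketch of two fixes for the K-fold pattern above.
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler
from sklearn.model_selection import KFold

# Placeholder dataset so the sketch runs on its own; in K-fold.py this would be `dataset`.
dataset = TensorDataset(torch.randn(20, 3), torch.randint(0, 2, (20,)))
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Generate fold indices over the same dataset the samplers index into.
for fold, (train_ids, test_ids) in enumerate(kfold.split(np.arange(len(dataset)))):
    trainloader = DataLoader(dataset, batch_size=4, sampler=SubsetRandomSampler(train_ids))
    testloader = DataLoader(dataset, batch_size=4, sampler=SubsetRandomSampler(test_ids))
    print(f"Fold {fold}: {len(train_ids)} train / {len(test_ids)} test samples")

# Convert collected scores to arrays before thresholding; a Python list has no .astype().
predictions = np.array([0.2, 0.7, 0.4, 0.9])   # placeholder positive-class scores
true_labels = np.array([0, 1, 0, 1])
thresholded = (predictions >= 0.5).astype(int)
print("Accuracy at 0.5:", (thresholded == true_labels).mean())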

BIN
valloss_epoch_curve.png