瀏覽代碼

Model work - Dataloading working, issues with model

Nicholas Schense 1 年之前
父節點
當前提交
f5d98d841e
共有 6 個文件被更改,包括 305 次插入和 97 次刪除
  1. 64 84
      main.py
  2. 二進制
      utils/__pycache__/models.cpython-38.pyc
  3. 二進制
      utils/__pycache__/preprocess.cpython-38.pyc
  4. 二進制
      utils/__pycache__/show_image.cpython-38.pyc
  5. 205 0
      utils/models.py
  6. 36 13
      utils/preprocess.py

+ 64 - 84
main.py

@@ -8,6 +8,7 @@ from torch.utils.data import DataLoader
 from torchvision import datasets
 
 from torch import nn
+import torch.nn.functional as F
 from torchvision.transforms import ToTensor
 
 # import nonechucks as nc     # Used to load data in pytorch even when images are corrupted / unavailable (skips them)
@@ -21,107 +22,86 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import glob
+from datetime import datetime
 
+# FOR TRAINING
+import torch.optim as optim
+import utils.models as models
+
+# FOR TESTING
+from torchsummary import summary
 
 
 print("--- RUNNING ---")
 print("Pytorch Version: " + torch. __version__)
 
+# data & training properties:
+val_split = 0.2     # % of val and test, rest will be train
+runs = 1
+epochs = 100
+time_stamp = timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+seeds = [np.random.randint(0, 1000) for _ in range(runs)]
 
-# MAYBE??
-'''
-import sys
-sys.path.append('//data/data_wnx3/data_wnx1/rschuurs/CNN+RNN-2class-1cnn-CLEAN/utils')
 
-import os
-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use id from $ nvidia-smi
-'''
 
-# LOADING DATA
-# data & training properties:
-val_split = 0.2     # % of val and test, rest will be train
-seed = 12       # TODO Randomize seed
-'''
-target_rows = 91
-target_cols = 109
-depth = 91
-axis = 1
-num_clinical = 2
-CNN_drop_rate = 0.3
-RNN_drop_rate = 0.1
-CNN_w_regularizer = regularizers.l2(2e-2)
-RNN_w_regularizer = regularizers.l2(1e-6)
-CNN_batch_size = 10
-RNN_batch_size = 5
-val_split = 0.2
-optimizer = Adam(lr=1e-5)
-final_layer_size = 5
-'''
-
-
-# Might have to replace datapaths or separate between training and testing
-model_filepath = '//data/data_wnx1/rschuurs/Pytorch_CNN-RNN'
 mri_datapath = './ADNI_volumes_customtemplate_float32/'
-annotations_datapath = './LP_ADNIMERGE.csv'
+xls_file = './Lp_ADNIMERGE.csv'
 
-# annotations_file = pd.read_csv(annotations_datapath)    # DataFrame
-
-# show_image(17508)
 
 # TODO: Datasets include multiple labels, such as medical info
-training_data, val_data, test_data = prepare_datasets(mri_datapath, val_split, seed)
-batch_size = 64
-
-# Create data loaders
-train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
-test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
-val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
-
-for X, y in train_dataloader:
-    print(f"Shape of X [N, C, H, W]: {X.shape}")
-    print(f"Shape of y: {y.shape} {y.dtype}")
-    break
-
-
-# Display 10 images and labels.
-x = 0
-while x < 10:
-    train_features, train_labels = next(iter(train_dataloader))
-    print(f"Feature batch shape: {train_features.size()}")
-    img = train_features[0].squeeze()
-    image = img[:, :, 40]
-    label = train_labels[0]
-    plt.imshow(image, cmap="gray")
-    plt.show()
-    print(f"Label: {label}")
-    x = x+1
 
-print("--- END ---")
 
-# EXTRA
+def evaluate_model(seed):
+    training_data, val_data, test_data = prepare_datasets(mri_datapath, xls_file, val_split, seed)
+    batch_size = 64
 
-# will I need these params?
-'''
-params_dict = { 'CNN_w_regularizer': CNN_w_regularizer, 'RNN_w_regularizer': RNN_w_regularizer,
-               'CNN_batch_size': CNN_batch_size, 'RNN_batch_size': RNN_batch_size,
-               'CNN_drop_rate': CNN_drop_rate, 'epochs': 30,
-          'gpu': "/gpu:0", 'model_filepath': model_filepath,
-          'image_shape': (target_rows, target_cols, depth, axis),
-          'num_clinical': num_clinical,
-          'final_layer_size': final_layer_size,
-          'optimizer': optimizer, 'RNN_drop_rate': RNN_drop_rate,}
+    # Create data loaders
+    train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
+    test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
+    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
 
-params = Parameters(params_dict)
+    model_CNN = models.CNN_Net(1, 1, 0.5, 5)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model_CNN.parameters(), lr=0.001)
+    print("Seed: ", seed)
+    epoch_number = 0
 
-# WHAT WAS THIS AGAIN?
-seeds = [np.random.randint(1, 5000) for _ in range(1)]
+    for epoch in range(epochs):
+        running_loss = 0.0
+        for i, data in enumerate(train_dataloader, 0):
+            mri, xls, label = data
 
-# READ THIS TO UNDERSTAND TRAIN VS VALIDATION DATA
-def evaluate_net (seed):
-    n_classes = 2
-    data_loader = DataLoader((target_rows, target_cols, depth, axis), seed = seed)
-    train_data, val_data, test_data,rnn_HdataT1,rnn_HdataT2,rnn_HdataT3,rnn_AdataT1,rnn_AdataT2,rnn_AdataT3, test_mri_nonorm = data_loader.get_train_val_test(val_split, mri_datapath)
+            optimizer.zero_grad()
 
-    print('Length Val Data[0]: ',len(val_data[0]))
-'''
+            outputs = model_CNN(mri, xls)
+            loss = criterion(outputs, label)
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item()
+            if i % 1000 == 999:
+                print("Epoch: ", epoch_number, "Batch: ", i+1, "Loss: ", running_loss / 1000, "Accuracy: ", )
+                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 1000))
+                running_loss = 0.0
+        epoch_number += 1
+
+    #Test model
+    correct = 0
+    total = 0
+
+    with torch.no_grad():
+        for data in test_dataloader:
+            images, labels = data
+            outputs = model_CNN(images)
+            _, predicted = torch.max(outputs.data, 1)
+            total += labels.size(0)
+            correct += (predicted == labels).sum().item()
+
+    print("Model Accuracy: ", 100 * correct / total)
+
+        
+for seed in seeds:
+    evaluate_model(seed)
+
+
+print("--- END ---")

二進制
utils/__pycache__/models.cpython-38.pyc


二進制
utils/__pycache__/preprocess.cpython-38.pyc


二進制
utils/__pycache__/show_image.cpython-38.pyc


+ 205 - 0
utils/models.py

@@ -0,0 +1,205 @@
+from torch import nn
+from torchvision.transforms import ToTensor
+import os
+import pandas as pd
+import numpy as np
+
+import torch
+import torchvision
+
+
+class SeperableConv3d(nn.Module):
+    def __init__(
+        self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=False
+    ):
+        super(SeperableConv3d, self).__init__()
+        self.depthwise = nn.Conv3d(
+            in_channels,
+            in_channels,
+            kernel_size,
+            groups=in_channels,
+            padding=padding,
+            bias=bias,
+            stride=stride,
+        )
+        self.pointwise = nn.Conv3d(
+            in_channels, out_channels, 1, padding=padding, bias=bias, stride=stride
+        )
+
+    def forward(self, x):
+        x = self.depthwise(x)
+        x = self.pointwise(x)
+        return x
+
+
+class SplitConvBlock(nn.Module):
+    def __init__(self, in_channels, mid_channels, out_channels, split_dim, drop_rate):
+        super(SplitConvBlock, self).__init__()
+
+        self.split_dim = split_dim
+
+        self.leftconv_1 = CNN_Net.SeperableConvolutionalBlock(
+            (3, 4, 3), in_channels //2, mid_channels //2, droprate=drop_rate
+        )
+        self.rightconv_1 = CNN_Net.SeperableConvolutionalBlock(
+            (4, 3, 3), in_channels //2, mid_channels //2, droprate=drop_rate
+        )
+
+        self.leftconv_2 = CNN_Net.SeperableConvolutionalBlock(
+            (3, 4, 3), mid_channels //2, out_channels //2, droprate=drop_rate
+        )
+        self.rightconv_2 = CNN_Net.SeperableConvolutionalBlock(
+            (4, 3, 3), mid_channels //2, out_channels //2, droprate=drop_rate
+        )
+
+        
+
+    def forward(self, x):
+        (left, right) = torch.tensor_split(x, 2, dim=self.split_dim)
+
+        self.leftblock = nn.Sequential(self.leftconv_1, self.leftconv_2)
+        self.rightblock = nn.Sequential(self.rightconv_1, self.rightconv_2)
+
+        left = self.leftblock(left)
+        right = self.rightblock(right)
+        return torch.cat((left, right), dim=self.split_dim)
+
+
+class MidFlowBlock(nn.Module):
+    def __init__(self, channels, drop_rate):
+        super(MidFlowBlock, self).__init__()
+
+        self.conv1 = CNN_Net.SeperableConvolutionalBlock(
+            (3, 3, 3), channels, channels, droprate=drop_rate
+        )
+        self.conv2 = CNN_Net.SeperableConvolutionalBlock(
+            (3, 3, 3), channels, channels, droprate=drop_rate
+        )
+        self.conv3 = CNN_Net.SeperableConvolutionalBlock(
+            (3, 3, 3), channels, channels, droprate=drop_rate
+        )
+
+        self.block = nn.Sequential(self.conv1, self.conv2, self.conv3)
+
+    def forward(self, x):
+        return nn.ELU(self.block(x) + x)
+
+
+class Parameters:
+    def __init__(self, param_dict):
+        self.CNN_w_regularizer = param_dict["CNN_w_regularizer"]
+        self.RNN_w_regularizer = param_dict["RNN_w_regularizer"]
+        self.CNN_batch_size = param_dict["CNN_batch_size"]
+        self.RNN_batch_size = param_dict["RNN_batch_size"]
+        self.CNN_drop_rate = param_dict["CNN_drop_rate"]
+        self.RNN_drop_rate = param_dict["RNN_drop_rate"]
+        self.epochs = param_dict["epochs"]
+        self.gpu = param_dict["gpu"]
+        self.model_filepath = param_dict["model_filepath"] + "/net.h5"
+        self.num_clinical = param_dict["num_clinical"]
+        self.image_shape = param_dict["image_shape"]
+        self.final_layer_size = param_dict["final_layer_size"]
+        self.optimizer = param_dict["optimizer"]
+
+
+class CNN_Net(nn.Module):
+    def ConvolutionalBlock(
+        kernel_size,
+        in_channels,
+        out_channels,
+        stride=(1, 1, 1),
+        padding="valid",
+        droprate=None,
+        pool=False,
+    ):
+        conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding)
+        norm = nn.BatchNorm3d(out_channels)
+        elu = nn.ELU()
+        dropout = nn.Dropout(droprate)
+
+        if pool:
+            maxpool = nn.MaxPool3d(3, stride=2)
+            return nn.Sequential(conv, norm, elu, maxpool, dropout)
+        else:
+            return nn.Sequential(conv, norm, elu, dropout)
+
+    def FullyConnectedBlock(in_channels, out_channels, droprate=0.0):
+        dense = nn.Linear(in_channels, out_channels)
+        norm = nn.BatchNorm1d(out_channels)
+        elu = nn.ELU()
+        dropout = nn.Dropout(droprate)
+        return nn.Sequential(dense, norm, elu, dropout)
+
+    def SeperableConvolutionalBlock(
+        kernel_size,
+        in_channels,
+        out_channels,
+        stride=(1, 1, 1),
+        padding="valid",
+        droprate=None,
+        pool=False,
+    ):
+        conv = SeperableConv3d(in_channels, out_channels, kernel_size, stride, padding)
+        norm = nn.BatchNorm3d(out_channels)
+        elu = nn.ELU()
+        dropout = nn.Dropout(droprate)
+
+        if pool:
+            maxpool = nn.MaxPool3d(3, stride=2)
+            return nn.Sequential(conv, norm, elu, maxpool, dropout)
+        else:
+            return nn.Sequential(conv, norm, elu, dropout)
+
+    def __init__(self, image_channels, clin_data_channels, droprate, final_layer_size):
+        super().__init__()
+
+        # Initial Convolutional Blocks
+        self.conv1 = CNN_Net.ConvolutionalBlock(
+            (11, 13, 11), image_channels, 192, stride=(4, 4, 4), droprate=droprate, pool=True
+        )
+        self.conv2 = CNN_Net.ConvolutionalBlock(
+            (5, 6, 5), 192, 384, droprate=droprate, pool=True
+        )
+
+        # Midflow Block
+        self.midflow = MidFlowBlock(384, droprate)
+
+        # Combine
+        self.combined = nn.Sequential(self.conv1, self.conv2, self.midflow)
+
+        # Split Convolutional Block
+        self.splitconv = SplitConvBlock(384, 192, 96, 4, droprate)
+
+        #Fully Connected Block
+        self.fc1 = CNN_Net.FullyConnectedBlock(96, 20, droprate=droprate)
+
+        self.image_layers = nn.Sequential(self.combined, self.splitconv).double()
+
+
+        #Data Layers, fully connected
+        self.fc1 = CNN_Net.FullyConnectedBlock(clin_data_channels, 64, droprate=droprate)
+        self.fc2 = CNN_Net.FullyConnectedBlock(64, 20, droprate=droprate)
+        
+        #Conntect Data 
+        self.data_layers = nn.Sequential(self.fc1, self.fc2).double()
+
+        #Final Dense Layer
+        self.dense1 = nn.Linear(40, final_layer_size)
+        self.dense2 = nn.Linear(final_layer_size, 2)
+        self.softmax = nn.Softmax()
+
+        self.final_layers = nn.Sequential(self.dense1, self.dense2, self.softmax)
+
+    def forward(self, image, clin_data):
+
+        print(image.shape)
+    
+        image = self.image_layers(image)
+        x = torch.cat((image, clin_data), dim=1)
+        x = self.final_layers(x)
+        return x
+
+
+
+
+        

+ 36 - 13
utils/preprocess.py

@@ -7,19 +7,21 @@ import numpy as np
 import random
 import torch
 from torch.utils.data import Dataset
+import pandas as pd 
 
 
 '''
 Prepares CustomDatasets for training, validating, and testing CNN
 '''
-def prepare_datasets(mri_dir, val_split=0.2, seed=50):
+def prepare_datasets(mri_dir, xls_file, val_split=0.2, seed=50):
 
     rndm = random.Random(seed)
-
+    xls_data = pd.read_csv(xls_file).set_index('Image Data ID')
     raw_data = glob.glob(mri_dir + "*")
     AD_list = []
     NL_list = []
 
+
     print("--- DATA INFO ---")
     print("Amount of images: " + str(len(raw_data)))
 
@@ -42,9 +44,9 @@ def prepare_datasets(mri_dir, val_split=0.2, seed=50):
     rndm.shuffle(val_list)
     rndm.shuffle(test_list)
 
-    train_dataset = CustomDataset(train_list)
-    val_dataset = CustomDataset(val_list)
-    test_dataset = CustomDataset(test_list)
+    train_dataset = CustomDataset(train_list, xls_data)
+    val_dataset = CustomDataset(val_list, xls_data)
+    test_dataset = CustomDataset(test_list, xls_data)
 
     return train_dataset, val_dataset, test_dataset
 
@@ -89,17 +91,38 @@ def get_train_val_test(AD_list, NL_list, val_split):
 
 
 class CustomDataset(Dataset):
-    def __init__(self, list):
-        self.data = list        # DATA IS A LIST WITH TUPLES (image_dir, class_id)
+    def __init__(self, mri, xls: pd.DataFrame):
+        self.mri_data = mri        # DATA IS A LIST WITH TUPLES (image_dir, class_id)
+        self.xls_data = xls
+
 
     def __len__(self):
-        return len(self.data)
+        return len(self.mri_data)
+    
+    def _xls_to_tensor(self, xls_data: pd.Series):
+        #Get used data
+
+        #data = xls_data.loc[['Sex', 'Age (current)', 'PTID', 'DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence)', 'Alz_csf']]
+        data = xls_data.loc[['Sex', 'Age (current)']]
+        
+        data.replace({'M': 0, 'F': 1}, inplace=True)
+        
+
+        #Convert to tensor
+        xls_tensor = torch.tensor(data.values.astype(float))
+        
+        return xls_tensor
 
     def __getitem__(self, idx):     # RETURNS TUPLE WITH IMAGE AND CLASS_ID, BASED ON INDEX IDX
-        mri_path, class_id = self.data[idx]
+        mri_path, class_id = self.mri_data[idx]
         mri = nib.load(mri_path)
         mri_data = mri.get_fdata()
-        # mri_array = np.array(mri)
-        # mri_tensor = torch.from_numpy(mri_array)
-        # class_id = torch.tensor([class_id]) TODO return tensor or just id (0, 1)??
-        return mri_data, class_id
+
+        xls = self.xls_data.iloc[idx]
+
+        #Convert xls data to tensor
+        xls_tensor = self._xls_to_tensor(xls)
+        mri_tensor = torch.from_numpy(mri_data).unsqueeze(0)
+        
+        class_id = torch.tensor([class_id])
+        return mri_tensor, xls_tensor, class_id