ソースを参照

Begin conversion from old organization system

Nicholas Schense 5 ヶ月 前
コミット
b6db03c389
9 ファイル変更549 行追加0 行削除
  1. 0 0
      LP_ADNIMERGE.csv
  2. 1 0
      README.md
  3. 25 0
      config.toml
  4. 40 0
      train_cnn.py
  5. 131 0
      utils/data/datasets.py
  6. 0 0
      utils/data/preprocessing.py
  7. 103 0
      utils/models/cnn.py
  8. 173 0
      utils/models/layers.py
  9. 76 0
      utils/training.py

ファイルの差分が大きいため隠しています
+ 0 - 0
LP_ADNIMERGE.csv


+ 1 - 0
README.md

@@ -0,0 +1 @@
+# Alzheimer's Diagnosis Neural Net Project Rewrite

+ 25 - 0
config.toml

@@ -0,0 +1,25 @@
+[paths]
+mri_data = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'
+xls_data = '/export/home/nschense/alzheimers/alzheimers_nn/LP_ADNIMERGE.csv'
+
+#CHANGE THESE BEFORE RUNNING
+model_output = '/export/home/nschense/alzheimers/alzheimers_nn/saved_models/'
+
+[training]
+device = 'cuda:1'
+runs = 1
+max_epochs = 30
+
+[dataset]
+validation_split = 0.3
+
+[model]
+name = 'alzheimers+cnn'
+
+[hyperparameters]
+batch_size = 64
+learning_rate = 0.0001
+droprate = 0.5
+
+[operation]
+silent = false

+ 40 - 0
train_cnn.py

@@ -0,0 +1,40 @@
+#MACHINE LEARNING
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torchvision
+
+#GENERAL USE
+import numpy as np
+import pandas as pd
+from datetime import datetime
+
+#SYSTEM
+import tomli as toml
+import os
+
+#DATA PROCESSING
+from sklearn.model_selection import train_test_split
+
+#CUSTOM MODULES
+import utils.models.cnn as cnn
+
# CONFIGURATION
# Read the TOML config from $ADL_CONFIG_PATH if set, else from ./config.toml.
config_path = os.getenv('ADL_CONFIG_PATH', 'config.toml')
with open(config_path, 'rb') as f:
    config = toml.load(f)

# Set up the model.
# image_channels=1: datasets.ADNIDataset yields MRI tensors with a single
# leading channel dim (unsqueeze(0)); clin_data_channels=2: the clinical
# tensor holds ['Sex', 'Age (current)'].
# BUG FIX: CNN.__init__ takes (image_channels, clin_data_channels, droprate),
# not the whole config dict.
model = cnn.CNN(
    image_channels=1,
    clin_data_channels=2,
    droprate=config['hyperparameters']['droprate'],
)
criterion = nn.BCELoss()
# BUG FIX: learning_rate lives under [hyperparameters] in config.toml,
# not [training] -- the old key raised KeyError.
optimizer = optim.Adam(model.parameters(), lr=config['hyperparameters']['learning_rate'])

# Load datasets

+ 131 - 0
utils/data/datasets.py

@@ -0,0 +1,131 @@
+# NEEDS TO BE FINISHED
+# TODO CHECK ABOUT IMAGE DIMENSIONS
+# TODO ENSURE ITERATION WORKS
+import glob
+import nibabel as nib
+import numpy as np
+import random
+import torch
+from torch.utils.data import Dataset
+import pandas as pd 
+from torch.utils.data import DataLoader
+
+
+
def prepare_datasets(mri_dir, xls_file, val_split=0.2, seed=50):
    """Build the train/validation/test ADNIDatasets for the CNN.

    Scans ``mri_dir`` for image files, labels them AD (1) or NL (0) from
    their file paths, splits them with :func:`get_train_val_test`, and pairs
    each split with the clinical spreadsheet loaded from ``xls_file``.
    """
    rng = random.Random(seed)
    clinical = pd.read_csv(xls_file).set_index('Image Data ID')

    # Classify every scan by substring in its path; "NL" wins over "AD"
    # when both appear, matching the original if/elif ordering.
    # TODO Check that image is in CSV?
    scans = glob.glob(mri_dir + "*")
    nl_scans = [path for path in scans if "NL" in path]
    ad_scans = [path for path in scans if "AD" in path and "NL" not in path]

    # The rng call order (AD, NL, then train/val/test) is kept identical so
    # a given seed reproduces the same partition as before.
    rng.shuffle(ad_scans)
    rng.shuffle(nl_scans)

    train_list, val_list, test_list = get_train_val_test(ad_scans, nl_scans, val_split)

    for split in (train_list, val_list, test_list):
        rng.shuffle(split)

    # TODO  Normalize data? Later add / Exctract clinical data? Which data?
    return (
        ADNIDataset(train_list, clinical),
        ADNIDataset(val_list, clinical),
        ADNIDataset(test_list, clinical),
    )
+
def get_train_val_test(AD_list, NL_list, val_split):
    """Split AD and NL image paths into labeled train/val/test lists.

    ``val_split`` is applied twice per class: first to carve the test set
    off the full list, then to carve the validation set off the remainder.
    Labels: AD -> 1, NL -> 0.

    Returns:
        (train_list, val_list, test_list), each a list of
        ``(image_path, class_id)`` tuples.
    """
    train_list, val_list, test_list = [], [], []

    # Number of images reserved for the test set, per class.
    num_test_ad = int(len(AD_list) * val_split)
    num_test_nl = int(len(NL_list) * val_split)

    # Validation is a fraction of what remains after removing the test set.
    num_val_ad = int((len(AD_list) - num_test_ad) * val_split)
    num_val_nl = int((len(NL_list) - num_test_nl) * val_split)

    # BUG FIX: the original sliced test as [num_val:num_test] and train as
    # [num_test:], so the test set was smaller than intended and images meant
    # for testing leaked into training.  Slices are now contiguous, disjoint,
    # and of the computed sizes: [0, val) / [val, val+test) / [val+test, end).
    for image in AD_list[:num_val_ad]:
        val_list.append((image, 1))
    for image in AD_list[num_val_ad:num_val_ad + num_test_ad]:
        test_list.append((image, 1))
    for image in AD_list[num_val_ad + num_test_ad:]:
        train_list.append((image, 1))

    for image in NL_list[:num_val_nl]:
        val_list.append((image, 0))
    for image in NL_list[num_val_nl:num_val_nl + num_test_nl]:
        test_list.append((image, 0))
    for image in NL_list[num_val_nl + num_test_nl:]:
        train_list.append((image, 0))

    return train_list, val_list, test_list
+
+
class ADNIDataset(Dataset):
    """Dataset pairing ADNI MRI volumes with clinical spreadsheet rows.

    Yields ``((mri_tensor, xls_tensor), class_id)`` where class_id is a
    one-hot 2-vector (index 1 = AD, index 0 = NL per the labels assigned
    in get_train_val_test).
    """

    def __init__(self, mri, xls: pd.DataFrame):
        self.mri_data = mri        # DATA IS A LIST WITH TUPLES (image_dir, class_id)
        self.xls_data = xls


    def __len__(self) -> int:
        # One sample per (image_path, class_id) tuple.
        return len(self.mri_data)
    
    def _xls_to_tensor(self, xls_data: pd.Series) -> torch.Tensor:
        #Get used data

        #data = xls_data.loc[['Sex', 'Age (current)', 'PTID', 'DXCONFID (1=uncertain, 2= mild, 3= moderate, 4=high confidence)', 'Alz_csf']]
        data = xls_data.loc[['Sex', 'Age (current)']]
        
        # Encode sex as 0/1 so the whole series can be cast to float below.
        data.replace({'M': 0, 'F': 1}, inplace=True)
        

        #Convert to tensor
        xls_tensor = torch.tensor(data.values.astype(float))
        
        return xls_tensor

    def __getitem__(self, idx):     # RETURNS TUPLE WITH IMAGE AND CLASS_ID, BASED ON INDEX IDX
        mri_path, class_id = self.mri_data[idx]
        mri = nib.load(mri_path)
        mri_data = mri.get_fdata()

        # NOTE(review): this is a POSITIONAL lookup into the CSV, but
        # self.mri_data was shuffled in prepare_datasets, so row idx of the
        # spreadsheet likely does not correspond to the image at idx.  The
        # index is 'Image Data ID' -- presumably the row should be looked up
        # by the ID parsed from mri_path instead.  TODO confirm and fix.
        xls = self.xls_data.iloc[idx]

        #Convert xls data to tensor
        xls_tensor = self._xls_to_tensor(xls)
        # Add a leading channel dimension: (1, D, H, W).
        mri_tensor = torch.from_numpy(mri_data).unsqueeze(0)
        
        class_id = torch.tensor([class_id])
        #Convert to one-hot and squeeze
        # NOTE(review): one_hot yields an int64 tensor; BCELoss downstream
        # requires float targets -- the cast happens (or must happen) in the
        # training loop.
        class_id = torch.nn.functional.one_hot(class_id, num_classes=2).squeeze(0)

        return (mri_tensor, xls_tensor), class_id
+    
+    
def initalize_dataloaders(training_data, val_data, test_data, cuda_device=torch.device('cuda:0'), batch_size=64):
    """Wrap the three datasets in shuffling DataLoaders.

    The test loader uses a quarter of ``batch_size``; every loader draws its
    shuffle order from a ``torch.Generator`` pinned to ``cuda_device``.

    Returns:
        (train_dataloader, val_dataloader, test_dataloader)
    """
    def _loader(dataset, size):
        # One shuffling DataLoader with a device-pinned RNG.
        return DataLoader(
            dataset,
            batch_size=size,
            shuffle=True,
            generator=torch.Generator(device=cuda_device),
        )

    return (
        _loader(training_data, batch_size),
        _loader(val_data, batch_size),
        _loader(test_data, batch_size // 4),
    )

+ 0 - 0
utils/data/preprocessing.py


+ 103 - 0
utils/models/cnn.py

@@ -0,0 +1,103 @@
+from torch import nn
+from torchvision.transforms import ToTensor
+import os
+import pandas as pd
+import numpy as np
+import layers as ly
+
+import torch
+import torchvision
+
+
class Parameters:
    """Plain attribute holder for legacy CNN/RNN hyper-parameters.

    Each expected key of ``param_dict`` is copied onto an attribute of the
    same name; ``model_filepath`` additionally gets ``/net.h5`` appended so
    it points at the serialized network file.
    """

    _KEYS = (
        "CNN_w_regularizer",
        "RNN_w_regularizer",
        "CNN_batch_size",
        "RNN_batch_size",
        "CNN_drop_rate",
        "RNN_drop_rate",
        "epochs",
        "gpu",
        "num_clinical",
        "image_shape",
        "final_layer_size",
        "optimizer",
    )

    def __init__(self, param_dict):
        for key in self._KEYS:
            setattr(self, key, param_dict[key])
        self.model_filepath = param_dict["model_filepath"] + "/net.h5"
+
+
class CNN(nn.Module):
    """Two-branch classifier fusing a 3D-CNN image branch with clinical data.

    ``forward`` takes ``(image, clin_data)``, reduces each branch to a
    20-wide feature vector, concatenates them, and maps the result through
    two dense layers to softmax probabilities over the two classes.
    """

    def __init__(self, image_channels, clin_data_channels, droprate):
        super().__init__()

        # Image branch: convolutional feature extractor -> 20 features.
        self.image_section = CNN_Image_Section(image_channels, droprate)

        # Clinical branch: two fully connected blocks -> 20 features.
        self.fc_clin1 = ly.FullConnBlock(clin_data_channels, 64, droprate=droprate)
        self.fc_clin2 = ly.FullConnBlock(64, 20, droprate=droprate)

        # Fusion head: 20 + 20 = 40 features -> 2-way softmax.
        self.dense1 = nn.Linear(40, 5)
        self.dense2 = nn.Linear(5, 2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        image, clin_data = x

        image_features = self.image_section(image)
        clin_features = self.fc_clin2(self.fc_clin1(clin_data))

        fused = torch.cat((image_features, clin_features), dim=1)
        return self.softmax(self.dense2(self.dense1(fused)))
+
+
+
+
+
class CNN_Image_Section(nn.Module):
    """Convolutional pipeline reducing a 3D volume to a 20-wide embedding."""

    def __init__(self, image_channels, droprate):
        super().__init__()
        # Stem: two plain convolutional blocks.
        self.conv1 = ly.ConvBlock(
            image_channels,
            192,
            (11, 13, 11),
            stride=(4, 4, 4),
            droprate=droprate,
            pool=False,
        )
        self.conv2 = ly.ConvBlock(192, 384, (5, 6, 5), droprate=droprate, pool=False)

        # Residual mid-flow stage at constant channel width.
        self.midflow = ly.MidFlowBlock(384, droprate)

        # Channel-split separable-convolution stage.
        self.splitconv = ly.SplitConvBlock(384, 192, 96, 1, droprate)

        # Flattened volume -> 20-wide embedding.
        # NOTE(review): 227136 is hard-coded to the expected input volume
        # size; other input shapes will fail here -- confirm against data.
        self.fc_image = ly.FullConnBlock(227136, 20, droprate=droprate)

    def forward(self, x):
        features = self.conv2(self.conv1(x))
        features = self.midflow(features)
        features = self.splitconv(features)
        return self.fc_image(torch.flatten(features, 1))
+
+

+ 173 - 0
utils/models/layers.py

@@ -0,0 +1,173 @@
+from torch import nn
+from torchvision.transforms import ToTensor
+import os
+import pandas as pd
+import numpy as np
+
+import torch
+import torchvision
+
+
class SepConv3d(nn.Module):
    """Grouped 3D convolution used as a cheap separable-conv stand-in.

    NOTE(review): this uses ``groups=out_channels`` and no pointwise
    follow-up conv, so it is not a textbook depthwise-separable convolution
    (which would use ``groups=in_channels`` plus a 1x1x1 conv) -- confirm
    the intent before "fixing" it, since trained weights depend on this
    exact layout.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=False
    ):
        super(SepConv3d, self).__init__()
        self.depthwise = nn.Conv3d(
            in_channels,
            out_channels,
            kernel_size,
            groups=out_channels,
            padding=padding,
            bias=bias,
            stride=stride,
        )

    def forward(self, x):
        return self.depthwise(x)
+    
+
class SplitConvBlock(nn.Module):
    """Split the input along ``split_dim``, run each half through its own
    two-layer separable-conv stack, and concatenate the results.

    Args:
        in_channels: channels of the full (pre-split) input.
        mid_channels: channels of the full tensor after the first stage.
        out_channels: channels of the full tensor after the second stage.
        split_dim: dimension along which the tensor is halved (1 = channels).
        drop_rate: dropout probability forwarded to each SepConvBlock.
    """

    def __init__(self, in_channels, mid_channels, out_channels, split_dim, drop_rate):
        super(SplitConvBlock, self).__init__()

        self.split_dim = split_dim

        self.leftconv_1 = SepConvBlock(
            in_channels // 2, mid_channels // 2, (3, 4, 3), droprate=drop_rate
        )
        self.rightconv_1 = SepConvBlock(
            in_channels // 2, mid_channels // 2, (3, 4, 3), droprate=drop_rate
        )

        self.leftconv_2 = SepConvBlock(
            mid_channels // 2, out_channels // 2, (3, 4, 3), droprate=drop_rate
        )
        self.rightconv_2 = SepConvBlock(
            mid_channels // 2, out_channels // 2, (3, 4, 3), droprate=drop_rate
        )

        # BUG FIX: the original rebuilt these nn.Sequential wrappers inside
        # every forward() call, mutating the module per batch for no benefit.
        # Composing the already-constructed sub-blocks once here is
        # behaviorally identical and side-effect free.
        self.leftblock = nn.Sequential(self.leftconv_1, self.leftconv_2)
        self.rightblock = nn.Sequential(self.rightconv_1, self.rightconv_2)

    def forward(self, x):
        (left, right) = torch.tensor_split(x, 2, dim=self.split_dim)
        left = self.leftblock(left)
        right = self.rightblock(right)
        return torch.cat((left, right), dim=self.split_dim)
+    
+
class MidFlowBlock(nn.Module):
    """Residual stage at constant channel width: ELU(block(x) + x)."""

    def __init__(self, channels, drop_rate):
        super(MidFlowBlock, self).__init__()

        self.conv1 = ConvBlock(
            channels, channels, (3, 3, 3),  droprate=drop_rate, padding="same"
        )
        self.conv2 = ConvBlock(
            channels, channels, (3, 3, 3), droprate=drop_rate, padding="same"
        )
        self.conv3 = ConvBlock(
            channels, channels, (3, 3, 3),  droprate=drop_rate, padding="same"
        )

        #self.block = nn.Sequential(self.conv1, self.conv2, self.conv3)
        # NOTE(review): only conv1 is actually used; conv2/conv3 are built
        # (and add parameters to the model) but never run.  Confirm whether
        # the three-conv Sequential above was disabled intentionally.
        self.block = self.conv1

    def forward(self, x):
        # "same" padding keeps shapes equal, so the skip connection is valid.
        x = nn.ELU()(self.block(x) + x)
        return x
+
+        
class ConvBlock(nn.Module):
    """Conv3d -> BatchNorm3d -> ELU [-> MaxPool3d] -> Dropout.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: 3D kernel shape.
        stride: convolution stride.
        padding: "valid", "same", or an explicit amount (passed to Conv3d).
        droprate: dropout probability.  BUG FIX: the old default of ``None``
            made ``nn.Dropout(None)`` raise on construction, so the default
            was unusable; 0.0 (identity dropout) is a working equivalent.
        pool: when True, apply MaxPool3d(3, stride=2) before dropout.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=(1, 1, 1),
            padding="valid",
            droprate=0.0,
            pool=False,
    ):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding)
        self.norm = nn.BatchNorm3d(out_channels)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(droprate)
        # Pooling is optional; None marks "no pooling" for forward().
        self.maxpool = nn.MaxPool3d(3, stride=2) if pool else None

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        x = self.elu(x)

        if self.maxpool:
            x = self.maxpool(x)

        x = self.dropout(x)
        return x
+
+
class FullConnBlock(nn.Module):
    """Linear -> BatchNorm1d -> ELU -> Dropout."""

    def __init__(self, in_channels, out_channels, droprate=0.0):
        super(FullConnBlock, self).__init__()
        self.dense = nn.Linear(in_channels, out_channels)
        self.norm = nn.BatchNorm1d(out_channels)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(droprate)

    def forward(self, x):
        out = self.dense(x)
        out = self.norm(out)
        return self.dropout(self.elu(out))
+    
+
class SepConvBlock(nn.Module):
    """SepConv3d -> BatchNorm3d -> ELU [-> MaxPool3d] -> Dropout.

    Mirrors ConvBlock but uses the grouped SepConv3d convolution.

    Args:
        droprate: dropout probability.  BUG FIX: the old default of ``None``
            made ``nn.Dropout(None)`` raise on construction, so the default
            was unusable; 0.0 (identity dropout) is a working equivalent.
        pool: when True, apply MaxPool3d(3, stride=2) before dropout.
    """

    def __init__(
      self,
      in_channels,
      out_channels,
      kernel_size,
      stride=(1, 1, 1),
      padding="valid",
      droprate=0.0,
      pool=False,
    ):
        super(SepConvBlock, self).__init__()
        self.conv = SepConv3d(in_channels, out_channels, kernel_size, stride, padding)
        self.norm = nn.BatchNorm3d(out_channels)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(droprate)
        # Pooling is optional; None marks "no pooling" for forward().
        self.maxpool = nn.MaxPool3d(3, stride=2) if pool else None

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        x = self.elu(x)

        if self.maxpool:
            x = self.maxpool(x)

        x = self.dropout(x)
        return x

+ 76 - 0
utils/training.py

@@ -0,0 +1,76 @@
+import torch 
+import torch.nn as nn
+import torch.optim as optim
+import torchvision
+from torch.utils.data import DataLoader
+import pandas as pd
+
+
+
def train_epoch(model, train_loader, val_loader, criterion, optimizer):
    """Run one optimization pass over ``train_loader`` and score ``val_loader``.

    Returns:
        (train_loss, val_loss): mean criterion value per batch for each split.
    """
    model.train()
    train_loss = 0
    val_loss = 0
    for data, target in train_loader:
        optimizer.zero_grad()

        output = model(data)
        # BUG FIX: the dataset emits int64 one-hot targets, but BCELoss
        # requires float targets matching the output dtype; without the cast
        # the backward pass raises a dtype error.
        loss = criterion(output, target.float())
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            val_loss += criterion(output, target.float()).item()
        val_loss /= len(val_loader)

    return train_loss, val_loss
+
def evaluate_accuracy(model, loader):
    """Compute accuracy over ``loader`` for two-class one-hot targets.

    Returns:
        (accuracy, predictions, actual): overall accuracy, the model's
        class-1 scores per sample, and the class-1 entry of each target.
    """
    model.eval()
    correct = 0
    total = 0
    predictions = []
    actual = []

    with torch.no_grad():
        for data, target in loader:
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            # BUG FIX: targets are one-hot (N, 2) while ``predicted`` holds
            # class indices (N,); comparing them directly broadcasts (or
            # errors outright) instead of counting per-sample hits.  Compare
            # against the target's argmax so each sample contributes exactly
            # one correct/incorrect.
            correct += (predicted == target.argmax(dim=1)).sum().item()

            predictions.extend(output[:, 1].tolist())
            actual.extend(target[:, 1].tolist())

    return correct / total, predictions, actual
+
def train_model(model, train_loader, val_loader, criterion, optimizer, config):
    """Train for the configured number of epochs, recording per-epoch metrics.

    Returns:
        pd.DataFrame indexed by epoch with train/val loss and accuracy.
    """
    history = pd.DataFrame(
        columns=["Epoch", "Train Loss", "Val Loss", "Train Acc", "Val Acc"]
    ).set_index("Epoch")

    # BUG FIX: config.toml defines [training].max_epochs; the old
    # config["training"]["epochs"] lookup raised KeyError.
    for epoch in range(config["training"]["max_epochs"]):
        train_loss, val_loss = train_epoch(model, train_loader, val_loader, criterion, optimizer)
        if config["operation"]["silent"] is False:
            print(f"Epoch {epoch + 1} - Train Loss: {train_loss} - Val Loss: {val_loss}")

        train_acc, _, _ = evaluate_accuracy(model, train_loader)
        val_acc, _, _ = evaluate_accuracy(model, val_loader)

        history.loc[epoch] = [train_loss, val_loss, train_acc, val_acc]

    return history

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません