Browse source

Implemented transformation capabilities

Ruben Aguilo Schuurs 1 month ago
parent
commit
c359966bd1
3 changed files with 108 additions and 13 deletions
  1. main.py (+0 -1)
  2. utils/dataset_sd_mean_finder.py (+97 -0)
  3. utils/preprocess.py (+11 -12)

+ 0 - 1
main.py

@@ -78,7 +78,6 @@ while x < 0:
     x = x+1
 
 
-
 epochs = 20
 roc = True
 CNN = CNN_Net(prps=properties, final_layer_size=2)

+ 97 - 0
utils/dataset_sd_mean_finder.py

@@ -0,0 +1,97 @@
+from preprocess import prepare_datasets
+from train_methods import train, load, evaluate, predict
+from CNN import CNN_Net
+from torch.utils.data import DataLoader
+
+model_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN'
+CNN_filepath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/cnn_net.pth'       # cnn_net.pth
+# small dataset
+# mri_datapath = '/data/data_wnx1/rschuurs/Pytorch_CNN-RNN/PET_volumes_customtemplate_float32/'   # Small Test
+# big dataset
+mri_datapath = '/data/data_wnx1/_Data/AlzheimersDL/CNN+RNN-2class-1cnn+data/PET_volumes_customtemplate_float32/'   # Real data
+annotations_datapath = './data/data_wnx1/rschuurs/Pytorch_CNN-RNN/LP_ADNIMERGE.csv'
+
+
+# LOADING DATA
+val_split = 0.2     # % of val and test, rest will be train
+seed = 12       # TODO Randomize seed
+
+properties = {
+    "batch_size":32,
+    "padding":0,
+    "dilation":1,
+    "groups":1,
+    "bias":True,
+    "padding_mode":"zeros",
+    "drop_rate":0,
+    "epochs": 20,
+    "lr": [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6],  # Unused
+    'momentum':[0.99, 0.97, 0.95, 0.9],  # Unused
+    'weight_decay':[1e-3, 1e-4, 1e-5, 0]    # Unused
+}
+
+
+
+# TODO: Datasets include multiple labels, such as medical info
+training_data, val_data, test_data = prepare_datasets(mri_datapath, val_split, seed)
+
+# Create data loaders
+train_dataloader = DataLoader(training_data, batch_size=properties['batch_size'], shuffle=True, drop_last=True)
+val_dataloader = DataLoader(val_data, batch_size=properties['batch_size'], shuffle=True)        # Used during training
+test_dataloader = DataLoader(test_data, batch_size=properties['batch_size'], shuffle=True)      # Used at end for graphs
+
+
+
+
+# HERE'S ACTUAL CODE
+mean = 0.
+std = 0.
+nb_samples = 0.
+for data in train_dataloader:
+    batch_samples = data.size(0)
+    data = data.view(batch_samples, data.size(1), -1)
+    mean += data.mean(2).sum(0)
+    std += data.std(2).sum(0)
+    nb_samples += batch_samples
+
+mean /= nb_samples
+std /= nb_samples
+
+print(mean)
+print(std)
+
+
+
+mean = 0.
+std = 0.
+nb_samples = 0.
+for data in val_dataloader:
+    batch_samples = data.size(0)
+    data = data.view(batch_samples, data.size(1), -1)
+    mean += data.mean(2).sum(0)
+    std += data.std(2).sum(0)
+    nb_samples += batch_samples
+
+mean /= nb_samples
+std /= nb_samples
+
+print(mean)
+print(std)
+
+
+
+mean = 0.
+std = 0.
+nb_samples = 0.
+for data in test_dataloader:
+    batch_samples = data.size(0)
+    data = data.view(batch_samples, data.size(1), -1)
+    mean += data.mean(2).sum(0)
+    std += data.std(2).sum(0)
+    nb_samples += batch_samples
+
+mean /= nb_samples
+std /= nb_samples
+
+print(mean)
+print(std)
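Note that the new script runs the same accumulation loop three times, once per loader, and that averaging per-sample standard deviations only approximates the pooled dataset std. A minimal deduplicated sketch follows; the helper name compute_mean_std is not in the commit, and it assumes each batch is either a bare tensor or a (tensor, label) pair, depending on what CustomDataset.__getitem__ returns:

    import torch

    def compute_mean_std(loader):
        # Accumulates the per-channel mean and the mean of per-sample stds,
        # mirroring the script above. The std is therefore an approximation,
        # not the exact pooled standard deviation of the dataset.
        mean = 0.
        std = 0.
        nb_samples = 0
        for batch in loader:
            # Unpack (data, label) batches; pass bare tensors through.
            data = batch[0] if isinstance(batch, (list, tuple)) else batch
            batch_samples = data.size(0)
            data = data.view(batch_samples, data.size(1), -1)   # (N, C, D*H*W)
            mean += data.mean(2).sum(0)
            std += data.std(2).sum(0)
            nb_samples += batch_samples
        return mean / nb_samples, std / nb_samples

    for name, loader in [("train", train_dataloader),
                         ("val", val_dataloader),
                         ("test", test_dataloader)]:
        m, s = compute_mean_std(loader)
        print(name, m, s)

If an exact std is ever needed, accumulating sums and sums of squares over all voxels gives the pooled value instead.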

+ 11 - 12
utils/preprocess.py

@@ -5,7 +5,7 @@ import pandas as pd
 import random
 # import torch
 from torch.utils.data import Dataset
-import torchvision.transforms as transforms
+from torchvision.transforms import v2
 import re
 
 
@@ -63,20 +63,17 @@ def prepare_datasets(mri_dir, val_split=0.2, seed=50):
     print(f"Validation size: {len(val_list)}")
     print(f"Test size: {len(test_list)}")
 
+    transformation = v2.Compose([
+        v2.Normalize([0.5],[0.5]),      # TODO Get Vals from dataset
+        # TODO CHOOSE WHAT TRANSFORMATIONS TO DO
+    ])
 
-    # # TRANSFORM
-    # transform = transforms.Compose([
-    #     transforms.Grayscale(num_output_channels=1)
-    # ])
-
-    train_dataset = CustomDataset(train_list)
-    val_dataset = CustomDataset(val_list)
-    test_dataset = CustomDataset(test_list)
+    train_dataset = CustomDataset(train_list, transformation)
+    val_dataset = CustomDataset(val_list, transformation)
+    test_dataset = CustomDataset(test_list, transformation)
 
     return train_dataset, val_dataset, test_dataset
 
-    # TODO  Normalize data? Later add
-
 
 def prepare_predict(mri_dir, IDs):
 
@@ -133,8 +130,9 @@ def get_train_val_test(AD_list, NL_list, val_split):
 
 
 class CustomDataset(Dataset):
-    def __init__(self, list):
+    def __init__(self, list, transform):
         self.data = list        # INPUT DATA: (image_dir, class_id, (clinical_data))
+        self.transform = transform
 
     def __len__(self):
         return len(self.data)
@@ -144,6 +142,7 @@ class CustomDataset(Dataset):
         mri = nib.load(mri_path)
         image = np.asarray(mri.dataobj)
         mri_data = np.asarray(np.expand_dims(image, axis=0))
+        mri_data = self.transform(mri_data)
 
         # mri_data = mri.get_fdata()
         # mri_array = np.array(mri)
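One caveat with the new transform pipeline: v2.Normalize expects a float tensor, while __getitem__ hands it a raw numpy array. The v2 transforms tend to pass unrecognized input types through untouched, so the normalization may be silently skipped (behavior varies by torchvision version). Likewise, mean=[0.5], std=[0.5] presumes values already scaled to [0, 1], which is what the TODO and the new dataset_sd_mean_finder.py script are meant to address. A hedged sketch of the tensor conversion, assuming torchvision >= 0.16 for v2.ToDtype, with the mean/std as placeholders to be replaced by the script's output:

    import numpy as np
    import torch
    import nibabel as nib
    from torchvision.transforms import v2

    transformation = v2.Compose([
        v2.ToDtype(torch.float32, scale=False),   # cast before normalizing
        v2.Normalize(mean=[0.5], std=[0.5]),      # placeholders; use dataset stats
    ])

    # Inside __getitem__: convert the volume to a tensor first.
    mri = nib.load(mri_path)                      # mri_path as in the diff above
    image = np.asarray(mri.dataobj)
    mri_data = torch.from_numpy(np.expand_dims(image, axis=0))   # (1, D, H, W)
    mri_data = transformation(mri_data)

With a single-element mean/std, Normalize broadcasts over the whole (1, D, H, W) volume, so the sketch treats the MRI as one channel rather than a stack of 2D images.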