
Fixed saving files when the directory does not exist, clarified dataloader lengths, and corrected the loss calculation

Nicholas Schense 10 months ago
parent
commit
cdec14c561
3 files changed with 17 additions and 9 deletions
  1. .gitignore (+3 -1)
  2. main.py (+4 -1)
  3. utils/training.py (+10 -7)

+ 3 - 1
.gitignore

@@ -1,4 +1,6 @@
 .venv
 __pycache__
 utils/__pycache__
-saved_models/
+saved_models/
+training_records/
+plots/

+ 4 - 1
main.py

@@ -6,6 +6,7 @@ from torch import nn
 import numpy as np
 from datetime import datetime
 import pandas as pd
+import os
 
 # FOR TRAINING
 import torch.optim as optim
@@ -23,7 +24,7 @@ print("Pytorch Version: " + torch. __version__)
 # data & training properties:
 val_split = 0.2     # % of val and test, rest will be train
 runs = 1
-epochs = 5
+epochs = 30
 seeds = [np.random.randint(0, 1000) for _ in range(runs)]
 
 #Data Path
@@ -55,6 +56,8 @@ for seed in seeds:
     plot_results(train_results["train_acc"], train_results["train_loss"], train_results["val_acc"], train_results["val_loss"], "CNN", time_stamp, plot_path)
     
     #Save training results
+    if not os.path.exists(training_record_path):
+        os.makedirs(training_record_path)
     train_results.to_csv(training_record_path + "CNN_t-" + time_stamp + "_s-" + str(seed) + "_e-" + str(epochs) + ".csv")
     
     

+ 10 - 7
utils/training.py

@@ -33,8 +33,8 @@ def train_model(model, seed, timestamp, epochs, train_loader, val_loader, saved_
         train_corr = 0
         
         #Training
-        length = len(train_loader)
-        for _, data in tqdm(enumerate(train_loader, 0), total=length, desc="Epoch " + str(epoch), unit="batch"):
+        train_length = len(train_loader)
+        for _, data in tqdm(enumerate(train_loader, 0), total=train_length, desc="Epoch " + str(epoch), unit="batch"):
             mri, xls, label = data
 
             optimizer.zero_grad()
@@ -58,7 +58,7 @@ def train_model(model, seed, timestamp, epochs, train_loader, val_loader, saved_
             train_corr += (predicted == labels).sum().item()
             train_incc += (predicted != labels).sum().item()
             
-        train_losses.append(train_loss)
+        train_losses.append(train_loss / train_length)
         train_accs.append(train_corr / (train_corr + train_incc))
         
         
@@ -68,6 +68,7 @@ def train_model(model, seed, timestamp, epochs, train_loader, val_loader, saved_
             val_incc = 0
             val_corr = 0
             
+            val_length = len(val_loader)
             for _, data in enumerate(val_loader, 0):
                 mri, xls, label = data
 
@@ -86,7 +87,7 @@ def train_model(model, seed, timestamp, epochs, train_loader, val_loader, saved_
                 val_corr += (predicted == labels).sum().item()
                 val_incc += (predicted != labels).sum().item()
             
-        val_losses.append(val_loss)
+        val_losses.append(val_loss / val_length)
         val_accs.append(val_corr / (val_corr + val_incc))
         
         epoch_number += 1
@@ -111,7 +112,7 @@ def test_model(model, test_loader, cuda_device=torch.device('cuda:0')):
     print("--- TESTING MODEL ---")
     print("--- TESTING MODEL ---")
     #Test model
     #Test model
     correct = 0
     correct = 0
-    total = 0
+    incorrect = 0
 
 
     with torch.no_grad():
     with torch.no_grad():
         length = len(test_loader)
         length = len(test_loader)
@@ -127,10 +128,10 @@ def test_model(model, test_loader, cuda_device=torch.device('cuda:0')):
             _, predicted = torch.max(outputs.data, 1)
             _, labels = torch.max(label.data, 1)
 
-            total += labels.size(0)
+            incorrect += (predicted != labels).sum().item()
             correct += (predicted == labels).sum().item()
 
-    print("Model Accuracy: ", 100 * correct / total)
+    print("Model Accuracy: ", 100 * correct / (correct + incorrect))
 
 def initalize_dataloaders(mri_path, xls_path, val_split, seed, cuda_device=torch.device('cuda:0')):
     training_data, val_data, test_data = prepare_datasets(mri_path, xls_path, val_split, seed)
@@ -145,6 +146,8 @@ def initalize_dataloaders(mri_path, xls_path, val_split, seed, cuda_device=torch
 
 def plot_results(train_acc, train_loss, val_acc, val_loss, model_name, timestamp, plot_path):
     #Create 2 plots, one for accuracy and one for loss
+    if not os.path.exists(plot_path):
+        os.makedirs(plot_path)
     
     #Accuracy Plot
     plt.figure()
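As a short illustration (not part of the repository) of why the metric changes in utils/training.py behave as intended, the sketch below assumes loss_sum is the criterion output accumulated over an epoch and that every sample is counted as either correct or incorrect. Dividing by the number of batches yields a mean per-batch loss that is comparable between train and validation loaders of different lengths, and correct / (correct + incorrect) is identical to the previous correct / total.

def epoch_metrics(loss_sum, n_batches, n_correct, n_incorrect):
    # Mean per-batch loss, as now appended to train_losses / val_losses.
    mean_loss = loss_sum / n_batches
    # Every sample falls in exactly one bin, so this equals correct / total.
    accuracy = n_correct / (n_correct + n_incorrect)
    return mean_loss, accuracy

# Hypothetical numbers for illustration only.
print(epoch_metrics(loss_sum=37.5, n_batches=50, n_correct=180, n_incorrect=20))
# -> (0.75, 0.9)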