Spaces:
Runtime error
Runtime error
File size: 6,107 Bytes
32d3d37 948ce80 32d3d37 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
from models import *
from scipy.ndimage import gaussian_filter1d
from torch.utils.tensorboard import SummaryWriter # print to tensorboard
from torchvision.utils import make_grid
import pandas as pd
from handetect.configs import *
import data_loader
# Optional CUDA memory tuning, currently disabled:
#   torch.cuda.empty_cache()
#   os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:1024"

# TensorBoard sink for the scalars, images and graph logged further down.
writer = SummaryWriter(log_dir="output/tensorboard")

# Training / validation loaders built from the original and augmented data
# directories. (ORIG_DATA_DIR, AUG_DATA_DIR and preprocess are not defined in
# this file; presumably they come from the wildcard configs import.)
train_loader, valid_loader = data_loader.load_data(
    ORIG_DATA_DIR,
    AUG_DATA_DIR,
    preprocess,
)

# Move the network to the target device before handing its parameters to the
# optimizer.
MODEL = MODEL.to(DEVICE)

# Cross-entropy loss, optimised with Adam; StepLR adjusts the learning rate
# using STEP_SIZE and GAMMA each time scheduler.step() is called.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(MODEL.parameters(), lr=LEARNING_RATE)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

# Histories filled in by the training loop and consumed by the plots at the
# end of the script. Each pair is (training, validation).
TRAIN_LOSS_HIST, VAL_LOSS_HIST = [], []
AVG_TRAIN_LOSS_HIST, AVG_VAL_LOSS_HIST = [], []
TRAIN_ACC_HIST, VAL_ACC_HIST = [], []
# Training loop.
#
# Each epoch runs one optimisation pass over train_loader followed by one
# evaluation pass over valid_loader, then logs scalars and a grid of sample
# validation images to TensorBoard.
#
# The history lists receive exactly one entry per epoch so that they line up
# with the range(1, NUM_EPOCHS + 1) x-axis used by the plotting code:
#   TRAIN_LOSS_HIST / VAL_LOSS_HIST         - loss of the LAST batch of the epoch
#   AVG_TRAIN_LOSS_HIST / AVG_VAL_LOSS_HIST - mean batch loss over the epoch
#   TRAIN_ACC_HIST / VAL_ACC_HIST           - fraction of correct predictions
for epoch in range(NUM_EPOCHS):
    MODEL.train(True)  # Set model to training mode

    # running_loss only feeds the periodic progress print and is reset every
    # NUM_PRINT batches; epoch_train_loss is never reset mid-epoch, so the
    # epoch average covers every batch rather than just the tail after the
    # last progress print.
    running_loss = 0.0
    epoch_train_loss = 0.0
    total_train = 0
    correct_train = 0

    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = MODEL(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_train_loss += batch_loss

        if (i + 1) % NUM_PRINT == 0:
            print(
                "[Epoch %d, Batch %d] Loss: %.6f"
                % (epoch + 1, i + 1, running_loss / NUM_PRINT)
            )
            running_loss = 0.0

        # Predicted class is the index of the largest output along dim 1.
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_accuracy = correct_train / total_train
    TRAIN_ACC_HIST.append(train_accuracy)
    TRAIN_LOSS_HIST.append(loss.item())  # loss of the final training batch

    # Calculate the average training loss for the epoch
    avg_train_loss = epoch_train_loss / len(train_loader)
    writer.add_scalar("Loss/Train", avg_train_loss, epoch)
    writer.add_scalar("Accuracy/Train", train_accuracy, epoch)
    AVG_TRAIN_LOSS_HIST.append(avg_train_loss)

    # Print average training loss for the epoch
    print("[Epoch %d] Average Training Loss: %.6f" % (epoch + 1, avg_train_loss))

    # Learning rate scheduling: report the rate that was used for this epoch,
    # then advance the scheduler.
    lr_1 = optimizer.param_groups[0]["lr"]
    print("Learning Rate: {:.15f}".format(lr_1))
    scheduler.step()

    # Validation loop
    MODEL.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = MODEL(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    VAL_LOSS_HIST.append(loss.item())  # loss of the final validation batch

    # Calculate the average validation loss for the epoch
    avg_val_loss = val_loss / len(valid_loader)
    # Store the epoch average here; the last-batch loss already lives in
    # VAL_LOSS_HIST.
    AVG_VAL_LOSS_HIST.append(avg_val_loss)
    print("Average Validation Loss: %.6f" % (avg_val_loss))

    # Calculate the accuracy of validation set
    val_accuracy = correct_val / total_val
    VAL_ACC_HIST.append(val_accuracy)
    print("Validation Accuracy: %.6f" % (val_accuracy))
    writer.add_scalar("Loss/Validation", avg_val_loss, epoch)
    writer.add_scalar("Accuracy/Validation", val_accuracy, epoch)

    # Add sample images to TensorBoard
    sample_images, _ = next(iter(valid_loader))  # Get a batch of sample images
    sample_images = sample_images.to(DEVICE)
    grid_image = make_grid(
        sample_images, nrow=8, normalize=True
    )  # Create a grid of images
    writer.add_image("Sample Images", grid_image, global_step=epoch)
# End of training loop
# Persist the trained weights (state dict only) to MODEL_SAVE_PATH.
torch.save(MODEL.state_dict(), MODEL_SAVE_PATH)
print("Model saved at", MODEL_SAVE_PATH)

print("Generating loss plot...")

# Smooth every per-epoch history with the same Gaussian kernel (sigma=2)
# before plotting. All six curves are computed up front, ahead of any figure
# being written.
avg_train_loss_line = gaussian_filter1d(AVG_TRAIN_LOSS_HIST, sigma=2)
avg_val_loss_line = gaussian_filter1d(AVG_VAL_LOSS_HIST, sigma=2)
train_loss_line = gaussian_filter1d(TRAIN_LOSS_HIST, sigma=2)
val_loss_line = gaussian_filter1d(VAL_LOSS_HIST, sigma=2)
train_acc_line = gaussian_filter1d(TRAIN_ACC_HIST, sigma=2)
val_acc_line = gaussian_filter1d(VAL_ACC_HIST, sigma=2)

# One entry per output image:
#   (file name, y-axis label, title, ((series, legend label), ...))
plot_specs = (
    (
        "loss_plot.png",
        "Loss",
        "Train Loss and Validation Loss",
        (
            (train_loss_line, "Train Loss"),
            (val_loss_line, "Validation Loss"),
        ),
    ),
    (
        "avg_loss_plot.png",
        "Loss",
        "Average Train Loss and Average Validation Loss",
        (
            (avg_train_loss_line, "Average Train Loss"),
            (avg_val_loss_line, "Average Validation Loss"),
        ),
    ),
    (
        "accuracy_plot.png",
        "Accuracy",
        "Train Accuracy and Validation Accuracy",
        (
            (train_acc_line, "Train Accuracy"),
            (val_acc_line, "Validation Accuracy"),
        ),
    ),
)

for plot_index, (file_name, y_label, title, curves) in enumerate(plot_specs):
    if plot_index > 0:
        # Wipe the previous figure so curves do not pile up across images.
        plt.clf()
    for series, legend_label in curves:
        plt.plot(range(1, NUM_EPOCHS + 1), series, label=legend_label)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend()
    plt.title(title)
    plt.savefig(file_name)

# Export the model graph to TensorBoard using a random 1x3x64x64 input.
# NOTE(review): the 64x64 size is hard-coded; confirm it matches the input
# size the model is trained on.
dummy_input = torch.randn(1, 3, 64, 64).to(DEVICE)
writer.add_graph(MODEL, dummy_input)

# Close TensorBoard writer
writer.close()
|