-
Notifications
You must be signed in to change notification settings - Fork 8
/
train.py
175 lines (154 loc) · 8.97 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""General-purpose training script for image-to-image translation.
This script works for various models (with option '--model': e.g., pix2pix, cyclegan, colorization) and
different datasets (with option '--dataset_mode': e.g., aligned, unaligned, single, colorization).
You need to specify the dataset ('--dataroot'), experiment name ('--name'), and model ('--model').
It first creates the model, dataset, and visualizer given the options.
It then does standard network training. During training, it also visualizes/saves the images, prints/saves the loss plot, and saves models.
The script supports continue/resume training. Use '--continue_train' to resume your previous training.
Example:
Train a CycleGAN model:
python train.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan
Train a pix2pix model:
python train.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --direction BtoA
See options/base_options.py and options/train_options.py for more training options.
See training and test tips at: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/tips.md
See frequently asked questions at: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/qa.md
"""
import time
from options.train_options import TrainOptions
from data import create_dataset
from models import create_model
from util.visualizer import Visualizer
import matplotlib.pyplot as plt
import os
import pandas as pd
import torch
import numpy as np
import GPUtil
def build_loss_frame(losses, loss_column, first_epoch=1):
    """Tabulate per-epoch losses as a two-column DataFrame.

    Parameters:
        losses (iterable)  -- one loss value per epoch, in training order
        loss_column (str)  -- header for the loss column (e.g. "Train_Loss")
        first_epoch (int)  -- epoch number that the first value belongs to

    Returns:
        A DataFrame with columns ["Epoch", loss_column]; it has zero rows when
        `losses` is empty.
    """
    values = [float(value) for value in losses]
    epochs = list(range(first_epoch, first_epoch + len(values)))
    return pd.DataFrame({"Epoch": epochs, loss_column: values})


def save_loss_summary(loss_dir, filename, losses, loss_column, first_epoch=1):
    """Write per-epoch losses to a text file and to an Excel sheet next to it.

    The text file holds one "<epoch> <loss>" line per epoch and is overwritten
    on every call. The spreadsheet is written to "<text file path>.xlsx".

    Parameters:
        loss_dir (str)     -- directory that receives both files
        filename (str)     -- name of the text file inside `loss_dir`
        losses (iterable)  -- one loss value per epoch, in training order
        loss_column (str)  -- header for the loss column in the spreadsheet
        first_epoch (int)  -- epoch number that the first value belongs to
    """
    loss_filename = os.path.join(loss_dir, filename)
    print(loss_filename)
    frame = build_loss_frame(losses, loss_column, first_epoch)
    with open(loss_filename, 'w') as f:
        for epoch_number, value in zip(frame["Epoch"], frame[loss_column]):
            print("{} {}".format(epoch_number, value), file=f)
    # Build the sheet from memory instead of re-parsing the text file: this
    # avoids the deprecated `delim_whitespace` option and the parse error that
    # an empty file would raise.
    frame.to_excel(r"{}.xlsx".format(loss_filename))


if __name__ == '__main__':
    import copy  # local import: only needed to clone the options for validation

    # Added for maintaining same initialization for weights
    # seed = 3
    # np.random.seed(seed)
    # torch.manual_seed(seed)
    # torch.cuda.manual_seed(seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    torch.cuda.empty_cache()

    opt = TrainOptions().parse()   # get training options
    dataset = create_dataset(opt)  # create a dataset given opt.dataset_mode and other options
    dataset_size = len(dataset)    # get the number of images in the dataset.
    print('The number of training images = %d' % dataset_size)
    print('shuffle :{}'.format(not opt.serial_batches))

    model = create_model(opt)      # create a model given opt.model and other options
    model.setup(opt)               # regular setup: load and print networks; create schedulers
    visualizer = Visualizer(opt)   # create a visualizer that display/save images and plots
    total_iters = 0                # the total number of training iterations
    total_time_start = time.time()
    loss_summary = []              # average training loss, one entry per epoch
    loss_summary_test = []         # average validation loss, one entry per epoch

    # Create the test dataset used to evaluate a validation loss after each epoch.
    # The options are copied first: `optTest = opt` would only bind a second name
    # to the same object, so the overrides below would also change the training
    # options (for instance opt.batch_size, which the training loop reads for its
    # iteration counters) and be visible to anything that kept a reference to opt.
    optTest = copy.copy(opt)
    optTest.phase = 'test'
    optTest.num_threads = 0        # value used by the test code path
    optTest.batch_size = 1         # test code only supports batch_size = 1
    optTest.serial_batches = True  # disable data shuffling; comment this line if results on randomly chosen images are needed.
    optTest.no_flip = True         # no flip; comment this line if results on flipped images are needed.
    optTest.display_id = -1        # no visdom display; the test code saves the results to a HTML file.
    optTest.isTrain = False
    optTest.mode = 'eval'
    dataset_test = create_dataset(optTest)
    dataset_size_test = len(dataset_test)  # get the number of images in the dataset.
    print('The number of validation images = %d' % dataset_size_test)

    # outer loop for different epochs; we save the model by <epoch_count>, <epoch_count>+<save_latest_freq>
    for epoch in range(opt.epoch_count, opt.n_epochs + opt.n_epochs_decay + 1):
        epoch_start_time = time.time()  # timer for entire epoch
        iter_data_time = time.time()    # timer for data loading per iteration
        epoch_iter = 0                  # the number of training iterations in current epoch, reset to 0 every epoch
        visualizer.reset()              # reset the visualizer: make sure it saves the results to HTML at least once every epoch
        model.set_epoch(epoch)
        total_loss_each_epoch = 0
        num_loss_samples = 0            # how many times the loss was added to the running total

        for data in dataset:  # inner loop within one epoch
            iter_start_time = time.time()  # timer for computation per iteration
            if total_iters % opt.print_freq == 0:
                t_data = iter_start_time - iter_data_time

            total_iters += opt.batch_size
            epoch_iter += opt.batch_size
            model.set_input(data)        # unpack data from dataset and apply preprocessing
            model.optimize_parameters()  # calculate loss functions, get gradients, update network weights

            if total_iters % opt.display_freq == 0:  # display images on visdom and save images to a HTML file
                save_result = total_iters % opt.update_html_freq == 0
                model.compute_visuals()
                visualizer.display_current_results(model.get_current_visuals(), epoch, save_result)

            if total_iters % opt.print_freq == 0:  # print training losses and save logging information to the disk
                losses = model.get_current_losses()
                total_loss_each_epoch = total_loss_each_epoch + sum(losses.values())
                num_loss_samples += 1
                t_comp = (time.time() - iter_start_time) / opt.batch_size
                visualizer.print_current_losses(epoch, epoch_iter, losses, t_comp, t_data)
                # if opt.display_id > 0:
                #     visualizer.plot_current_losses(epoch, float(epoch_iter) / dataset_size, losses)

            if total_iters % opt.save_latest_freq == 0:  # cache our latest model every <save_latest_freq> iterations
                print('saving the latest model (epoch %d, total_iters %d)' % (epoch, total_iters))
                save_suffix = 'iter_%d' % total_iters if opt.save_by_iter else 'latest'
                model.save_networks(save_suffix)

            iter_data_time = time.time()

        if epoch % opt.save_epoch_freq == 0:  # cache our model every <save_epoch_freq> epochs
            print('saving the model at the end of epoch %d, iters %d' % (epoch, total_iters))
            model.save_networks('latest')
            model.save_networks(epoch)

        print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.n_epochs + opt.n_epochs_decay, time.time() - epoch_start_time))
        model.update_learning_rate()  # update learning rates at the end of every epoch.

        # The loss is only sampled on iterations that hit print_freq, so the
        # average must be taken over those samples rather than over len(dataset).
        if num_loss_samples:
            avg_loss_each_epoch_train = total_loss_each_epoch / num_loss_samples
        else:
            avg_loss_each_epoch_train = float('nan')  # no loss was sampled this epoch
        print('Avg Loss at the end of epoch {}:{}'.format(epoch, avg_loss_each_epoch_train))
        loss_summary.append(avg_loss_each_epoch_train)

        # Evaluating validation loss
        total_loss_each_epoch_test = 0
        model.eval()
        for data_test in dataset_test:
            # NOTE(review): the source indentation was lost; the validation loss is
            # assumed to be computed inside the no-grad scope -- confirm.
            with torch.no_grad():
                model.set_input(data_test)  # unpack data from data loader
                model.test()
                model.calculate_validation_loss()
            val_losses = model.get_current_losses()
            total_loss_each_epoch_test = total_loss_each_epoch_test + sum(val_losses.values())
        if dataset_size_test:
            avg_loss_each_epoch_test = total_loss_each_epoch_test / dataset_size_test
        else:
            avg_loss_each_epoch_test = float('nan')  # empty validation set: nothing to average
        print('Avg Test Loss at the end of epoch {}:{}'.format(epoch, avg_loss_each_epoch_test))
        loss_summary_test.append(avg_loss_each_epoch_test)

        # Reset to training mode
        model.train()

    total_time_end = time.time()
    total_time = total_time_end - total_time_start
    print('Total Time for training is {}'.format(total_time))

    # Append the total training time to the run's loss log.
    loss_dir = os.path.join(opt.checkpoints_dir, opt.name)
    with open(os.path.join(loss_dir, 'loss_log.txt'), 'a') as f:
        print('Total Time for training is {}'.format(total_time), file=f)

    # Save the per-epoch averages; rows are labelled with the real epoch numbers,
    # which start at opt.epoch_count rather than always at 1.
    save_loss_summary(loss_dir, 'total_loss_train.txt', loss_summary, "Train_Loss", first_epoch=opt.epoch_count)
    save_loss_summary(loss_dir, 'total_loss_test.txt', loss_summary_test, "Test_Loss", first_epoch=opt.epoch_count)