run.py
#!/usr/bin/env python
import os
import json
import pprint as pp

import torch
import wandb
from tensorboard_logger import Logger as TbLogger

from nets.gcn import Net1, Net2
from train import train
from options import get_options
from utils.data_utils import load_dataset, save_dataset, check_extension
from utils.functions import compute_embeddings


def run(opts):

    # Disable wandb cloud sync (run offline)
    os.environ['WANDB_MODE'] = 'dryrun'

    # Initialize wandb
    wandb.init(project='Network1')

    # Load all arguments into the wandb config so they are saved as hyperparameters
    wandb.config.update(opts)

    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir, "{}_{}".format(opts.problem, opts.graph_size), opts.run_name))
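    # With this layout, the TensorBoard logs end up under <log_dir>/<problem>_<graph_size>/<run_name>.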

    os.makedirs(opts.save_dir)

    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)
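    # The saved args.json can be reloaded later to reproduce the run, e.g.
    # json.load(open(os.path.join(opts.save_dir, "args.json"))) returns the same options dict.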

    # Set the device
    opts.device = torch.device("cuda" if opts.use_cuda else "cpu")

    # Load the pre-generated dataset from its path
    train_dataset = load_dataset(filename=opts.train_dataset)
    # For now, the validation and training datasets are the same
    val_dataset = load_dataset(filename=opts.val_dataset)

    # Initialize the model (needs to be modified for the regression case)
    model = Net1(n_features=train_dataset[0].num_features, embed_dim=opts.embed_dim,
                 out_features=opts.num_output).to(opts.device)
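    # Net1 presumably maps each node's input features to opts.embed_dim-dimensional embeddings
    # and produces opts.num_output outputs (inferred from the constructor arguments; see nets/gcn.py).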

    # Multi-GPU support (disabled for now; enable once the model runs successfully on a single GPU)
    # if opts.use_cuda and torch.cuda.device_count() > 1:
    #     print("No. of GPUs:", torch.cuda.device_count())
    #     model = torch.nn.DataParallel(model)

    # Initialize the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
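    # Note: the learning rate (0.01) and weight decay (5e-4) are hard-coded here rather than taken from opts.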

    # Save the PyTorch model and track all of the gradients and optionally the parameters
    wandb.watch(model, log='all')  # "gradients", "parameters", "all", or None

    # Start training
    training_status = train(model, optimizer, train_dataset, val_dataset, tb_logger, opts)
    print(training_status)

    # Compute the embeddings and save them to create the dataset for Network2
    scores, embeddings = compute_embeddings(model, opts, data=train_dataset)
    filename = os.path.join(opts.data_dir, "op{}{}_{}N{}_seed{}_label_{}_embeddings.pkl".format(
        "_{}".format(opts.data_distribution) if opts.data_distribution is not None else "",
        opts.graph_size, opts.name, opts.dataset_size, opts.dataset_seed, opts.label_type))
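    # Illustrative example of the resulting name (hypothetical option values):
    # op_unif20_run1N10000_seed1234_label_score_embeddings.pkl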
    assert opts.f or not os.path.isfile(check_extension(filename)), \
        "File already exists! Try running with the -f option to overwrite."
    print(filename)
    save_dataset([embeddings, scores], filename)
    print("Embeddings computed, shape: {}".format(embeddings.shape))
    print("Scores computed, shape: {}".format(scores.shape))
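    # Network2 is expected to consume this file; load_dataset(filename) should return the
    # [embeddings, scores] pair saved above (assuming load_dataset mirrors save_dataset).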


if __name__ == '__main__':
    run(get_options())
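# Typical invocation (hypothetical flag names; the actual ones are defined in options.get_options()):
#   python run.py --train_dataset data/train.pkl --val_dataset data/val.pkl --embed_dim 128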