utils.py
import datetime
import os
import shutil
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import psutil
import torch

# pynvml is only needed to query GPU memory, so import it only when CUDA is available.
if torch.cuda.is_available():
    import pynvml


def create_result_folder(result_dir, exp_label):
    """Copy the current folder into a timestamped result directory and save run info."""
    time_str = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    result_dir = os.path.join(result_dir, exp_label + time_str)
    # Copy everything in the cwd to result_dir, except for the 'data.csv' file.
    shutil.copytree('.', os.path.join(result_dir, 'source'),
                    ignore=shutil.ignore_patterns('data.csv'))
    print('Copied code to ' + result_dir)
    # Save experiment information to the result folder.
    txt_file_name = 'Experiment information.txt'
    str_cmd = ('terminal command for this result folder:\npython '
               + ' '.join(sys.argv) + '\n\n')
    with open(os.path.join(result_dir, txt_file_name), 'w') as text_file:
        text_file.write(str_cmd)
        # Record the job start time and time zone.
        str_time = ('Job start time: '
                    + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    + ', time zone: ' + str(time.tzname[time.daylight]))
        text_file.write(str_time + '\n\n')
        text_file.write('Current working directory: ' + os.getcwd())
    return result_dir
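
# Usage sketch for create_result_folder (the 'results' directory and 'demo_'
# label below are hypothetical examples, not part of this module):
#
#     result_dir = create_result_folder('results', 'demo_')
#     # -> results/demo_2024-01-01_12-00-00/source/... plus the info file
#
# Note that shutil.copytree raises FileExistsError if the target already
# exists, so the timestamp in the folder name keeps runs from colliding.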


def check_gpu_usage():
    """Return the free memory of each GPU, in GB, as a list.

    Based on https://gist.github.com/afspies/7e211b83ca5a8902849b05ded9a10696?permalink_comment_id=4127781#gistcomment-4127781
    The gist also supports filtering GPUs by running processes and users
    (process/user exceptions); that logic is not used here.
    """
    pynvml.nvmlInit()
    device_count = pynvml.nvmlDeviceGetCount()
    gpus_avail_mem = []
    for i in range(device_count):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpus_avail_mem.append(mem.free / (1024 ** 3))  # bytes -> GB
    pynvml.nvmlShutdown()
    return gpus_avail_mem
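
# Usage sketch for check_gpu_usage (assumes at least one NVIDIA GPU and a
# working NVML driver; the values shown are illustrative):
#
#     free_gb = check_gpu_usage()
#     # e.g. [10.5, 23.9] -> GPU 1 has the most free memory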


def get_device(gpu_id=None):
    """Select a PyTorch device: fall back to the CPU if no GPU is available.

    If gpu_id is feasible, use the GPU with that id; otherwise select the
    GPU with the largest free memory.
    """
    if torch.cuda.is_available():
        gpus_avail_mem = check_gpu_usage()
        # Get the number of CUDA devices.
        n_cuda = torch.cuda.device_count()
        if gpu_id is None or gpu_id == -1:
            print('Use GPU with largest free memory.')
            print(gpus_avail_mem)
            gpu_id = int(np.argmax(gpus_avail_mem))
        if gpu_id + 1 > n_cuda:
            print('No GPU available with id {}.'.format(gpu_id))
            print(gpus_avail_mem)
            gpu_id = int(np.argmax(gpus_avail_mem))
            print('Use GPU with largest free memory.')
        device = torch.device('cuda:{}'.format(gpu_id))
    else:
        print('No GPU available. Use CPU instead.')
        device = torch.device('cpu')
        gpu_id = None
    # Note: an earlier draft also selected the Apple MPS backend on macOS
    # (torch.backends.mps); that path is disabled, so only CUDA and CPU are used.
    print('Using device: {}'.format(device))
    return gpu_id, device
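
# Usage sketch for get_device (gpu_id=-1 or None picks the GPU with the
# largest free memory; the id 0 and MyModel below are hypothetical):
#
#     gpu_id, device = get_device()   # auto-select
#     gpu_id, device = get_device(0)  # request cuda:0 if it exists
#     model = MyModel().to(device)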


# Record and visualize the memory usage of the current process.
process = psutil.Process(os.getpid())


class MemoryRecoder():
    def __init__(self, result_dir, with_time=False):
        self.result_dir = result_dir
        self.rec = {}
        if with_time:
            raise NotImplementedError

    def record(self, label):
        """Record the current resident set size, in MB, under the given label."""
        self.rec[label] = process.memory_info().rss / 1024 / 1024  # in MB

    def plot(self):
        """Plot the recorded memory usage and save it to result_dir."""
        plt.figure()
        plt.plot(list(self.rec.keys()), list(self.rec.values()))
        plt.title('Memory usage in MB')
        plt.savefig(os.path.join(self.result_dir, 'memory_rec.png'))
        plt.close()
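

if __name__ == '__main__':
    # Minimal smoke test, a sketch assuming the module is run directly:
    # pick a device, then record memory usage into the current directory.
    _, device = get_device()
    recorder = MemoryRecoder(result_dir='.')
    recorder.record('start')
    x = torch.randn(256, 256, device=device)  # small allocation to measure
    recorder.record('after alloc')
    recorder.plot()  # writes memory_rec.png to result_dir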