-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.py
executable file
·156 lines (131 loc) · 5.47 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# benchmark.py
# Reads in environment variables from environment.env
# Initializes the environment for osu-benchmark
# This Python script needs to be re-run whenever nodes are added or removed
# DEPENDENCIES
import os, csv, dotenv, subprocess, sys, random
import pandas as pd
# ENVIRONMENT VARIABLES
# These are read from environment.env in the current working directory
# (python-dotenv format). load_dotenv() runs first so that the os.getenv()
# calls below can see the values defined in that file.
cwd = os.getcwd()
dotenv.load_dotenv(cwd+'/environment.env')
sinfo = os.getenv('SINFO') # path to sinfo command
sbatch = os.getenv('SBATCH') # path to sbatch command
squeue = os.getenv('SQUEUE') # path to squeue command
data_dir = os.getenv('DATA_DIR') # path where output files are stored
inst_dir = os.getenv('INST_DIR') # path to osu-mpi module
osu_module_name = os.getenv('OSU_MODULE_NAME') # name of module (with version)
py_module_name = os.getenv('PY_MODULE_NAME') # name of python module used in setting up venv
# Plain string concatenation: no '/' is inserted between INST_DIR and
# OSU_MODULE_NAME, and a TypeError is raised here if either one is unset.
# NOTE(review): presumably INST_DIR is expected to end with '/' - confirm.
inst_include = inst_dir + osu_module_name + '/osu-benchmarks'
sample = os.getenv('JOBS_PER_HR') # number of jobs per hour (kept as a string, not converted to int)
njobs = int(os.getenv('N_JOBS')) # number of repetitions for benchmark
# NODE_START / NODE_FINISH describe the predefined range of target nodes that
# will be analyzed through the benchmark; both are converted to int.
# TODO: In the future, it may be sensible to change this to a predefined list of architectures.
# As of now (May 2020), newer nodes (node1333+) do not explicitly show their architecture when sinfo is invoked.
# arch = os.getenv('ARCH').split(',') can replace these.
node_start = int(os.getenv('NODE_START'))
node_finish = int(os.getenv('NODE_FINISH'))
# GLOBAL VARIABLES
# Ideally, arch_list below should be removed once the TODO above (predefined
# list of architectures read from the environment) is implemented.
arch_list = ['sandy', 'ivy', 'haswell', 'broadwell', 'skylake', 'unspecified']
# Benchmark kinds; each name is appended to 'run_osu_' to form a script name.
job_list =['bibw', 'latency']
# CLASS DEFINITIONS
# Dictionary with autovivification
# Refer to https://en.wikipedia.org/wiki/Autovivification#Python for details
class Tree(dict):
    """Dictionary that creates nested ``Tree`` values on first access.

    Looking up a missing key stores a new, empty instance of the same class
    under that key and returns it, so ``t['a']['b'] = 1`` works without
    creating ``t['a']`` beforehand.
    """

    def __missing__(self, key):
        # type(self)() rather than Tree() so subclasses vivify their own type.
        value = self[key] = type(self)()
        return value
# Class containing current node states
class NodeStat():
    """Snapshot of the cluster nodes reported by sinfo, grouped by architecture.

    Only nodes whose trailing number lies within [node_start, node_finish]
    are kept. Two private tables are built:

    * node name -> list of feature strings reported by sinfo
    * architecture -> list of node names (architecture is the first feature
      found in ``arch_list``, or 'unspecified' when none matches)
    """

    def __init__(self):
        """Query sinfo and build the node tables.

        Raises:
            subprocess.CalledProcessError: if sinfo exits with a non-zero
                status.
        """
        # -N: one line per node, -h: no header line,
        # -o %N,%f: "<node name>,<feature>,<feature>,..."
        # Passed as an argument list so no shell quoting is involved.
        nodels_r = subprocess.run(
            [sinfo, '-N', '-h', '-o', '%N,%f'],
            stdout=subprocess.PIPE,
            check=True,
        )
        self.__nodeInfo = {}  # nodeXXX: [32core, intel, haswell]
        self.__nodeArch = {}  # haswell: [nodeXXX, nodeYYY, nodeZZZ]
        self._ingest(nodels_r.stdout.decode())

    @staticmethod
    def _nodeNumber(name):
        """Return the trailing integer of a node name ('node1333' -> 1333), or None."""
        digits = name[len(name.rstrip('0123456789')):]
        return int(digits) if digits else None

    def _ingest(self, listing):
        """Fill the node tables from "<name>,<feature>,..." lines of sinfo output."""
        for line in listing.splitlines():
            values = [v.strip() for v in line.split(',')]
            name = values[0]
            number = self._nodeNumber(name)
            # Skip blank lines, names without a number and nodes outside
            # the configured target range.
            if number is None or not node_start <= number <= node_finish:
                continue
            # The node-oriented listing can contain the same node more than
            # once (e.g. once per partition); keep only the first occurrence
            # so a node can never be paired with itself.
            if name in self.__nodeInfo:
                continue
            features = values[1:]
            self.__nodeInfo[name] = features
            # For node1333+ no architecture is reported -> 'unspecified'.
            arch = next((f for f in features if f in arch_list), 'unspecified')
            self.__nodeArch.setdefault(arch, []).append(name)

    def randNode(self, arch='any', ratio=0.1):
        """Return a random list of node pairs that share one architecture.

        Args:
            arch: architecture to sample from. 'any' picks, at random, one of
                the architectures that has at least two known nodes.
            ratio: scales the number of pairs drawn; nNodes * nNodes / 2 *
                ratio is rounded up to the next whole pair.
                NOTE(review): the default of 0.1 is a placeholder so that the
                no-argument call in main() works - confirm the desired value.

        Returns:
            A list of [nodeA, nodeB] lists with nodeA != nodeB. Pairs are
            drawn independently, so the same pair may occur more than once.
            Empty when fewer than two nodes are available or ratio <= 0.
        """
        if arch == 'any':
            candidates = [a for a, nodes in self.__nodeArch.items()
                          if len(nodes) >= 2]
            if not candidates:
                return []
            arch = random.choice(candidates)
        nodesList = self.__nodeArch.get(arch, [])
        nNodes = len(nodesList)
        if nNodes < 2:
            return []
        rNodePairs = []
        nRandNode = nNodes * nNodes / 2 * ratio
        # Now sample pairs of nodes to benchmark
        while nRandNode > 0:
            # Two distinct indices, lower one first.
            i, j = sorted(random.sample(range(nNodes), 2))
            rNodePairs.append([nodesList[i], nodesList[j]])
            nRandNode -= 1
        return rNodePairs
class SLURMJob():
def __init__(self):
self.__scriptList = []
def __init__(self, nodePairList):
self.__scriptList = []
self.__buildList(nodePairList)
def buildList(self, nodePairList)
# For each node pairs
for nodePair in nodePairList:
# Build SLURM Job script from scratch
curScript = [
'#!/bin/bash',
'#Autogenerated SLURM script for osu-benchmark',
]
curScript.append(''.join(['#SBATCH --time=0:', str(njobs * 5),':00']))
curScript.append('#SBATCH --nodes=2')
curScript.append('#SBATCH --tasks-per-node=1')
curScript.append('#SBATCH --exclusive')
curScript.append(''.join(['#SBATCH -o ', data_dir, '/%j.out']))
# Line 5 / Load Modules
curScript.append(''.join(['module load ', py_module_name, ' ', osu_module_name]))
# Line 6 / Source venv
curScript.append(''.join(['source ', inst_include, '/../osu_env/bin/activate']))
# For each job
for jobtype in job_list:
# For each iteration
for iteration in njobs:
curScript.append(''.join([inst_include, '/run_osu_', jobtype]))
self.__scriptList.append(curScript)
def getScript(self):
return self.__scriptList
def main():
    """Sample node pairs, write one SLURM script per pair and submit each.

    Scripts are written to ``data_dir`` as ``SLURMinput_<index>`` and handed
    to sbatch one by one.
    """
    # First build NodeStat object
    curNode = NodeStat()
    # Then get list of random pairs of nodes
    nodePairList = curNode.randNode()
    # Build SLURM submission scripts
    curSLURMjob = SLURMJob(nodePairList)
    # Write out each script and use subprocess to submit it to SLURM
    for i, script in enumerate(curSLURMjob.getScript()):
        scriptPath = data_dir + '/SLURMinput_' + str(i)
        with open(scriptPath, 'w', newline="") as file:
            file.write("\n".join(str(line) for line in script) + "\n")
        # Submit only after the file is closed so its content is on disk.
        # Argument list (no shell); fall back to plain 'sbatch' on PATH when
        # the SBATCH variable is not configured.
        subprocess.run([sbatch or 'sbatch', scriptPath])
    # TODO
# Execute only when run as a script (not when imported)
if __name__ == '__main__':
    main()