#Below is a list of commands that populate the default settings ("config") when init() is called.
#To adjust a particular setting for just one run, copy the corresponding line to your script and modify it there.
#Changing values in this file has a global effect on all future PLAMS runs.
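#For example, a script-local override could look like this (illustrative sketch; the import path assumes a standard PLAMS installation):
#from scm.plams import init, config
#init()
#config.log.stdout = 1   #quieter standard output log for this run only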
#Point $PLAMSDEFAULTS to this directory
#==== General defaults =====================================================
#Enable preview mode: no actual calculations are run, only inputs and runscripts are prepared
config.preview = False
#Defines a unit of time used whenever some action needs to be repeated until a certain condition is met (e.g. querying the queue manager about a job submitted to a cluster)
config.sleepstep = 5
#Defines what happens when the results of a failed/crashed job are accessed
#If False, an exception is thrown
#If True, PLAMS tries to obtain the requested results anyway and informs you about it in the log (level 3)
config.ignore_failure = True
#If set to True, all threads started by JobRunner are daemon threads.
#Daemon threads are terminated when the main thread finishes, so pressing Ctrl-C ends a parallel script immediately
config.daemon_threads = True
#If set to True, the entire main working folder is deleted at the end of the script
config.erase_workdir = False
#==== JobManager defaults ==================================================
#When two or more jobs have the same name, all jobs apart from the first one are renamed to [jobname].002, [jobname].003, etc.
#The number here defines how many digits are used for that counter
config.jobmanager.counter_len = 3
#Defines the hashing method used for testing whether a job was previously run
#Currently supported values are: 'input', 'runscript', 'input+runscript' and False/None
config.jobmanager.hashing = 'input'
#Removes all empty subdirectories in the main working folder at the end of the script
config.jobmanager.remove_empty_directories = True
#Defines what action to take when a particular job folder already exists in the filesystem
#Possible values are: None (throw an exception), 'remove', 'rename' (to *.old)
#This option is relevant only when the current run uses an existing, non-empty main working folder that contains other folders
config.jobmanager.jobfolder_exists = None
#==== Job defaults =========================================================
#After a job execution is finished, pickle the whole job object to [jobname].dill
config.job.pickle = True
#Define which files produced by the executed job should be kept on the disk
#See the documentation (Components overview -> Results -> Cleaning job folder) for details and possible values
config.job.keep = 'all'
#Define which files should be kept when the job folder is cleaned again at the end of the script (second cleaning stage, same documentation section as above)
config.job.save = 'all'
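#For example, to keep only a few files per job, a list of patterns can be used instead of 'all'
#(illustrative values; '$JN' stands for the job name, see the documentation section above for the exact pattern syntax):
#config.job.keep = ['$JN.in', '$JN.run', '$JN.out']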
#The first line of all produced runscripts
config.job.runscript.shebang = '#!/bin/bash'
#If set to True, the standard output redirection is handled by your operating system (by using '>[jobname].out' in the runscript)
#If False, it is handled by a native Python mechanism
#(set this to True if you want to peek at the output of a job submitted to a cluster queue while the job is running)
config.job.runscript.stdout_redirect = False
#When files are imported into a job's directory by rerun prevention, they can be either copied or hardlinked
#Set to True for hardlinks, False for copying
#On Windows this setting has no effect: files are always copied.
config.job.link_files = True
#==== Log defaults =========================================================
#Verbosity of log messages: 0:none 1:minimal 3:normal 5:verbose 7:extremely talkative
#Verbosity of the log printed to .log file in the main working folder
config.log.file = 5
#Verbosity of the log printed to the standard output
config.log.stdout = 3
#Print time for each log event
config.log.time = True
#Print date for each log event
config.log.date = False
#==== Subprocess retry =====================================================
#All subprocess.run() calls in PLAMS are wrapped with the saferun() function, which retries the run() call if OSError is raised.
#This is done mainly to deal with BlockingIOError no. 11 (resource temporarily unavailable) when a large number of run() calls
#occur in a short period of time (for example, when a large MultiJob submits its long list of children to a job scheduler).
#The parameters below define the number of attempts made for each run() call and the delay between them.
config.saferun.repeat = 10
config.saferun.delay = 1
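#For reference, a minimal sketch of such a retry wrapper (illustrative only; the real saferun lives in the PLAMS core):
#import subprocess, time
#def _saferun_sketch(cmd, attempts=config.saferun.repeat, delay=config.saferun.delay, **kwargs):
#    for i in range(attempts):
#        try:
#            return subprocess.run(cmd, **kwargs)
#        except OSError:
#            if i == attempts - 1:
#                raise
#            time.sleep(delay)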
#==== Default JobRunner ====================================================
from .jobrunner import JobRunner
#Use a local serial JobRunner as a default
config.default_jobrunner = JobRunner(parallel=False)
#===========================================================================
#==== GridRunner definitions ===============================================
#===========================================================================
#GridRunner mechanism for testing if a job is finished:
#If [...].commands.finished exists, it is used to check whether the job is finished. It should be a function that takes a single string (job_id) as an argument and returns True or False
#Otherwise [...].commands.check is combined with job_id, executed as a subprocess, and the returned exit code is tested (a nonzero exit code indicates that the job has finished)
#(a hypothetical custom 'finished' function is sketched below the helper functions)
#Extract the numeric job ID from the scheduler's submit-command output
def __get_jobid(string):
    tmp = string.split('.')[0]
    digits = [ch for ch in tmp if ch.isdigit()]
    return ''.join(digits) if digits else None

#Parse 'squeue' output: skip the header line and return the IDs of all currently listed jobs
def __slurm_running(output):
    lines = output.splitlines()[1:]
    return [line.split()[0] for line in lines]

#Parse 'qstat' output: skip the two header lines and return the IDs of all currently listed jobs
def __pbs_running(output):
    lines = output.splitlines()[2:]
    return [line.split()[0].split('.')[0] for line in lines]
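#A hypothetical custom 'finished' check could look like this (illustrative sketch; it would be assigned to e.g. [...].commands.finished):
#import subprocess
#def __my_finished(job_id):
#    #Consider the job finished when the scheduler no longer lists it
#    result = subprocess.run(['squeue', '-j', job_id], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
#    return result.returncode != 0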
from .jobrunner import GridRunner
GridRunner.config.pbs.workdir = '-d'
GridRunner.config.pbs.output = '-o'
GridRunner.config.pbs.error = '-e'
GridRunner.config.pbs.special.mem = '-l mem='
#Use .nodes = '<nnodes>:ppn=<cpuspernode>'
GridRunner.config.pbs.special.nodes = '-l nodes='
GridRunner.config.pbs.special.walltime = '-l walltime='
GridRunner.config.pbs.special.queue = '-q '
GridRunner.config.pbs.commands.submit = 'qsub'
GridRunner.config.pbs.commands.check = 'qstat'
GridRunner.config.pbs.commands.getid = __get_jobid
GridRunner.config.pbs.commands.running = __pbs_running
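#Example (assuming the standard GridRunner mapping of job.settings.run.* keys to the 'special' flags above; values are illustrative):
#config.job.run.nodes = '2:ppn=8'        #translates to '-l nodes=2:ppn=8'
#config.job.run.walltime = '72:00:00'    #translates to '-l walltime=72:00:00'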
GridRunner.config.slurm.workdir = '-D'
GridRunner.config.slurm.output = '-o'
GridRunner.config.slurm.error = '-e'
GridRunner.config.slurm.special.cpuspertask = '--cpus-per-task='
GridRunner.config.slurm.special.dependency = '--dependency='
GridRunner.config.slurm.special.exclusive = '--exclusive'
GridRunner.config.slurm.special.mempercpu = '--mem-per-cpu='
GridRunner.config.slurm.special.mem = '--mem='
GridRunner.config.slurm.special.nodes = '-N '
GridRunner.config.slurm.special.ntasks = '-n '
GridRunner.config.slurm.special.ntaskspercore = '--ntasks-per-core='
GridRunner.config.slurm.special.ntaskspernode = '--ntasks-per-node='
GridRunner.config.slurm.special.walltime = '-t '
GridRunner.config.slurm.special.queue = '-p '
GridRunner.config.slurm.special.account = '-A '
GridRunner.config.slurm.special.qos = '--qos='
GridRunner.config.slurm.special.constraint = '--constraint='
GridRunner.config.slurm.special.licenses = '--licenses='
GridRunner.config.slurm.commands.submit = 'sbatch'
GridRunner.config.slurm.commands.check = 'squeue'
GridRunner.config.slurm.commands.getid = __get_jobid
GridRunner.config.slurm.commands.running = __slurm_running
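#Example (same mapping as above; values are illustrative):
#config.job.run.ntasks = 16              #translates to '-n 16'
#config.job.run.mempercpu = 2000         #translates to '--mem-per-cpu=2000'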
#Wrapper around the myADF19_pbs submission script
GridRunner.config.terminatorADF.workdir = '-d'
GridRunner.config.terminatorADF.output = '-o'
GridRunner.config.terminatorADF.error = '-e'
GridRunner.config.terminatorADF.special.mem = '-m '
GridRunner.config.terminatorADF.special.nodes = '-x '
GridRunner.config.terminatorADF.special.cpuspernode = '-p '
GridRunner.config.terminatorADF.special.walltime = '-n '
GridRunner.config.terminatorADF.special.queue = '-q '
GridRunner.config.terminatorADF.special.version = '-v '
GridRunner.config.terminatorADF.commands.submit = 'myADF19_pbs'
GridRunner.config.terminatorADF.commands.check = 'qstat'
GridRunner.config.terminatorADF.commands.getid = __get_jobid
GridRunner.config.terminatorADF.commands.running = __pbs_running
#Uncomment and adjust these as needed
#config.job.run.queue = 'standard'
#config.job.runscript.pre = 'module load adf'
#config.default_jobrunner = GridRunner(parallel=True, grid='pbs', maxjobs=16)
#config.default_jobrunner = GridRunner(parallel=True, grid='slurm', maxjobs=16)