# sb3_train.submit_file - HTCondor submit description file
# =====================================================================
# Execution backend
# The universe can be "vanilla" or "docker"; this file uses docker, so
# each job runs in a container started from the image tag below.
# =====================================================================
universe     = docker
docker_image = registry.eps.surrey.ac.uk/pbn-rl:ed941756

# =====================================================================
# Batch label
# A human-readable name for this submission, so its jobs are easy to
# pick out in the queue listing.
# =====================================================================
JobBatchName = "PBN RL PPO"
# =====================================================================
# Per-job log files
# Every file name embeds the experiment name, the step budget and the
# seed, so each queued seed gets its own set of files.
#   error  : the job's error stream
#   output : the job's output stream
#   log    : the HTCondor event log
# NOTE(review): logs/, logs/error/ and outs/ presumably have to exist
# before submitting - confirm.
# =====================================================================
error  = logs/error/$(exp).$(steps).$(seed).log
output = outs/$(exp).$(steps).$(seed).log
log    = logs/$(exp).$(steps).$(seed).log
# =====================================================================
# File transfer, job environment and machine matching
# =====================================================================
# should_transfer_files is not set in this file; the line is kept
# commented out for reference.
# should_transfer_files = YES

# Only match machines that advertise HasStornext, a CUDAGlobalMemoryMb
# above 3000 and a CUDACapability strictly above 7.0.
requirements = (CUDAGlobalMemoryMb > 3000) && (HasStornext) && (CUDACapability > 7.0)

# Variables handed to the job:
#   mount           : PWD of the shell that runs condor_submit
#   WANDB_API_KEY   : copied from that same shell's environment
#   WANDB_RUN_GROUP : value of the `group` macro defined in this file
# NOTE(review): the API key ends up in the job's environment setting;
# anyone able to inspect the job can presumably read it - confirm this
# is acceptable on this pool.
environment = "mount=$ENV(PWD) WANDB_API_KEY=$ENV(WANDB_API_KEY) WANDB_RUN_GROUP=$(group)"
# =====================================================================
# Resources requested for each job: one GPU, one CPU, 3G of memory.
# +GPUMem adds a custom GPUMem attribute (4000) to the job;
# presumably the expected GPU memory use - confirm the meaning and the
# unit against the pool documentation.
# =====================================================================
request_GPUs   = 1
request_CPUs   = 1
request_memory = 3G
+GPUMem        = 4000
# =====================================================================
# Run parameters
# Submit-file macros that are substituted into the log file names, the
# job environment and the command line of every queued job.
# NOTE(review): the macro name `env` may also be understood by
# condor_submit as a short form of `environment`; it works while
# `environment` is set explicitly, but a more specific name would be
# safer - confirm.
# =====================================================================
env_n  = 28
steps  = 150000
env    = gym-PBN/Bittner-$(env_n)-v0
exp    = ppo-default-$(env_n)
group  = ppo-default-$(env_n)
script = $ENV(PWD)/train_sb3.py

# Command line of each job: the training script followed by its flags,
# built from the macros above exactly as it would be typed in a shell.
# NOTE(review): --checkpoint-dir is the same directory for every seed;
# presumably train_sb3.py derives a per-run location from the other
# flags - confirm.
arguments = $(script) --seed $(seed) --time-steps $(steps) --env $(env) --exp-name $(exp) --resume-training --checkpoint-dir $ENV(PWD)/models --log-dir $ENV(PWD)/logs

# Custom attributes added to the job. Presumably JobRunTime is the
# expected run time and CanCheckpoint marks the job as resumable
# (the command line passes --resume-training) - confirm the exact
# semantics and units against the pool documentation.
+JobRunTime    = 1
+CanCheckpoint = true

# Submit one job for each listed value of `seed`.
queue 1 seed in 42 727 420 69 1337