-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathddqn_train.submit_file
55 lines (42 loc) · 1.72 KB
/
ddqn_train.submit_file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# =============================================================
# Batch label
# Gives every job queued by this file a common name so the
# batch is easy to pick out in the queue.
# =============================================================
JobBatchName = "PBN RL DDQN"

# =============================================================
# Universe (vanilla, docker)
# The jobs run in the docker universe, using the image and
# explicit tag named below.
# =============================================================
docker_image = registry.eps.surrey.ac.uk/pbn-rl:ed941756
universe     = docker

# =============================================================
# Per-job files
# One set per (exp, steps, seed) combination:
#   error  - the job's error stream
#   output - the job's output stream
#   log    - the event log for the job
# All three paths are relative.
# NOTE(review): presumably logs/, logs/error/ and outs/ must
# already exist in the submit directory - confirm.
# =============================================================
error  = logs/error/$(exp).$(steps).$(seed).log
output = outs/$(exp).$(steps).$(seed).log
log    = logs/$(exp).$(steps).$(seed).log
# =============================================================
# File transfer, job environment and machine matching
# =============================================================
# Explicit file transfer is not enabled; the setting is kept
# here, commented out, for reference.
# should_transfer_files = YES

# Environment variables handed to the job. $ENV(NAME) is filled
# in from the environment of the shell that runs the submit:
#   mount           - value of PWD at submit time
#   WANDB_API_KEY   - copied from the submitter's environment
#   WANDB_RUN_GROUP - value of the $(group) macro
# NOTE(review): "mount" looks like a site-specific hint for
# mounting the submit directory into the container - confirm.
# NOTE(review): the API key becomes part of the job's
# environment string; check who can read job attributes on
# this pool.
environment = "mount=$ENV(PWD) WANDB_API_KEY=$ENV(WANDB_API_KEY) WANDB_RUN_GROUP=$(group)"

# Only match machines for which all three conditions hold:
# CUDAGlobalMemoryMb above 3000, HasStornext true, and
# CUDACapability strictly above 7.0.
requirements = (CUDAGlobalMemoryMb > 3000) && (HasStornext) && (CUDACapability > 7.0)

# =============================================================
# Resources requested for each job
# =============================================================
request_GPUs   = 1
request_CPUs   = 1
request_memory = 5G

# Custom job attributes (the leading "+" adds them to the job ad).
# CanCheckpoint marks the job as able to checkpoint.
# NOTE(review): GPUMem is presumably in MB; it is 4000 while the
# requirements expression only asks for more than 3000 - confirm
# the two are meant to differ.
+CanCheckpoint = true
+GPUMem        = 4000
# =============================================================
# Command line and queueing
# Macros are used to assemble the command line, much as flags
# would be typed by hand. They are defined first and referenced
# by "arguments" further down.
# =============================================================

# Base settings.
env_n          = 28
batch_size     = 256
steps          = 150000
learningstarts = 30000

# Values derived from the base settings and the submit directory.
# NOTE(review): "env" may also be recognised by condor_submit as
# an alternate spelling of the "environment" command. It appears
# harmless while "environment" is set explicitly elsewhere in this
# file - confirm before removing that line.
script      = $ENV(PWD)/train_ddqn.py
env         = gym-PBN/Bittner-$(env_n)-v0
exp         = ddqn-$(env_n)-b$(batch_size)
group       = ddqn-$(env_n)
hyperparams = exploration_fraction=0.25,batch_size=$(batch_size)

# Full argument string passed to the job. Checkpoints go to
# models/ and logs to logs/ under the submit directory.
# NOTE(review): --eval-only is passed together with
# --resume-training in a file named for training - confirm this
# is intended.
arguments = $(script) --seed $(seed) --time-steps $(steps) --env $(env) --exp-name $(exp) --resume-training --checkpoint-dir $ENV(PWD)/models --log-dir $ENV(PWD)/logs --learning-starts $(learningstarts) --hyperparams $(hyperparams) --eval-only

# Maximum time the job will take, in hours.
+JobRunTime = 3

# Queue one job for each listed seed (five jobs in total);
# $(seed) takes each value in turn.
queue 1 seed in 42 727 420 69 1337