-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgenerate_jobs.py
133 lines (117 loc) · 4.2 KB
/
generate_jobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import argparse
import os

from causal_nf.job_creator import job_creator_dict
from causal_nf.job_creator.helper import *
import causal_nf.utils.io as causal_io
def _positive_int(value):
    """Argparse type: parse *value* as an int and require it to be >= 1.

    Both ``--jobs_per_file`` and ``--batch_size`` are later used as divisors
    (``//`` and ``%``), so zero would only surface as a ZeroDivisionError
    after the output folder has already been recreated.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {number}"
        )
    return number


# Command-line interface of the job generator.
parser = argparse.ArgumentParser()
# The grid file is mandatory: every later step derives paths from it, so a
# missing value could only end in a TypeError further down.
parser.add_argument(
    "-grid_file", "--grid_file", type=str, required=True, help="Grid file"
)
parser.add_argument(
    "-cluster_file",
    "--cluster_file",
    type=str,
    default=os.path.join("grids", "cluster_cpu.yaml"),
    help="Cluster file",
)
parser.add_argument(
    "-format",
    "--format",
    type=str,
    default="sub",
    help="Format to create the executable files",
)
parser.add_argument(
    "-wandb_mode",
    "--wandb_mode",
    type=str,
    default="offline",
    help="Wandb mode passed to every generated main.py command",
)
parser.add_argument(
    "-project", "--project", type=str, default="Test", help="Wandb project"
)
parser.add_argument(
    "-jobs_per_file",
    "--jobs_per_file",
    type=_positive_int,
    default=1,
    help="Number of consecutive configs grouped under the same folder id",
)
parser.add_argument(
    "-batch_size",
    "--batch_size",
    type=_positive_int,
    default=1,
    help="Number of main.py commands bundled into one batch script "
    "(1 disables batching)",
)
parser.add_argument(
    "-delete_ckpt",
    "--delete_ckpt",
    action="store_true",
    help="Append --delete_ckpt to every generated command",
)
parser.add_argument(
    "-only_test",
    "--only_test",
    action="store_true",
    help="Only generate the first configuration of the grid",
)
args = parser.parse_args()
# The grid is read twice from the same file: once with flatten=False and once
# with flatten=True. The keys of the flattened version are kept as a list.
grid_flat = causal_io.load_yaml(args.grid_file, flatten=True)
grid = causal_io.load_yaml(args.grid_file, flatten=False)
keys = list(grid_flat.keys())

# Directory part and file name of the grid file.
folder, grid_name = os.path.split(args.grid_file)

# Optional extra grid files associated with the main one (lookup is done by
# the helper; its discovery rule is not visible here).
grid_file_extra_list = get_grid_file_extra_list(args.grid_file)

if len(grid_file_extra_list) == 0:
    # No extra files: expand the flattened grid on its own.
    options = generate_options(grid_flat=grid_flat, grid_file_extra=None)
else:
    # One expansion per extra file, concatenated in file order.
    options = []
    for extra_file in grid_file_extra_list:
        causal_io.print_info(f"Getting configs from: {extra_file}")
        options += generate_options(grid_flat=grid_flat, grid_file_extra=extra_file)
# All generated artefacts live in a folder named after the grid file with its
# extension stripped.
# NOTE(review): makedirs_rm_exist presumably removes an existing folder before
# recreating it - confirm before pointing this at a folder with results.
grid_folder = os.path.splitext(args.grid_file)[0]
causal_io.makedirs_rm_exist(grid_folder)

# The wandb group is the name of the directory that contains the grid file.
# Resolving it through os.path (instead of splitting on os.sep and indexing
# [-2]) also works when the grid file is given without a directory component
# or with "/" separators on Windows.
group = os.path.basename(os.path.dirname(os.path.abspath(grid_folder)))

sub_folder = os.path.join(grid_folder, "jobs")
output_folder = os.path.join(grid_folder, "output")
config_folder = os.path.join(grid_folder, "configs")
scripts_folder = os.path.join(grid_folder, "scripts")
causal_io.makedirs(scripts_folder, only_if_not_exists=True)

# The job creator class is selected by --format and receives the cluster file
# as its header.
job_creator = job_creator_dict[args.format](
    job_folder=sub_folder, output_folder=output_folder, header_file=args.cluster_file
)

n_jobs_per_folder = args.jobs_per_file

# Parallel lists filled while the configs are generated and consumed when the
# jobs are registered.
main_str_list = []
folder_id_list = []
job_id_list = []

# Smoke-test mode: keep only the first configuration.
if args.only_test:
    options = options[:1]

causal_io.print_info(f"Number of jobs: {len(options)}")
# Write one YAML config per option and record the command that runs it,
# together with the folder/job ids used when the jobs are registered.
for index, option in enumerate(options):
    # Consecutive options share a folder id (1-based); job_id is the position
    # of the option inside that folder (0-based).
    folder_index, job_id = divmod(index, n_jobs_per_folder)
    folder_id = folder_index + 1

    target_dir = os.path.join(config_folder, str(folder_id))
    causal_io.makedirs(target_dir, only_if_not_exists=True)

    config_file = os.path.join(target_dir, f"config_{index + 1}.yaml")
    causal_io.save_yaml(causal_io.create_yaml(grid, keys, option), config_file)

    # Command line handed to the job creator (or wrapped in a batch script).
    command_parts = [
        f"main.py --config_file {config_file}",
        f"--wandb_mode {args.wandb_mode}",
        f"--wandb_group {group}",
        f"--project {args.project}",
    ]
    if args.delete_ckpt:
        command_parts.append("--delete_ckpt")

    main_str_list.append(" ".join(command_parts))
    folder_id_list.append(folder_id)
    job_id_list.append(job_id)
# Register the generated commands with the job creator, either one job per
# command (--batch_size 1) or bundled into batch scripts.
num_jobs = len(main_str_list)

if args.batch_size == 1:
    # No batching: every command becomes its own job and no batch script is
    # written.
    for main_str, folder_id, job_id in zip(
        main_str_list, folder_id_list, job_id_list
    ):
        job_creator.add_job(main_str, folder_id, job_id)
else:
    batch_job_id = 0
    batch_main_str = None  # path of the batch script currently being filled
    for i, (main_str, folder_id) in enumerate(
        zip(main_str_list, folder_id_list), start=1
    ):
        if batch_main_str is None:
            # Open a batch script only when there is a command to put in it,
            # so no orphan stub is left behind after the last batch.
            # NOTE(review): relies on str_to_file adding one line per call to
            # the same file - confirm against causal_nf.utils.io.
            batch_main_str = os.path.join(scripts_folder, f"batch_{batch_job_id}.py")
            causal_io.str_to_file("import os", batch_main_str)

        # repr() yields a valid Python string literal even when the command
        # contains backslashes or quotes; for plain commands the output is
        # the same single-quoted literal as before.
        command = f"python {main_str}"
        causal_io.str_to_file(f"os.system({command!r})", batch_main_str)

        # Flush when the batch is full or the commands are exhausted. The job
        # is filed under the folder id of the last command in the batch.
        if i % args.batch_size == 0 or i == num_jobs:
            job_creator.add_job(batch_main_str, folder_id, batch_job_id)
            batch_job_id += 1
            batch_main_str = None

# Reports the number of generated commands (not the number of batch scripts).
print(f"Total number of jobs: {num_jobs}")