-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatasetGenerator.py
204 lines (163 loc) · 8.77 KB
/
datasetGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import argparse
import csv
import math
import os
import random
from functools import reduce
def csv_field_wrap(field):
    """Return ``field`` as a double-quoted CSV field.

    Every double quote inside ``field`` is escaped by doubling it, then the
    whole value is enclosed in a pair of double quotes.
    """
    escaped = field.replace('"', '""')
    return '"{}"'.format(escaped)
def zeroPadString(inputString, targetLength):
    """Left-pad ``inputString`` with "0" characters up to ``targetLength``.

    Args:
        inputString: the string to pad.
        targetLength: the minimum length of the returned string.

    Returns:
        ``inputString`` prefixed with as many "0" characters as needed to
        reach ``targetLength``; strings that are already long enough are
        returned unchanged.
    """
    # str.rjust pads in one step instead of prepending one "0" per loop
    # iteration. It is used rather than str.zfill because zfill moves a
    # leading sign in front of the padding, which the old loop never did.
    return inputString.rjust(targetLength, "0")
def generate_record(sample_data, costCenter, aufnr):
    """Build one record holding a wrapped CSV field for every column.

    The columns are visited in the iteration order of ``sample_data``.
    "RCNTR" and "AUFNR" take the given cost center and order number,
    "KOKRS" and "RACCT" take fixed constants, and every other column takes
    a randomly picked entry of its sample value list.

    Returns:
        A list of already quoted CSV fields, one per column.
    """
    fixed_values = {
        "RCNTR": costCenter,
        "AUFNR": aufnr,
        "KOKRS": "0001",
        "RACCT": "0000400000",
    }
    fields = []
    for name in list(sample_data):
        if name in fixed_values:
            value = fixed_values[name]
        else:
            candidates = sample_data[name]
            # Pick by random index so the random number stream is consumed
            # exactly once per sampled column.
            value = candidates[random.randint(0, len(candidates) - 1)]
        fields.append(csv_field_wrap(value))
    return fields
def generate(sample_data, args):
    """Generate the dataset CSV together with its hierarchy and auth files.

    For each of ``args.selectivitySteps`` steps one cost center hierarchy of
    ``2 ** args.hierarchyDepth - 1`` nodes is created. Node ``j`` (j > 0) is
    registered as a child of node ``(j - 1) // 2`` of the same hierarchy,
    and every node gets exactly one cost center. For every cost center
    ``args.aufnrPerCc`` order numbers are generated, each with
    ``args.recordsPerAufnr`` records written to the output CSV.

    Afterwards the collected node and leaf lists are handed to
    ``write_setleaf_csv``, ``write_setnode_csv`` and
    ``write_sql_auth_setup_files``.

    Args:
        sample_data: mapping of column name to a list of sample values.
        args: parsed command line namespace; the attributes read here are
            ``samplePath``, ``recordsPerAufnr``, ``aufnrPerCc``,
            ``hierarchyDepth``, ``selectivitySteps`` and ``outputDirectory``.
    """
    setnode = []  # (parent node name, child node name) pairs
    setleaf = []  # (hierarchy node name, cost center) pairs
    columns = list(sample_data)
    cc_per_hierarchy = int(math.pow(2, args.hierarchyDepth)) - 1

    # Zero-padding widths never change inside the loops, so compute them once.
    selectivity_width = len(str(args.selectivitySteps))
    cc_width = len(str(cc_per_hierarchy))
    aufnr_width = len(str(args.aufnrPerCc))

    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(args.outputDirectory, exist_ok=True)
    # os.path.join works with or without a trailing separator on the output
    # directory; basename keeps the file inside that directory even when the
    # sample path contains directory components.
    output_file_name = os.path.join(
        args.outputDirectory, "generated_" + os.path.basename(args.samplePath))

    with open(output_file_name, "w") as file:
        file.write(",".join(csv_field_wrap(column) for column in columns))
        file.write("\n")
        for i in range(args.selectivitySteps):
            print("selectivity step: " + str(i+1) + " of " + str(args.selectivitySteps))
            selectivity_prefix = zeroPadString(str(i + 1), selectivity_width)
            cost_center_h = None
            for j in range(cc_per_hierarchy):
                cc_prefix = zeroPadString(str(j), cc_width)
                cost_center = selectivity_prefix + cc_prefix
                cost_center_h = "H" + cost_center
                if j == 0:
                    # The first node of a hierarchy has no parent.
                    setnode.append(('', cost_center_h))
                else:
                    parent_prefix = zeroPadString(str((j - 1) // 2), cc_width)
                    setnode.append(("H" + selectivity_prefix + parent_prefix, cost_center_h))
                setleaf.append((cost_center_h, cost_center))
                for k in range(args.aufnrPerCc):
                    print(str(k))
                    aufnr = cost_center + zeroPadString(str(k + 1), aufnr_width)
                    # writelines copes with zero records, where reduce()
                    # without an initial value raised a TypeError.
                    file.writelines(
                        ",".join(generate_record(sample_data, cost_center, aufnr)) + "\n"
                        for _ in range(args.recordsPerAufnr))
            # connect the hierarchies
            # NOTE(review): the file's indentation was lost; this step is
            # placed once per hierarchy (after the cost center loop), linking
            # the last node to the next hierarchy's first node - confirm.
            if cost_center_h is not None and i < args.selectivitySteps - 1:
                # The first node's index is padded to cc_width like every
                # other node; a fixed "00" only matches two-digit indices.
                next_root = ("H" + zeroPadString(str(i + 2), selectivity_width)
                             + zeroPadString("0", cc_width))
                setnode.append((cost_center_h, next_root))
    write_setleaf_csv(args, setleaf)
    write_setnode_csv(args, setnode)
    write_sql_auth_setup_files(args, cc_per_hierarchy, setleaf)
def write_sql_auth_setup_files(args, cc_per_hierarchy, setleaf):
    """Write the explicit and implicit SQL auth setup files per selectivity step.

    For every step ``j`` in ``1..args.selectivitySteps`` a directory
    ``selectivity-<j / selectivitySteps>`` is created below
    ``args.outputDirectory`` and two files are written into it:

    * ``explicit-auth-setup.sql``: the USRBF2 statements, the CO_ACTION and
      KSTAR statements, and the last ``j * cc_per_hierarchy`` explicit UST12
      statements derived from ``setleaf``.
    * ``implicit-auth-setup.sql``: the same leading statements followed by a
      single UST12 statement naming the first node of hierarchy ``j``.

    Args:
        args: parsed command line namespace; ``selectivitySteps`` and
            ``outputDirectory`` are read.
        cc_per_hierarchy: number of cost centers in one hierarchy.
        setleaf: sequence of (hierarchy node name, cost center) pairs.
    """
    co_action_stmt = "INSERT INTO EVAL.UST12 (MANDT, OBJCT, AUTH, AKTPS, FIELD, VON, BIS) VALUES ('012','K_CCA','HIER','A','CO_ACTION','3027','');"
    kstar_stmt = "INSERT INTO EVAL.UST12 (MANDT, OBJCT, AUTH, AKTPS, FIELD, VON, BIS) VALUES ('012','K_CCA','HIER','A','KSTAR','0000400000','');"
    ust12_implicit_template = "INSERT INTO EVAL.UST12 (MANDT, OBJCT, AUTH, AKTPS, FIELD, VON, BIS) VALUES ('012','K_CCA','HIER','A','RESPAREA','HI0001{}','');"
    ust12_explicit_stmts = get_ust12_stmts(setleaf)

    # Lines shared by both files of every step, built once.
    common_lines = [stmt + "\n" for stmt in get_usrbf2_stmts()]
    common_lines.append(co_action_stmt + "\n")
    common_lines.append(kstar_stmt + "\n")

    selectivity_width = len(str(args.selectivitySteps))
    # A hierarchy's first node carries index 0 padded to the width of the
    # cost center count; a fixed "00" only matches two-digit counts.
    root_suffix = "0" * len(str(cc_per_hierarchy))

    for j in range(1, args.selectivitySteps + 1):
        dir_name = "selectivity-" + str(j / args.selectivitySteps)
        # os.path.join works with or without a trailing separator on the
        # output directory.
        step_dir = os.path.join(args.outputDirectory, dir_name)
        os.makedirs(step_dir, exist_ok=True)

        # Clamp at 0 so a short statement list cannot wrap around through
        # negative indices.
        first = max(len(ust12_explicit_stmts) - j * cc_per_hierarchy, 0)
        # NOTE(review): this file holds the LAST j * cc_per_hierarchy
        # statements, while the implicit file names the first node of
        # hierarchy j - confirm both are meant to cover the same cost centers.
        with open(os.path.join(step_dir, "explicit-auth-setup.sql"), "w") as file:
            file.writelines(common_lines)
            file.writelines(stmt + "\n" for stmt in ust12_explicit_stmts[first:])

        with open(os.path.join(step_dir, "implicit-auth-setup.sql"), "w") as file:
            file.writelines(common_lines)
            file.write(
                ust12_implicit_template.format(
                    "H" + zeroPadString(str(j), selectivity_width) + root_suffix))
# check for CSV import into HANA
def get_ust12_stmts(setleaf):
    """Return one explicit UST12 INSERT statement per ``setleaf`` entry.

    The second element of each entry is inserted into the RESPAREA value
    after the ``KS0001`` prefix. The statements keep the order of ``setleaf``.
    """
    ust12_explicit_template = "INSERT INTO EVAL.UST12 (MANDT, OBJCT, AUTH, AKTPS, FIELD, VON, BIS) VALUES ('012','K_CCA','HIER','A','RESPAREA','KS0001{}','');"
    return [ust12_explicit_template.format(leaf[1]) for leaf in setleaf]
def get_usrbf2_stmts():
    """Return the USRBF2 INSERT statements for all generated user names.

    The first statement is for the user ``USER``; it is followed by one
    statement each for ``USER_0`` through ``USER_98``.
    """
    usrbf2_template = "INSERT INTO EVAL.USRBF2 (MANDT, BNAME, OBJCT, AUTH) VALUES ('012','{}','K_CCA','HIER');"
    user_names = ["USER"] + ["USER_" + str(number) for number in range(99)]
    return [usrbf2_template.format(user_name) for user_name in user_names]
def write_setnode_csv(args, setnode):
    """Write ``setnode.csv`` into ``args.outputDirectory``.

    The file starts with a header row and then holds one row per entry of
    ``setnode``; ``entry[0]`` fills the SETNAME column and ``entry[1]`` the
    SUBSETNAME column, all other columns are constants. Every field is
    quoted with ``csv_field_wrap``.

    Args:
        args: parsed command line namespace; ``outputDirectory`` is read.
        setnode: sequence of (set name, sub set name) pairs.
    """
    setnode_header = ["MANDT", "SETCLASS", "SUBCLASS", "SETNAME", "LINEID", "SUBSETCLS", "SUBSETSCLS", "SUBSETNAME",
                      "SEQNR"]
    setnode_records = [
        ["012", "0101", "0001", entry[0], "1", "0101", "0001", entry[1], "1"]
        for entry in setnode
    ]
    # os.path.join also works when the output directory has no trailing
    # separator; plain concatenation wrote "<dir>setnode.csv" in that case.
    output_path = os.path.join(args.outputDirectory, "setnode.csv")
    with open(output_path, "w") as file:
        for row in [setnode_header] + setnode_records:
            file.write(",".join(csv_field_wrap(value) for value in row))
            file.write("\n")
def write_setleaf_csv(args, setleaf):
    """Write ``setleaf.csv`` into ``args.outputDirectory``.

    The file starts with a header row and then holds one row per entry of
    ``setleaf``; ``entry[0]`` fills the SETNAME column and ``entry[1]`` fills
    both VALFROM and VALTO, all other columns are constants. Every field is
    quoted with ``csv_field_wrap``.

    Args:
        args: parsed command line namespace; ``outputDirectory`` is read.
        setleaf: sequence of (set name, value) pairs.
    """
    setleaf_header = ["MANDT", "SETCLASS", "SUBCLASS", "SETNAME", "LINEID", "VALSIGN", "VALOPTION", "VALFROM", "VALTO",
                      "SEQNR"]
    setleaf_records = [
        ["012", "0101", "0001", entry[0], "0000000001", "I", "EQ", entry[1], entry[1], "1"]
        for entry in setleaf
    ]
    # os.path.join also works when the output directory has no trailing
    # separator; plain concatenation wrote "<dir>setleaf.csv" in that case.
    output_path = os.path.join(args.outputDirectory, "setleaf.csv")
    with open(output_path, "w") as file:
        for row in [setleaf_header] + setleaf_records:
            file.write(",".join(csv_field_wrap(value) for value in row))
            file.write("\n")
def prepare_sample_data(path):
    """Read a sample CSV file into a mapping of column name to value list.

    The first non-empty row is the header. Every following non-empty row
    contributes one value to each header column, matched by position. The
    fixed columns "RCNTR", "AUFNR", "KOKRS" and "RACCT" are always present
    in the result; those missing from the header are appended after the
    header columns with an empty value list.

    Args:
        path: path of the sample CSV file.

    Returns:
        A dict whose keys are the header columns in file order (followed by
        any missing fixed columns) and whose values are lists of strings.
        An empty file yields an empty dict.

    Raises:
        IndexError: if a data row has fewer fields than the header.
    """
    fixed_columns = ("RCNTR", "AUFNR", "KOKRS", "RACCT")
    data = {}
    header = None
    # newline="" is what the csv module expects for files it reads.
    with open(path, "r", newline="") as file:
        # csv.reader handles quoted fields (including embedded commas) and a
        # last line without a trailing newline.
        for row in csv.reader(file):
            if not row:
                continue
            if header is None:
                # Keep the header order: fields are matched to columns by
                # position, so the order must not pass through a set.
                header = row
                for column in header:
                    data[column] = []
                for column in fixed_columns:
                    data.setdefault(column, [])
                continue
            for index, column in enumerate(header):
                data[column].append(row[index])
    return data
if __name__ == "__main__":
    # Script entry point: parse the command line, load the sample data and
    # generate all output files.
    parser = argparse.ArgumentParser(
        description="Generate a dataset and corresponding authorizations from a sample dataset.")

    # Required positional arguments as (name, type, help text), in the order
    # they must be given on the command line.
    positional_arguments = (
        ("samplePath", str, "Path to the sample CSV file"),
        ("recordsPerAufnr", int, "Number of records per internal order."),
        ("aufnrPerCc", int, "Number of internal orders per cost center."),
        ("hierarchyDepth", int, "Depth of the generated cost center hierarchies."),
        ("selectivitySteps", int,
         "Number of selectivity steps (the data will be generated such that there is an equal "
         "amount of records per selectivity step)."),
    )
    for name, kind, text in positional_arguments:
        parser.add_argument(name, type=kind, help=text)

    # The output directory is the only optional positional argument.
    parser.add_argument(
        "outputDirectory", type=str, nargs="?", default="output/",
        help='Directory where the output files are stored to. Defaults to "output/".')

    args = parser.parse_args()
    sample_data = prepare_sample_data(args.samplePath)
    generate(sample_data, args)