Skip to content

Commit

Permalink
Update reco and express config entry points to enable processing version (#49)
Browse files Browse the repository at this point in the history

* Update reco and express config entry points to enable processing version

* Update DataExpressConfigHistory.py

* Update Regexps.py

Add wildcard use to stream queries

* Update DataExpressConfigHistory.py

---------

Co-authored-by: Antonio <[email protected]>
  • Loading branch information
LinaresToine and Antonio authored Oct 13, 2023
1 parent bdb7edd commit 977a85f
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 37 deletions.
4 changes: 3 additions & 1 deletion src/python/DataExpressConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def get(self,run, stream):
express_config.dqm_seq,
express_config.global_tag,
express_config.scenario,
express_config.proc_version,
express_config.multicore,
express_config.write_tiers,
express_config.write_dqm
Expand All @@ -52,7 +53,7 @@ def get(self,run, stream):
for result in c.fetchall():

(run, stream, cmssw, scram_arch, reco_cmssw, reco_scram_arch, alca_skim,
dqm_seq, global_tag, scenario, multicore, write_tiers, write_dqm) = result
dqm_seq, global_tag, scenario, proc_version, multicore, write_tiers, write_dqm) = result

config = { "run" : run,
"stream" : stream,
Expand All @@ -64,6 +65,7 @@ def get(self,run, stream):
"dqm_seq" : dqm_seq,
"global_tag" : global_tag,
"scenario" : scenario,
"proc_version" : proc_version,
"multicore" : multicore,
"write_tiers" : write_tiers,
"write_dqm" : bool(write_dqm) }
Expand Down
76 changes: 57 additions & 19 deletions src/python/DataExpressConfigHistory.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,47 +21,84 @@ def get(self, stream, scenario):
:returns: stream, Scenario Acquisition era, minimum run, maximum run, CMSSW, PhysicsSkim, DqmSeq, GlobalTag"""

sql = """
SELECT express_config.stream p_stream, express_config.scenario p_scenario, MAX(run_config.run) max_run, MIN(run_config.run) min_run, express_config.cmssw cmssw, express_config.global_tag global_tag, express_config.alca_skim alca_skim, express_config.dqm_seq dqm_seq, run_config.acq_era acq_era
SELECT express_config.stream p_stream,
express_config.scenario p_scenario,
MAX(run_config.run) max_run,
MIN(run_config.run) min_run,
express_config.cmssw cmssw,
express_config.global_tag global_tag,
express_config.alca_skim alca_skim,
express_config.dqm_seq dqm_seq,
express_config.proc_version proc_version,
run_config.acq_era acq_era
FROM express_config
JOIN run_config ON run_config.run = express_config.run
"""
sql_with_primds = """
WHERE stream = :p_stream
GROUP BY run_config.acq_era, express_config.stream, express_config.scenario, express_config.cmssw, express_config.global_tag, express_config.alca_skim, express_config.dqm_seq
ORDER BY express_config.stream, MAX(run_config.run) desc, MIN(run_config.run) desc
sql_with_stream = """
WHERE stream LIKE :p_stream
GROUP BY run_config.acq_era,
express_config.stream,
express_config.scenario,
express_config.cmssw,
express_config.global_tag,
express_config.alca_skim,
express_config.dqm_seq,
express_config.proc_version
ORDER BY MAX(run_config.run) desc, MIN(run_config.run) desc, express_config.stream
"""
sql_with_scenario = """
WHERE express_config.scenario = :p_scenario
GROUP BY run_config.acq_era, express_config.stream, express_config.scenario, express_config.cmssw, express_config.global_tag, express_config.alca_skim, express_config.dqm_seq
ORDER BY express_config.stream, MAX(run_config.run) desc, MIN(run_config.run) desc
WHERE express_config.scenario LIKE :p_scenario
GROUP BY run_config.acq_era,
express_config.stream,
express_config.scenario,
express_config.cmssw,
express_config.global_tag,
express_config.alca_skim,
express_config.dqm_seq,
express_config.proc_version
ORDER BY MAX(run_config.run) desc, MIN(run_config.run) desc
"""
sql_with_both = """
WHERE express_config.stream = :p_stream AND express_config.scenario = :p_scenario
GROUP BY run_config.acq_era, express_config.stream, express_config.scenario, express_config.cmssw, express_config.global_tag, express_config.alca_skim, express_config.dqm_seq
ORDER BY express_config.stream, MAX(run_config.run) desc, MIN(run_config.run) desc
WHERE express_config.stream LIKE :p_stream AND express_config.scenario LIKE :p_scenario
GROUP BY run_config.acq_era,
express_config.stream,
express_config.scenario,
express_config.cmssw,
express_config.global_tag,
express_config.alca_skim,
express_config.dqm_seq,
express_config.proc_version
ORDER BY MAX(run_config.run) desc, MIN(run_config.run) desc, express_config.stream
"""
sql_default = """
GROUP BY run_config.acq_era, express_config.stream, express_config.scenario, express_config.cmssw, express_config.global_tag, express_config.alca_skim, express_config.dqm_seq
ORDER BY express_config.stream, MAX(run_config.run) desc, MIN(run_config.run) desc
GROUP BY run_config.acq_era,
express_config.stream,
express_config.scenario,
express_config.cmssw,
express_config.global_tag,
express_config.alca_skim,
express_config.dqm_seq,
express_config.proc_version
ORDER BY MAX(run_config.run) desc, MIN(run_config.run) desc
"""

if stream is not None and scenario is None:
sql_ = sql + sql_with_primds
c, _ = self.api.execute(sql_, p_stream = stream)
sql_ = sql + sql_with_stream
c, _ = self.api.execute(sql_, p_stream = '%' + str(stream) + '%')
elif stream is not None and scenario is not None:
sql_ = sql + sql_with_both
c, _ = self.api.execute(sql_, p_stream = stream, p_scenario = scenario)
c, _ = self.api.execute(sql_, p_stream = '%' + str(stream) + '%', p_scenario = '%' + str(scenario) + '%')
elif stream is None and scenario is not None:
sql_ = sql + sql_with_scenario
c, _ = self.api.execute(sql_, p_scenario = scenario)
c, _ = self.api.execute(sql_, p_scenario = '%' + str(scenario) + '%')
else:
sql_ = sql + sql_default
c, _ = self.api.execute(sql_)

configs = []
for result in c.fetchall():

(p_stream, p_scenario, max_run, min_run, cmssw, global_tag, alca_skim, dqm_seq, acq_era) = result
(p_stream, p_scenario, max_run, min_run, cmssw, global_tag, alca_skim, dqm_seq, proc_version, acq_era) = result

config = { "stream" : p_stream,
"scenario" : p_scenario,
Expand All @@ -71,9 +108,10 @@ def get(self, stream, scenario):
"global_tag" : global_tag,
"alca_skim" : alca_skim,
"dqm_seq" : dqm_seq,
"proc_version" : proc_version,
"acq_era" : acq_era }
configs.append(config)

return configs



6 changes: 4 additions & 2 deletions src/python/DataRecoConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get(self,run, primary_dataset):
:arg int run: the run number (latest if not specified)
:arg str primary_dataset: the primary dataset name (optional, otherwise queries for all)
:returns: Run, PrimaryDataset, CMSSW, ScramArch, AlcaSkim, PhysicsSkim, DqmSeq, GlobalTag, Scenario"""
:returns: Run, PrimaryDataset, CMSSW, ScramArch, AlcaSkim, PhysicsSkim, DqmSeq, GlobalTag, Scenario and Processing Version"""

sqlWhereWithRun="reco_config.run = :run"
sqlWhereWithoutRun="reco_config.run = (select max(run) from reco_config)"
Expand All @@ -34,6 +34,7 @@ def get(self,run, primary_dataset):
reco_config.dqm_seq,
reco_config.global_tag,
reco_config.scenario,
reco_config.proc_version,
reco_config.multicore,
reco_config.write_reco,
reco_config.write_dqm,
Expand All @@ -54,7 +55,7 @@ def get(self,run, primary_dataset):
for result in c.fetchall():

(run, primds, cmssw, scram_arch, alca_skim, physics_skim, dqm_seq,
global_tag, scenario, multicore, write_reco, write_dqm, write_aod, write_miniaod, write_nanoaod) = result
global_tag, scenario, proc_version, multicore, write_reco, write_dqm, write_aod, write_miniaod, write_nanoaod) = result

config = { "run" : run,
"primary_dataset" : primds,
Expand All @@ -65,6 +66,7 @@ def get(self,run, primary_dataset):
"dqm_seq" : dqm_seq,
"global_tag" : global_tag,
"scenario" : scenario,
"proc_version" : proc_version,
"multicore" : multicore,
"write_reco": bool(write_reco),
"write_dqm" : bool(write_dqm),
Expand Down
62 changes: 50 additions & 12 deletions src/python/DataRecoConfigHistory.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,47 +21,84 @@ def get(self, primary_dataset, scenario):
:returns: PrimaryDataset, Scenario, Acquisition era, minimum run, maximum run, CMSSW, PhysicsSkim, DqmSeq, GlobalTag"""

sql = """
SELECT reco_config.primds primds, reco_config.scenario p_scenario, MAX(run_config.run) max_run, MIN(run_config.run) min_run, reco_config.cmssw cmssw, reco_config.global_tag global_tag, reco_config.physics_skim physics_skim, reco_config.dqm_seq dqm_seq, run_config.acq_era acq_era
SELECT reco_config.primds primds,
reco_config.scenario p_scenario,
MAX(run_config.run) max_run,
MIN(run_config.run) min_run,
reco_config.cmssw cmssw,
reco_config.global_tag global_tag,
reco_config.physics_skim physics_skim,
reco_config.dqm_seq dqm_seq,
reco_config.proc_version proc_version,
run_config.acq_era acq_era
FROM reco_config
JOIN run_config ON run_config.run = reco_config.run
"""
sql_with_primds = """
WHERE primds = :primds
GROUP BY run_config.acq_era, reco_config.primds, reco_config.scenario, reco_config.cmssw, reco_config.global_tag, reco_config.physics_skim, reco_config.dqm_seq
WHERE primds LIKE :primds
GROUP BY run_config.acq_era,
reco_config.primds,
reco_config.scenario,
reco_config.cmssw,
reco_config.global_tag,
reco_config.physics_skim,
reco_config.dqm_seq,
reco_config.proc_version
ORDER BY reco_config.primds, MAX(run_config.run) desc, MIN(run_config.run) desc
"""
sql_with_scenario = """
WHERE reco_config.scenario = :p_scenario
GROUP BY run_config.acq_era, reco_config.primds, reco_config.scenario, reco_config.cmssw, reco_config.global_tag, reco_config.physics_skim, reco_config.dqm_seq
WHERE reco_config.scenario LIKE :p_scenario
GROUP BY run_config.acq_era,
reco_config.primds,
reco_config.scenario,
reco_config.cmssw,
reco_config.global_tag,
reco_config.physics_skim,
reco_config.dqm_seq,
reco_config.proc_version
ORDER BY reco_config.primds, MAX(run_config.run) desc, MIN(run_config.run) desc
"""
sql_with_both = """
WHERE reco_config.primds = :primds AND reco_config.scenario = :p_scenario
GROUP BY run_config.acq_era, reco_config.primds, reco_config.scenario, reco_config.cmssw, reco_config.global_tag, reco_config.physics_skim, reco_config.dqm_seq
WHERE reco_config.primds LIKE :primds AND reco_config.scenario LIKE :p_scenario
GROUP BY run_config.acq_era,
reco_config.primds,
reco_config.scenario,
reco_config.cmssw,
reco_config.global_tag,
reco_config.physics_skim,
reco_config.dqm_seq,
reco_config.proc_version
ORDER BY reco_config.primds, MAX(run_config.run) desc, MIN(run_config.run) desc
"""
sql_default = """
GROUP BY run_config.acq_era, reco_config.primds, reco_config.scenario, reco_config.cmssw, reco_config.global_tag, reco_config.physics_skim, reco_config.dqm_seq
GROUP BY run_config.acq_era,
reco_config.primds,
reco_config.scenario,
reco_config.cmssw,
reco_config.global_tag,
reco_config.physics_skim,
reco_config.dqm_seq,
reco_config.proc_version
ORDER BY reco_config.primds, MAX(run_config.run) desc, MIN(run_config.run) desc
"""

if primary_dataset is not None and scenario is None:
sql_ = sql + sql_with_primds
c, _ = self.api.execute(sql_, primds = primary_dataset)
c, _ = self.api.execute(sql_, primds = '%' + str(primary_dataset) + '%')
elif primary_dataset is not None and scenario is not None:
sql_ = sql + sql_with_both
c, _ = self.api.execute(sql_, primds = primary_dataset, p_scenario = scenario)
c, _ = self.api.execute(sql_, primds = '%' + str(primary_dataset) + '%', p_scenario = '%' + str(scenario) + '%')
elif primary_dataset is None and scenario is not None:
sql_ = sql + sql_with_scenario
c, _ = self.api.execute(sql_, p_scenario = scenario)
c, _ = self.api.execute(sql_, p_scenario = '%' + str(scenario) + '%')
else:
sql_ = sql + sql_default
c, _ = self.api.execute(sql_)

configs = []
for result in c.fetchall():

(primds, p_scenario, max_run, min_run, cmssw, global_tag, physics_skim, dqm_seq, acq_era) = result
(primds, p_scenario, max_run, min_run, cmssw, global_tag, physics_skim, dqm_seq, proc_version, acq_era) = result

config = { "primary_dataset" : primds,
"scenario" : p_scenario,
Expand All @@ -71,6 +108,7 @@ def get(self, primary_dataset, scenario):
"global_tag" : global_tag,
"physics_skim" : physics_skim,
"dqm_seq" : dqm_seq,
"proc_version" : proc_version,
"acq_era" : acq_era }
configs.append(config)

Expand Down
6 changes: 3 additions & 3 deletions src/python/Regexps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
#: Regular expression for Tier0 Run ID.

RX_RUN = re.compile(r"^[1-9][0-9]{1,6}$")
RX_STREAM = re.compile(r"[A-Z][0-9a-zA-Z]+")
RX_PRIMARY_DATASET = re.compile(r"[A-Z][0-9a-zA-Z]+")
RX_SCENARIO = re.compile(r"[a-zA-Z][0-9a-zA-Z]+")
RX_STREAM = re.compile(r"[0-9_a-zA-Z]+")
RX_PRIMARY_DATASET = re.compile(r"[0-9_a-zA-Z]+")
RX_SCENARIO = re.compile(r"[0-9_a-zA-Z]+")
RX_ERA = re.compile(r"[0-9_a-zA-Z]+")
RX_EXPRESS_GLOBAL_TAG = re.compile(r"[0-9_a-zA-Z]+")
RX_PROMPT_GLOBAL_TAG = re.compile(r"[0-9_a-zA-Z]+")
Expand Down

0 comments on commit 977a85f

Please sign in to comment.