-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcheckACDCCollections.py
95 lines (77 loc) · 3.34 KB
/
checkACDCCollections.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
# encoding: utf-8
"""
Retrieve data from the ACDC server and get the amount
of failures for a given workflow in a per-task basis.
You need to source the agent init.sh before running it:
source /data/srv/wmagent/current/sw/slc7_amd64_gcc630/cms/wmagent/1.1.20.patch5/etc/profile.d/init.sh
and example of command line is:
python checkACDCCollections.py -w pdmvserv_task_HIG-RunIIFall17wmLHEGS-03538__v1_T_190222_091942_5061 -v
"""
from __future__ import print_function
import sys
from optparse import OptionParser
from pprint import pprint
from WMCore.Database.CMSCouch import Database
def main():
"""
_main_
"""
usage = "Usage: python %prog -w workflow"
parser = OptionParser(usage=usage)
parser.add_option('-w', '--workflow', help='Workflow name in ReqMgr', dest='wf')
parser.add_option('-v', '--verbose', help='Enable verbose mode', action="store_true")
(options, args) = parser.parse_args()
if not options.wf:
parser.error('You must provide a workflow name')
sys.exit(1)
couchUrl = "https://cmsweb.cern.ch/couchdb"
database = "acdcserver"
failures = {}
svc = Database(database, couchUrl)
result = svc.loadView("ACDC", "byCollectionName", {'key': options.wf, 'include_docs': True, 'reduce': False})
print("Found %i failures/rows in total." % len(result["rows"]))
for entry in result["rows"]:
fsetName = entry['doc']['fileset_name']
failures.setdefault(fsetName, {'jobs': 0, 'files': 0, 'lumis': 0, 'uniqueLumis': 0, 'listLumis': set()})
failures[fsetName]['jobs'] += 1
failures[fsetName]['files'] += len(entry['doc']['files'])
for fname in entry['doc']['files']:
for runLumi in entry['doc']['files'][fname]['runs']:
failures[fsetName]['lumis'] += len(runLumi['lumis'])
failures[fsetName]['listLumis'] |= set(runLumi['lumis'])
for fsetName in failures:
failures[fsetName]['uniqueLumis'] = len(failures[fsetName]['listLumis'])
failures[fsetName]['listLumis'] = str(failures[fsetName]['listLumis'])
if not options.verbose:
for fset in failures.keys():
failures[fset].pop('listLumis', None)
print("Summary of failures is as follows:")
pprint(failures)
print("\nNow printing duplicate files + run + lumis per fileset")
printDups(result["rows"])
print("\nDone!")
def printDups(chunkFiles):
"""
Same logic as mergeFilesInfo from WMCore
"""
mergedFiles = {}
# Merge ACDC docs without any real input data (aka MCFakeFile)
for row in chunkFiles:
for fname, values in row['doc']['files'].iteritems():
if fname not in mergedFiles:
mergedFiles[fname] = {}
for runLumi in values['runs']:
runNum = str(runLumi['run_number'])
mergedFiles[fname][runNum] = set(runLumi['lumis'])
else:
for runLumi in values['runs']:
runNum = str(runLumi['run_number'])
lumiSet = set(runLumi['lumis'])
if lumiSet.issubset(mergedFiles[fname][runNum]):
print("Duplicate file: %s" % values)
continue
else:
mergedFiles[fname][runNum] |= lumiSet
if __name__ == "__main__":
sys.exit(main())