From cd9704f508f84bcf7cef7a1b834c38afd89c4c60 Mon Sep 17 00:00:00 2001
From: Alan Malta Rodrigues
Date: Thu, 13 May 2021 08:51:10 +0200
Subject: [PATCH 1/2] JobAccountant workaround for StepChain jobs with duplicate files - wmagent branch

---
 src/python/WMComponent/JobAccountant/AccountantWorker.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/python/WMComponent/JobAccountant/AccountantWorker.py b/src/python/WMComponent/JobAccountant/AccountantWorker.py
index 59ade7ab3a..a77ac7f32d 100644
--- a/src/python/WMComponent/JobAccountant/AccountantWorker.py
+++ b/src/python/WMComponent/JobAccountant/AccountantWorker.py
@@ -454,6 +454,8 @@ def handleJob(self, jobID, fwkJobReport):
                                                 conn=self.getDBConn(),
                                                 transaction=self.existingTransaction())
 
+        # FIXME: temporary workaround for: https://github.com/dmwm/WMCore/issues/9633
+        skipOutputFiles = False
         if jobSuccess:
             fileList = fwkJobReport.getAllFiles()
 
@@ -504,6 +506,7 @@ def handleJob(self, jobID, fwkJobReport):
                 if not fwjrFile.get("locations") and fwjrFile.get("lfn", "").endswith(".root"):
                     logging.warning("The following file doesn't have any location: %s", fwjrFile)
                     jobSuccess = False
+                    skipOutputFiles = True
                     break
         else:
             fileList = fwkJobReport.getAllFilesFromStep(step='logArch1')
@@ -548,6 +551,12 @@ def handleJob(self, jobID, fwkJobReport):
         else:
             wmbsJob["outcome"] = "failure"
 
+        # FIXME: BAD HACK to avoid crashing the component: fail the job and skip output registration
+        if skipOutputFiles:
+            logging.warning("Skipping output file registration for failed job: %d", jobID)
+            self.listOfJobsToFail.append(wmbsJob)
+            return jobSuccess
+
         for fwjrFile in fileList:
 
             logging.debug("Job %d , register output %s", jobID, fwjrFile["lfn"])

From 578110de531ea943d09b91e2ebea66cc4a60aecf Mon Sep 17 00:00:00 2001
From: Alan Malta Rodrigues
Date: Tue, 5 Oct 2021 15:30:49 +0200
Subject: [PATCH 2/2] Support different architectures during job submission and runtime

Fix comparison operator
---
 etc/submit.sh | 11 +++--
 etc/submit_py3.sh | 11 +++--
 .../WMCore/BossAir/Plugins/BasePlugin.py | 40 ++++++++++++++++++-
 .../BossAir/Plugins/SimpleCondorPlugin.py | 7 +++-
 src/python/WMCore/WMRuntime/Tools/Scram.py | 2 +-
 5 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/etc/submit.sh b/etc/submit.sh
index 500699dd6b..368465c7fd 100644
--- a/etc/submit.sh
+++ b/etc/submit.sh
@@ -121,12 +121,17 @@ echo -e "======== WMAgent CMS environment load finished at $(TZ=GMT date) ======
 
 
 echo "======== WMAgent COMP Python bootstrap starting at $(TZ=GMT date) ========"
-# First, decide which COMP ScramArch to use based on the required OS
+# First, decide which COMP ScramArch to use based on the required OS and Architecture
+THIS_ARCH=`uname -m`  # if it's PowerPC, it returns `ppc64le`
+if [ "$THIS_ARCH" = "x86_64" ]
+then
+    THIS_ARCH="amd64"
+fi
 if [ "$REQUIRED_OS" = "rhel7" ];
 then
-    WMA_SCRAM_ARCH=slc7_amd64_gcc630
+    WMA_SCRAM_ARCH=slc7_${THIS_ARCH}_gcc630
 else
-    WMA_SCRAM_ARCH=slc6_amd64_gcc493
+    WMA_SCRAM_ARCH=slc6_${THIS_ARCH}_gcc700
 fi
 echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH"
 
diff --git a/etc/submit_py3.sh b/etc/submit_py3.sh
index aff1e6f988..8811199892 100644
--- a/etc/submit_py3.sh
+++ b/etc/submit_py3.sh
@@ -122,12 +122,17 @@ echo -e "======== WMAgent CMS environment load finished at $(TZ=GMT date) ======
 
 
 echo "======== WMAgent COMP Python bootstrap starting at $(TZ=GMT date) ========"
-# First, decide which COMP ScramArch to use based on the required OS
+# First, decide which COMP ScramArch to use based on the required OS and Architecture
+THIS_ARCH=`uname -m`  # if it's PowerPC, it returns `ppc64le`
+if [ "$THIS_ARCH" = "x86_64" ]
+then
+    THIS_ARCH="amd64"
+fi
 if [ "$REQUIRED_OS" = "rhel7" ];
 then
-    WMA_SCRAM_ARCH=slc7_amd64_gcc630
+    WMA_SCRAM_ARCH=slc7_${THIS_ARCH}_gcc630
 else
-    WMA_SCRAM_ARCH=slc6_amd64_gcc700
+    WMA_SCRAM_ARCH=slc6_${THIS_ARCH}_gcc700
 fi
 echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH"
 
diff --git a/src/python/WMCore/BossAir/Plugins/BasePlugin.py b/src/python/WMCore/BossAir/Plugins/BasePlugin.py
index 5d7125fd75..57462c0845 100644
--- a/src/python/WMCore/BossAir/Plugins/BasePlugin.py
+++ b/src/python/WMCore/BossAir/Plugins/BasePlugin.py
@@ -8,8 +8,9 @@
 from builtins import object, str, bytes
 from future.utils import viewitems, viewvalues
 
+from Utils.Utilities import decodeBytesToUnicode
 from WMCore.WMException import WMException
-from WMCore.WMRuntime.Tools.Scram import ARCH_TO_OS
+from WMCore.WMRuntime.Tools.Scram import ARCH_TO_OS, SCRAM_TO_ARCH
 
 
 
@@ -152,3 +153,40 @@ def scramArchtoRequiredOS(scramArch=None):
             requiredOSes.add('any')
 
         return ','.join(sorted(requiredOSes))
+
+    @staticmethod
+    def scramArchtoRequiredArch(scramArch=None):
+        """
+        Map one or more ScramArchs to a single target CPU architecture, taken
+        from the second "_"-separated field (e.g. slc7_amd64_gcc630 -> X86_64).
+        If several architectures match: X86_64 > ppc64le > aarch64.
+        :param scramArch: a ScramArch string/bytes, a list of them, or None
+        :return: the matched architecture string; None if scramArch is None
+        :raises BossAirPluginException: architecture not in SCRAM_TO_ARCH
+        """
+        defaultArch = "X86_64"
+        requiredArchs = set()
+        if scramArch is None:
+            return None
+        elif isinstance(scramArch, (str, bytes)):
+            scramArch = [scramArch]
+
+        for item in scramArch:
+            item = decodeBytesToUnicode(item)
+            arch = item.split("_")[1]
+            if arch not in SCRAM_TO_ARCH:
+                msg = "Job configured to a ScramArch: '{}' not supported in BossAir".format(item)
+                raise BossAirPluginException(msg)
+            requiredArchs.add(SCRAM_TO_ARCH.get(arch))
+
+        # now we have the final list of architectures, return only 1 of them
+        if len(requiredArchs) == 1:
+            return requiredArchs.pop()
+        elif "X86_64" in requiredArchs:
+            return "X86_64"
+        elif "ppc64le" in requiredArchs:
+            return "ppc64le"
+        elif "aarch64" in requiredArchs:
+            return "aarch64"
+        else:  # only reached when scramArch is an empty sequence
+            return defaultArch
diff --git a/src/python/WMCore/BossAir/Plugins/SimpleCondorPlugin.py b/src/python/WMCore/BossAir/Plugins/SimpleCondorPlugin.py
index 5097d72981..c335dc2ca7 100644
--- a/src/python/WMCore/BossAir/Plugins/SimpleCondorPlugin.py
+++ b/src/python/WMCore/BossAir/Plugins/SimpleCondorPlugin.py
@@ -625,7 +625,12 @@ def getJobParameters(self, jobList):
             ad['My.REQUIRED_OS'] = classad.quote(encodeUnicodeToBytesConditional(requiredOSes, condition=PY2))
             cmsswVersions = ','.join(job.get('swVersion'))
             ad['My.CMSSW_Versions'] = classad.quote(encodeUnicodeToBytesConditional(cmsswVersions, condition=PY2))
-            
+            requiredArch = self.scramArchtoRequiredArch(job.get('scramArch'))
+            if not requiredArch:  # only Cleanup jobs should not have ScramArch defined
+                ad['Requirements'] = '(TARGET.Arch =!= Undefined)'
+            else:
+                ad['Requirements'] = '(TARGET.Arch =?= "{}")'.format(requiredArch)
+
             jobParameters.append(ad)
 
         return jobParameters
diff --git a/src/python/WMCore/WMRuntime/Tools/Scram.py b/src/python/WMCore/WMRuntime/Tools/Scram.py
index 65ed2ac5fd..12ac247776 100644
--- a/src/python/WMCore/WMRuntime/Tools/Scram.py
+++ b/src/python/WMCore/WMRuntime/Tools/Scram.py
@@ -38,8 +38,8 @@
 from Utils.PythonVersion import PY3
 from Utils.Utilities import encodeUnicodeToBytesConditional, decodeBytesToUnicodeConditional
 
+SCRAM_TO_ARCH = {'amd64': 'X86_64', 'aarch64': 'aarch64', 'ppc64le': 'ppc64le'}
 ARCH_TO_OS = {'slc5': ['rhel6'], 'slc6': ['rhel6'], 'slc7': ['rhel7']}
-
 OS_TO_ARCH = {}
 for arch, oses in viewitems(ARCH_TO_OS):
     for osName in oses: