From a43a10d461cee1bd58ee36ebadb43b5a7909c5df Mon Sep 17 00:00:00 2001 From: Dario Mapelli Date: Mon, 28 Nov 2022 14:32:11 +0100 Subject: [PATCH] job wrapper - python3, new script and cleanEnv=False (#7441) * jobwrapper - use py3 from COMP in particular for the scripts: - CMSRunAnalysis.py - TweakPSet.py - cmscp.py This requires https://github.com/dmwm/WMCore/pull/11321 * jobwrapper - add env-CMSRunAnalysis.sh The new env-CMSRunAnalysis.sh script is intended to be shared with WMCore team. We should make the effort of keeping in this script, which can eventually be moved to WMCore's repo, all the shared code among CRAB and WMCore jobwrapper scripts (namely gWMS-CMSRunAnalysis.sh / CMSRunAnalysis.sh and submit_py3.sh). The script name and its function names are just proposals and can be changed if we can come up with better names. * jobwrapper - Do not clean cmsRun's env When we launch cmsRun, we should imitate what WMCore is doing: do not clean the env, to preserve all the env variables set for us by the worker node / pilot and by cmsset_default.sh Just a (small ?) difference with respect to WMCore: we do not remove the COMP's python from the pythonpath. This can be dangerous and should be thoroughly tested. * jobwrapper - env-CMS.. 
renamed submit_env.sh After Stefano's review, I renamed the script env-CMSRunAnalysis.sh to submit_env.sh * requirements - wmcore 2.1.4 * submit_env - removed debugging prints --- bin/htcondor_make_runtime.sh | 1 + requirements.txt | 2 +- scripts/CMSRunAnalysis.py | 27 ++-- scripts/CMSRunAnalysis.sh | 82 +----------- scripts/TweakPSet.py | 2 + scripts/gWMS-CMSRunAnalysis.sh | 109 +++------------ scripts/submit_env.sh | 125 ++++++++++++++++++ setup.py | 3 +- .../TaskWorker/Actions/DagmanCreator.py | 5 +- .../Deployment/TaskWorker/updateTMRuntime.sh | 1 + 10 files changed, 172 insertions(+), 185 deletions(-) create mode 100755 scripts/submit_env.sh diff --git a/bin/htcondor_make_runtime.sh b/bin/htcondor_make_runtime.sh index 881f7640cf..c4dbf6dcd5 100755 --- a/bin/htcondor_make_runtime.sh +++ b/bin/htcondor_make_runtime.sh @@ -19,6 +19,7 @@ CRABSERVERREPO=dmwm [[ -d $STARTDIR ]] || mkdir -p $STARTDIR +cp $BASEDIR/../scripts/submit_env.sh $STARTDIR || exit 3 cp $BASEDIR/../scripts/gWMS-CMSRunAnalysis.sh $STARTDIR || exit 3 diff --git a/requirements.txt b/requirements.txt index 68769b678c..dab48d9917 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ # Format: # Dependency==version -wmcver==2.1.2 +wmcver==2.1.4 diff --git a/scripts/CMSRunAnalysis.py b/scripts/CMSRunAnalysis.py index 46e60c303c..1a20b42f58 100644 --- a/scripts/CMSRunAnalysis.py +++ b/scripts/CMSRunAnalysis.py @@ -15,7 +15,7 @@ import signal import os.path import logging -import commands +import subprocess import traceback from ast import literal_eval from optparse import OptionParser, BadOptionError, AmbiguousOptionError @@ -384,7 +384,7 @@ def prepSandbox(opts): print("==== Sandbox untarring STARTING at %s ====" % time.asctime(time.gmtime())) #The user sandbox.tar.gz has to be unpacked no matter what (even in DEBUG mode) - print(commands.getoutput('tar xfm %s' % opts.archiveJob)) + print(subprocess.getoutput('tar xfm %s' % opts.archiveJob)) print("==== Sandbox untarring FINISHED at 
%s ====" % time.asctime(time.gmtime())) #move the pset in the right place @@ -408,7 +408,7 @@ def extractUserSandbox(archiveJob, cmsswVersion): # will be executed from the job working directory, so we move "up" # the PSet which is also in the user sandbox os.chdir(cmsswVersion) - print(commands.getoutput('tar xfm %s ' % os.path.join('..', archiveJob))) + print(subprocess.getoutput('tar xfm %s ' % os.path.join('..', archiveJob))) os.rename('PSet.py','../PSet.py') os.rename('PSet.pkl','../PSet.pkl') os.chdir('..') @@ -426,14 +426,14 @@ def getProv(filename, scram): output = scram.getStdout() return output -def executeUserApplication(command, scram): +def executeUserApplication(command, scram, cleanEnv=True): """ cmsRun failures will appear in FJR but do not raise exceptions exception can only be raised by unexpected failures of the Scram wrapper itself Scram() never raises and returns the exit code from executing 'command' """ with tempSetLogLevel(logger=logging.getLogger(), level=logging.DEBUG): - ret = scram(command, runtimeDir=os.getcwd()) + ret = scram(command, runtimeDir=os.getcwd(), cleanEnv=cleanEnv) if ret > 0: with open('cmsRun-stdout.log', 'w') as fh: fh.write(scram.diagnostic()) @@ -634,15 +634,6 @@ def StripReport(report): jobExitCode = None applicationName = 'CMSSW JOB' if not options.scriptExe else 'ScriptEXE' - # no matter what we run, it is very likely to need proxy location - preCmd = 'export X509_USER_PROXY=%s; ' % os.getenv('X509_USER_PROXY') - # needed for root problem with $HOME/.root.mimes, #6801 - preCmd += 'export HOME=${HOME:-$PWD}; ' - # temporary quick fix for #7413, CMSSW 12_6 requires new env variable - preCmd += 'export SITECONFIG_PATH=/cvmfs/cms.cern.ch/SITECONF/local; ' - # needed for accessing EOS at RAL (Echo). 
See https://ggus.eu/index.php?mode=ticket_info&ticket_id=155272 - if os.getenv('XrdSecGSISRVNAMES'): - preCmd += 'export XrdSecGSISRVNAMES=%s; ' % os.getenv('XrdSecGSISRVNAMES') print("==== %s Execution started at %s ====" % (applicationName, time.asctime(time.gmtime()))) if not options.scriptExe : cmd = 'cmsRun -p PSet.py -j FrameworkJobReport.xml' @@ -652,8 +643,8 @@ def StripReport(report): os.chmod(options.scriptExe, st.st_mode | stat.S_IEXEC) cmd = os.getcwd() + "/%s %s %s" %\ (options.scriptExe, options.jobNumber, " ".join(json.loads(options.scriptArgs))) - cmd = preCmd + cmd - applicationExitCode = executeUserApplication(cmd, scram) + + applicationExitCode = executeUserApplication(cmd, scram, cleanEnv=False) if applicationExitCode: print("==== Execution FAILED at %s ====" % time.asctime(time.gmtime())) print("==== %s Execution completed at %s ====" % (applicationName, time.asctime(time.gmtime()))) @@ -672,7 +663,7 @@ def StripReport(report): # e.g. from xroot https://github.com/dmwm/CRABServer/issues/6640#issuecomment-909362639 print("Sanitize FJR") cmd = 'cat -v FrameworkJobReport.xml > sane; mv sane FrameworkJobReport.xml' - print(commands.getoutput(cmd)) + print(subprocess.getoutput(cmd)) # parse FJR rep = Report("cmsRun") rep.parse('FrameworkJobReport.xml', "cmsRun") @@ -702,7 +693,7 @@ def StripReport(report): # e.g. 
from xroot https://github.com/dmwm/CRABServer/issues/6640#issuecomment-909362639 print("Sanitize FJR") cmd = 'cat -v FrameworkJobReport.xml > sane; mv sane FrameworkJobReport.xml' - print(commands.getoutput(cmd)) + print(subprocess.getoutput(cmd)) # parse FJR rep = Report("cmsRun") rep.parse('FrameworkJobReport.xml', "cmsRun") diff --git a/scripts/CMSRunAnalysis.sh b/scripts/CMSRunAnalysis.sh index 1498523eda..6fac2c76fb 100644 --- a/scripts/CMSRunAnalysis.sh +++ b/scripts/CMSRunAnalysis.sh @@ -24,82 +24,13 @@ echo "Local time : $(date)" echo "Current system : $(uname -a)" echo "Current processor: $(cat /proc/cpuinfo |grep name|sort|uniq)" -### source the CMSSW stuff using either OSG or LCG style entry env. or CVMFS -echo "======== CMS environment load starting at $(TZ=GMT date) ========" -if [ -f "$VO_CMS_SW_DIR"/cmsset_default.sh ] -then # LCG style -- - echo "WN with a LCG style environment, thus using VO_CMS_SW_DIR=$VO_CMS_SW_DIR" - . $VO_CMS_SW_DIR/cmsset_default.sh -elif [ -f "$OSG_APP"/cmssoft/cms/cmsset_default.sh ] -then # OSG style -- - echo "WN with an OSG style environment, thus using OSG_APP=$OSG_APP" - . $OSG_APP/cmssoft/cms/cmsset_default.sh CMSSW_3_3_2 -elif [ -f "$CVMFS"/cms.cern.ch/cmsset_default.sh ] -then - echo "WN with CVMFS environment, thus using CVMFS=$CVMFS" - . $CVMFS/cms.cern.ch/cmsset_default.sh -elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ] -then # ok, lets call it CVMFS then - export CVMFS=/cvmfs/cms.cern.ch - echo "WN missing VO_CMS_SW_DIR/OSG_APP/CVMFS environment variable, forcing it to CVMFS=$CVMFS" - . $CVMFS/cmsset_default.sh -else - echo "Error during job bootstrap: VO_CMS_SW_DIR, OSG_APP, CVMFS or /cvmfs were not found." >&2 - echo " Because of this, we can't load CMSSW. Not good." 
>&2 - exit 11003 -fi -echo -e "======== CMS environment load finished at $(TZ=GMT date) ========\n" - -echo "==== Python discovery STARTING ====" -# Python library required for Python2/Python3 compatibility through "future" -PY_FUTURE_VERSION=0.18.2 -# First, decide which COMP ScramArch to use based on the required OS -if [ "$REQUIRED_OS" = "rhel7" ]; -then - WMA_SCRAM_ARCH=slc7_amd64_gcc630 -else - WMA_SCRAM_ARCH=slc6_amd64_gcc493 -fi -echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH" - -suffix=etc/profile.d/init.sh -if [ -d "$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python ] -then - prefix="$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python -elif [ -d "$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python ] -then - prefix="$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python -elif [ -d "$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python ] -then - prefix="$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python -else - echo "Error during job bootstrap: job environment does not contain the init.sh script." >&2 - echo " Because of this, we can't load CMSSW. Not good." >&2 - exit 11004 -fi +source ./submit_env.sh -compPythonPath=`echo $prefix | sed 's|/python||'` -echo "WMAgent bootstrap: COMP Python path is: $compPythonPath" -latestPythonVersion=`ls -t "$prefix"/*/"$suffix" | head -n1 | sed 's|.*/external/python/||' | cut -d '/' -f1` -pythonMajorVersion=`echo $latestPythonVersion | cut -d '.' -f1` -pythonCommand="python"${pythonMajorVersion} -echo "WMAgent bootstrap: latest python release is: $latestPythonVersion" -source "$prefix/$latestPythonVersion/$suffix" -source "$compPythonPath/py2-future/$PY_FUTURE_VERSION/$suffix" +# from ./submit_env.sh +setup_cmsset -command -v $pythonCommand > /dev/null -rc=$? -if [[ $rc != 0 ]] -then - echo "Error during job bootstrap: python isn't available on the worker node." 
>&2 - echo " WMCore/WMAgent REQUIRES at least python2" >&2 - exit 11005 -else - echo "WMAgent bootstrap: found $pythonCommand at.." - echo `which $pythonCommand` -fi - -echo "==== Python discovery FINISHED at $(TZ=GMT date) ====" +# from ./submit_env.sh +setup_python_comp echo "==== Make sure $HOME is defined ====" export HOME=${HOME:-$PWD} @@ -132,10 +63,11 @@ for i in `ls`; do echo "== DIR: $i" done echo "==== Local directory contents dump FINISHING ====" + echo "======== CMSRunAnalysis.py STARTING at $(TZ=GMT date) ========" echo "Now running the CMSRunAnalysis.py job in `pwd`..." set -x -python CMSRunAnalysis.py -r "`pwd`" "$@" +$pythonCommand CMSRunAnalysis.py -r "`pwd`" "$@" jobrc=$? set +x echo "== The job had an exit code of $jobrc " diff --git a/scripts/TweakPSet.py b/scripts/TweakPSet.py index e3cd2c5f7d..c59a21c456 100644 --- a/scripts/TweakPSet.py +++ b/scripts/TweakPSet.py @@ -62,6 +62,7 @@ from ast import literal_eval from PSetTweaks.PSetTweak import PSetTweak +from Utils.Utilities import decodeBytesToUnicode def readFileFromTarball(filename, tarball): @@ -79,6 +80,7 @@ def readFileFromTarball(filename, tarball): try: f = tar_file.extractfile(filename) content = f.read() + content = decodeBytesToUnicode(content) break except KeyError as er: # Don`t exit due to KeyError, print error. EventBased and FileBased does not have run and lumis diff --git a/scripts/gWMS-CMSRunAnalysis.sh b/scripts/gWMS-CMSRunAnalysis.sh index 0c79769d9c..075889e867 100755 --- a/scripts/gWMS-CMSRunAnalysis.sh +++ b/scripts/gWMS-CMSRunAnalysis.sh @@ -6,15 +6,16 @@ # difficult-to-impossible to run. # -# On some sites we know there was some problems with environment cleaning -# with using 'env -i'. To overcome this issue, whenever we start a job, we have -# to save full current environment into file, and whenever it is needed we can load -# it. Be aware, that there are some read-only variables, like: BASHOPTS, BASH_VERSINFO, -# EUID, PPID, SHELLOPTS, UID, etc. 
-set > startup_environment.sh -sed -e 's/^/export /' startup_environment.sh > tmp_env.sh -mv tmp_env.sh startup_environment.sh -export JOBSTARTDIR=$PWD +echo "======== Startup environment - STARTING ========" + +# import some auxiliary functions from a script that is intented to be shared +# with WMCore +source ./submit_env.sh + +# from ./submit_env.sh +save_env + +echo "======== Startup environment - FINISHING ========" # Saving START_TIME and when job finishes, check if runtime is not lower than 20m # If it is lower, sleep the difference. Will not sleep if CRAB3_RUNTIME_DEBUG is set. @@ -87,11 +88,10 @@ echo "Hostname: $(hostname -f)" echo "System: $(uname -a)" echo "Arguments are $@" +# redirect stderr to stdout, so that it all goes to job_out.*, leaving job_err.* empty +# see https://stackoverflow.com/a/13088401 exec 2>&1 -touch jobReport.json -touch WMArchiveReport.json -echo "SCRAM_ARCH=$SCRAM_ARCH" CRAB_oneEventMode=0 if [ "X$_CONDOR_JOB_AD" != "X" ]; then @@ -124,6 +124,8 @@ then echo "======== HTCONDOR JOB SUMMARY at $(TZ=GMT date) FINISH ========" fi +touch jobReport.json +touch WMArchiveReport.json #MM: Are these two lines needed? touch jobReport.json.$CRAB_Id touch WMArchiveReport.json.$CRAB_Id @@ -137,7 +139,7 @@ time sh ./CMSRunAnalysis.sh "$@" --oneEventMode=$CRAB_oneEventMode EXIT_STATUS=$? echo "CMSRunAnalysis.sh complete at $(TZ=GMT date) with (short) exit status $EXIT_STATUS" -echo "======== CMSRunAnalsysis.sh at $(TZ=GMT date) FINISHING ========" +echo "======== CMSRunAnalysis.sh at $(TZ=GMT date) FINISHING ========" mv jobReport.json jobReport.json.$CRAB_Id mv WMArchiveReport.json WMArchiveReport.json.$CRAB_Id @@ -162,82 +164,11 @@ then fi echo "======== User application running completed. Prepare env. 
for stageout ===" -echo "======== WMAgent CMS environment load starting at $(TZ=GMT date) ========" -if [ -f "$VO_CMS_SW_DIR"/cmsset_default.sh ] -then # LCG style -- - echo "WN with a LCG style environment, thus using VO_CMS_SW_DIR=$VO_CMS_SW_DIR" - . $VO_CMS_SW_DIR/cmsset_default.sh -elif [ -f "$OSG_APP"/cmssoft/cms/cmsset_default.sh ] -then # OSG style -- - echo "WN with an OSG style environment, thus using OSG_APP=$OSG_APP" - . $OSG_APP/cmssoft/cms/cmsset_default.sh CMSSW_3_3_2 -elif [ -f "$CVMFS"/cms.cern.ch/cmsset_default.sh ] -then - echo "WN with CVMFS environment, thus using CVMFS=$CVMFS" - . $CVMFS/cms.cern.ch/cmsset_default.sh -elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ] -then # ok, lets call it CVMFS then - export CVMFS=/cvmfs/cms.cern.ch - echo "WN missing VO_CMS_SW_DIR/OSG_APP/CVMFS environment variable, forcing it to CVMFS=$CVMFS" - . $CVMFS/cmsset_default.sh -else - echo "Error during job bootstrap: VO_CMS_SW_DIR, OSG_APP, CVMFS or /cvmfs were not found." >&2 - echo " Because of this, we can't load CMSSW. Not good." 
>&2 - exit 11003 -fi -echo "WMAgent bootstrap: WMAgent thinks it found the correct CMSSW setup script" -echo -e "======== WMAgent CMS environment load finished at $(TZ=GMT date) ========\n" - -echo "======== python bootstrap for stageout at $(TZ=GMT date) STARTING ========" -# use python from COMP -# Python library required for Python2/Python3 compatibility through "future" -PY_FUTURE_VERSION=0.18.2 -# First, decide which COMP ScramArch to use based on the required OS -if [ "$REQUIRED_OS" = "rhel7" ]; -then - WMA_SCRAM_ARCH=slc7_amd64_gcc630 -else - WMA_SCRAM_ARCH=slc6_amd64_gcc493 -fi -echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH" - -suffix=etc/profile.d/init.sh -if [ -d "$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python ] -then - prefix="$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python -elif [ -d "$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python ] -then - prefix="$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python -elif [ -d "$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python ] -then - prefix="$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python -else - echo "Error during job bootstrap: job environment does not contain the init.sh script." >&2 - echo " Because of this, we can't load CMSSW. Not good." >&2 - exit 11004 -fi - -compPythonPath=`echo $prefix | sed 's|/python||'` -echo "WMAgent bootstrap: COMP Python path is: $compPythonPath" -latestPythonVersion=`ls -t "$prefix"/*/"$suffix" | head -n1 | sed 's|.*/external/python/||' | cut -d '/' -f1` -pythonMajorVersion=`echo $latestPythonVersion | cut -d '.' -f1` -pythonCommand="python"${pythonMajorVersion} -echo "WMAgent bootstrap: latest python release is: $latestPythonVersion" -source "$prefix/$latestPythonVersion/$suffix" -source "$compPythonPath/py2-future/$PY_FUTURE_VERSION/$suffix" +# from ./submit_env.sh +setup_cmsset -command -v $pythonCommand > /dev/null -rc=$? 
-if [[ $rc != 0 ]] -then - echo "Error during job bootstrap: python isn't available on the worker node." >&2 - echo " WMCore/WMAgent REQUIRES at least python2" >&2 - exit 11005 -else - echo "WMAgent bootstrap: found $pythonCommand at.." - echo `which $pythonCommand` -fi -echo "======== python bootstrap for stageout at $(TZ=GMT date) FINISHED ========" +# from ./submit_env.sh +setup_python_comp #echo "======== Attempting to notify HTCondor of file stageout ========" # wrong syntax for chirping, also needs a proper classAd name. Keep commented line for a future fix @@ -246,7 +177,7 @@ echo "======== python bootstrap for stageout at $(TZ=GMT date) FINISHED ======== echo "======== Stageout at $(TZ=GMT date) STARTING ========" rm -f wmcore_initialized # Note we prevent buffering of stdout/err -- this is due to observed issues in mixing of out/err for stageout plugins -PYTHONUNBUFFERED=1 python2.7 cmscp.py +PYTHONUNBUFFERED=1 $pythonCommand cmscp.py STAGEOUT_EXIT_STATUS=$? if [ ! -e wmcore_initialized ]; diff --git a/scripts/submit_env.sh b/scripts/submit_env.sh new file mode 100755 index 0000000000..12716163a5 --- /dev/null +++ b/scripts/submit_env.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +save_env() { + # save the current environment to the file startup_environment.sh + # this is intended to be the first function run by + # - gWMS-CMSRunAnalysis.sh: when running a job on the global pool + # - crab preparelocal, crab submit --dryrun: when running a job locally + + # On some sites we know there was some problems with environment cleaning + # with using 'env -i'. To overcome this issue, whenever we start a job, we have + # to save full current environment into file, and whenever it is needed we can load + # it. Be aware, that there are some read-only variables, like: BASHOPTS, BASH_VERSINFO, + # EUID, PPID, SHELLOPTS, UID, etc. 
+ + # Moreover, src/python/WMCore/Storage/Backends/GFAL2Impl.py + # makes use of startup_environment.sh + + export JOBSTARTDIR=$PWD + export HOME=${HOME:-$PWD} + + declare -p > startup_environment.sh + +} + +setup_local_env () { + # when running a job locally, we need to set manually some variables that + # are set for us when running on the global pool. + + export SCRAM_ARCH=$(scramv1 arch) + export REQUIRED_OS=rhel7 + export CRAB_RUNTIME_TARBALL=local + export CRAB_TASKMANAGER_TARBALL=local + export CRAB3_RUNTIME_DEBUG=True + +} + +setup_cmsset() { + ### source the CMSSW stuff using either OSG or LCG style entry env. or CVMFS + echo "======== CMS environment load starting at $(TZ=GMT date) ========" + CMSSET_DEFAULT_PATH="" + if [ -f "$VO_CMS_SW_DIR"/cmsset_default.sh ] + then # LCG style -- + echo "WN with a LCG style environment, thus using VO_CMS_SW_DIR=$VO_CMS_SW_DIR" + CMSSET_DEFAULT_PATH=$VO_CMS_SW_DIR/cmsset_default.sh + elif [ -f "$OSG_APP"/cmssoft/cms/cmsset_default.sh ] + then # OSG style -- + echo "WN with an OSG style environment, thus using OSG_APP=$OSG_APP" + CMSSET_DEFAULT_PATH=$OSG_APP/cmssoft/cms/cmsset_default.sh CMSSW_3_3_2 + elif [ -f "$CVMFS"/cms.cern.ch/cmsset_default.sh ] + then + echo "WN with CVMFS environment, thus using CVMFS=$CVMFS" + CMSSET_DEFAULT_PATH=$CVMFS/cms.cern.ch/cmsset_default.sh + elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ] + then # ok, lets call it CVMFS then + CVMFS=/cvmfs/cms.cern.ch + echo "WN missing VO_CMS_SW_DIR/OSG_APP/CVMFS environment variable, forcing it to CVMFS=$CVMFS" + CMSSET_DEFAULT_PATH=$CVMFS/cmsset_default.sh + else + echo "Error during job bootstrap: VO_CMS_SW_DIR, OSG_APP, CVMFS or /cvmfs were not found." >&2 + echo " Because of this, we can't load CMSSW. Not good." >&2 + exit 11003 + fi + . 
$CMSSET_DEFAULT_PATH + echo -e "======== CMS environment load finished at $(TZ=GMT date) ========\n" +} + +setup_python_comp() { + echo "======== python bootstrap for stageout at $(TZ=GMT date) STARTING ========" + # Python library required for Python2/Python3 compatibility through "future" + PY3_FUTURE_VERSION=0.18.2 + # Saving START_TIME and when job finishes END_TIME. + START_TIME=$(date +%s) + WMA_DEFAULT_OS=rhel7 + export JOBSTARTDIR=$PWD + + # First, decide which COMP ScramArch to use based on the required OS and Architecture + THIS_ARCH=`uname -m` # if it's PowerPC, it returns `ppc64le` + # if this job can run at any OS, then use rhel7 as default + if [ "$REQUIRED_OS" = "any" ] + then + WMA_SCRAM_ARCH=${WMA_DEFAULT_OS}_${THIS_ARCH} + else + WMA_SCRAM_ARCH=${REQUIRED_OS}_${THIS_ARCH} + fi + echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH" + + # WMCore + suffix=etc/profile.d/init.sh + if [ -d "$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python3 ] + then + prefix="$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python3 + elif [ -d "$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python3 ] + then + prefix="$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python3 + elif [ -d "$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python3 ] + then + prefix="$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python3 + else + echo "Failed to find a COMP python3 installation in the worker node setup." >&2 + echo " Without a known python3, there is nothing else we can do with this job. Quiting!" >&2 + exit 11004 + fi + compPythonPath=`echo $prefix | sed 's|/python3||'` + echo "WMAgent bootstrap: COMP Python path is: $compPythonPath" + latestPythonVersion=`ls -t "$prefix"/*/"$suffix" | head -n1 | sed 's|.*/external/python3/||' | cut -d '/' -f1` + pythonMajorVersion=`echo $latestPythonVersion | cut -d '.' 
-f1` + pythonCommand="python"${pythonMajorVersion} + echo "WMAgent bootstrap: latest python3 release is: $latestPythonVersion" + source "$prefix/$latestPythonVersion/$suffix" + echo "Sourcing python future library from: ${compPythonPath}/py3-future/${PY3_FUTURE_VERSION}/${suffix}" + source "$compPythonPath/py3-future/${PY3_FUTURE_VERSION}/${suffix}" + + command -v $pythonCommand > /dev/null + rc=$? + if [[ $rc != 0 ]] + then + echo "Error during job bootstrap: python isn't available on the worker node." >&2 + echo " WMCore/WMAgent REQUIRES at least python2" >&2 + exit 11005 + else + echo "WMAgent bootstrap: found $pythonCommand at.." + echo `which $pythonCommand` + fi + echo "======== python bootstrap for stageout at $(TZ=GMT date) FINISHED ========" +} diff --git a/setup.py b/setup.py index dc5b27a2f0..b9a1c9dc69 100644 --- a/setup.py +++ b/setup.py @@ -275,7 +275,8 @@ def getWebDir(): package_dir={'': 'src/python'}, data_files=['scripts/%s' % x for x in \ ['CMSRunAnalysis.sh', 'cmscp.py', - 'gWMS-CMSRunAnalysis.sh', 'dag_bootstrap_startup.sh', + 'gWMS-CMSRunAnalysis.sh', 'submit_env.sh', + 'dag_bootstrap_startup.sh', 'dag_bootstrap.sh', 'AdjustSites.py']] + getWebDir(), ) diff --git a/src/python/TaskWorker/Actions/DagmanCreator.py b/src/python/TaskWorker/Actions/DagmanCreator.py index 2831f4133d..c933943f49 100644 --- a/src/python/TaskWorker/Actions/DagmanCreator.py +++ b/src/python/TaskWorker/Actions/DagmanCreator.py @@ -523,6 +523,7 @@ def makeJobSubmit(self, task): info['additional_input_file'] += ", sandbox.tar.gz" info['additional_input_file'] += ", run_and_lumis.tar.gz" info['additional_input_file'] += ", input_files.tar.gz" + info['additional_input_file'] += ", submit_env.sh" info['max_disk_space'] = MAX_DISK_SPACE @@ -1144,6 +1145,7 @@ def executeInternal(self, *args, **kw): transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/') cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/') gwms_location = 
getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/') + env_location = getLocation('submit_env.sh', 'CRABServer/scripts/') dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/') bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/") adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/") @@ -1151,6 +1153,7 @@ def executeInternal(self, *args, **kw): shutil.copy(transform_location, '.') shutil.copy(cmscp_location, '.') shutil.copy(gwms_location, '.') + shutil.copy(env_location, '.') shutil.copy(dag_bootstrap_location, '.') shutil.copy(bootstrap_location, '.') shutil.copy(adjust_location, '.') @@ -1188,7 +1191,7 @@ def executeInternal(self, *args, **kw): kw['task']['dbinstance'] = self.crabserver.getDbInstance() params = {} - inputFiles = ['gWMS-CMSRunAnalysis.sh', 'CMSRunAnalysis.sh', 'cmscp.py', 'RunJobs.dag', 'Job.submit', 'dag_bootstrap.sh', + inputFiles = ['gWMS-CMSRunAnalysis.sh', 'submit_env.sh', 'CMSRunAnalysis.sh', 'cmscp.py', 'RunJobs.dag', 'Job.submit', 'dag_bootstrap.sh', 'AdjustSites.py', 'site.ad', 'site.ad.json', 'datadiscovery.pkl', 'taskinformation.pkl', 'taskworkerconfig.pkl', 'run_and_lumis.tar.gz', 'input_files.tar.gz'] diff --git a/src/script/Deployment/TaskWorker/updateTMRuntime.sh b/src/script/Deployment/TaskWorker/updateTMRuntime.sh index 5a9f82caf9..1d94936bc2 100755 --- a/src/script/Deployment/TaskWorker/updateTMRuntime.sh +++ b/src/script/Deployment/TaskWorker/updateTMRuntime.sh @@ -44,6 +44,7 @@ filesToCopy="$filesToCopy $CRAB_OVERRIDE_SOURCE/CRABServer/scripts/AdjustSites.p filesToCopy="$filesToCopy $CRAB_OVERRIDE_SOURCE/CRABServer/scripts/dag_bootstrap_startup.sh" filesToCopy="$filesToCopy $CRAB_OVERRIDE_SOURCE/CRABServer/scripts/dag_bootstrap.sh" filesToCopy="$filesToCopy $CRAB_OVERRIDE_SOURCE/CRABServer/scripts/gWMS-CMSRunAnalysis.sh" +filesToCopy="$filesToCopy $CRAB_OVERRIDE_SOURCE/CRABServer/scripts/submit_env.sh" filesToCopy="$filesToCopy 
$CRAB_OVERRIDE_SOURCE/CRABServer/scripts/CMSRunAnalysis.sh" filesToCopy="$filesToCopy $CRAB_OVERRIDE_SOURCE/CRABServer/scripts/cmscp.py"