Skip to content

Commit

Permalink
job wrapper - python3, new script and cleanEnv=False (#7441)
Browse files Browse the repository at this point in the history
* jobwrapper - use py3 from COMP

in particular for the scripts:

- CMSRunAnalysis.py
- TweakPSet.py
- cmscp.py

This requires dmwm/WMCore#11321

* jobwrapper - add env-CMSRunAnalysis.sh

The new env-CMSRunAnalysis.sh script is intended
to be shared with the WMCore team.

We should make the effort of keeping in this script,
which can eventually be moved to WMCore's repo,
all the shared code among CRAB and WMCore jobwrapper
scripts (namely gWMS-CMSRunAnalysis.sh / CMSRunAnalysis.sh
and submit_py3.sh).

The script name and its function names are just proposals
and can be changed if we can come up with better names.

* jobwrapper - Do not clean cmsRun's env

When we launch cmsRun, we should imitate what WMCore is
doing: do not clean the env, to preserve all the env
variables set for us by the worker node / pilot
and by cmsset_default.sh

Just a (small ?) difference with respect to WMCore:
we do not remove the COMP's python from the pythonpath.
This can be dangerous and should be thoroughly tested.

* jobwrapper - env-CMS.. renamed submit_env.sh

After Stefano's review, I renamed the script
env-CMSRunAnalysis.sh to submit_env.sh

* requirements - wmcore 2.1.4

* submit_env - removed debugging prints
  • Loading branch information
mapellidario authored Nov 28, 2022
1 parent 8a048d1 commit a43a10d
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 185 deletions.
1 change: 1 addition & 0 deletions bin/htcondor_make_runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ CRABSERVERREPO=dmwm

[[ -d $STARTDIR ]] || mkdir -p $STARTDIR

cp $BASEDIR/../scripts/submit_env.sh $STARTDIR || exit 3
cp $BASEDIR/../scripts/gWMS-CMSRunAnalysis.sh $STARTDIR || exit 3


Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
# Format:
# Dependency==version

wmcver==2.1.2
wmcver==2.1.4

27 changes: 9 additions & 18 deletions scripts/CMSRunAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import signal
import os.path
import logging
import commands
import subprocess
import traceback
from ast import literal_eval
from optparse import OptionParser, BadOptionError, AmbiguousOptionError
Expand Down Expand Up @@ -384,7 +384,7 @@ def prepSandbox(opts):
print("==== Sandbox untarring STARTING at %s ====" % time.asctime(time.gmtime()))

#The user sandbox.tar.gz has to be unpacked no matter what (even in DEBUG mode)
print(commands.getoutput('tar xfm %s' % opts.archiveJob))
print(subprocess.getoutput('tar xfm %s' % opts.archiveJob))
print("==== Sandbox untarring FINISHED at %s ====" % time.asctime(time.gmtime()))

#move the pset in the right place
Expand All @@ -408,7 +408,7 @@ def extractUserSandbox(archiveJob, cmsswVersion):
# will be executed from the job working directory, so we move "up"
# the PSet which is also in the user sandbox
os.chdir(cmsswVersion)
print(commands.getoutput('tar xfm %s ' % os.path.join('..', archiveJob)))
print(subprocess.getoutput('tar xfm %s ' % os.path.join('..', archiveJob)))
os.rename('PSet.py','../PSet.py')
os.rename('PSet.pkl','../PSet.pkl')
os.chdir('..')
Expand All @@ -426,14 +426,14 @@ def getProv(filename, scram):
output = scram.getStdout()
return output

def executeUserApplication(command, scram):
def executeUserApplication(command, scram, cleanEnv=True):
"""
cmsRun failures will appear in FJR but do not raise exceptions
exception can only be raised by unexpected failures of the Scram wrapper itself
Scram() never raises and returns the exit code from executing 'command'
"""
with tempSetLogLevel(logger=logging.getLogger(), level=logging.DEBUG):
ret = scram(command, runtimeDir=os.getcwd())
ret = scram(command, runtimeDir=os.getcwd(), cleanEnv=cleanEnv)
if ret > 0:
with open('cmsRun-stdout.log', 'w') as fh:
fh.write(scram.diagnostic())
Expand Down Expand Up @@ -634,15 +634,6 @@ def StripReport(report):

jobExitCode = None
applicationName = 'CMSSW JOB' if not options.scriptExe else 'ScriptEXE'
# no matter what we run, it is very likely to need proxy location
preCmd = 'export X509_USER_PROXY=%s; ' % os.getenv('X509_USER_PROXY')
# needed for root problem with $HOME/.root.mimes, #6801
preCmd += 'export HOME=${HOME:-$PWD}; '
# temporary quick fix for #7413, CMSSW 12_6 requires new env variable
preCmd += 'export SITECONFIG_PATH=/cvmfs/cms.cern.ch/SITECONF/local; '
# needed for accessing EOS at RAL (Echo). See https://ggus.eu/index.php?mode=ticket_info&ticket_id=155272
if os.getenv('XrdSecGSISRVNAMES'):
preCmd += 'export XrdSecGSISRVNAMES=%s; ' % os.getenv('XrdSecGSISRVNAMES')
print("==== %s Execution started at %s ====" % (applicationName, time.asctime(time.gmtime())))
if not options.scriptExe :
cmd = 'cmsRun -p PSet.py -j FrameworkJobReport.xml'
Expand All @@ -652,8 +643,8 @@ def StripReport(report):
os.chmod(options.scriptExe, st.st_mode | stat.S_IEXEC)
cmd = os.getcwd() + "/%s %s %s" %\
(options.scriptExe, options.jobNumber, " ".join(json.loads(options.scriptArgs)))
cmd = preCmd + cmd
applicationExitCode = executeUserApplication(cmd, scram)

applicationExitCode = executeUserApplication(cmd, scram, cleanEnv=False)
if applicationExitCode:
print("==== Execution FAILED at %s ====" % time.asctime(time.gmtime()))
print("==== %s Execution completed at %s ====" % (applicationName, time.asctime(time.gmtime())))
Expand All @@ -672,7 +663,7 @@ def StripReport(report):
# e.g. from xroot https://github.com/dmwm/CRABServer/issues/6640#issuecomment-909362639
print("Sanitize FJR")
cmd = 'cat -v FrameworkJobReport.xml > sane; mv sane FrameworkJobReport.xml'
print(commands.getoutput(cmd))
print(subprocess.getoutput(cmd))
# parse FJR
rep = Report("cmsRun")
rep.parse('FrameworkJobReport.xml', "cmsRun")
Expand Down Expand Up @@ -702,7 +693,7 @@ def StripReport(report):
# e.g. from xroot https://github.com/dmwm/CRABServer/issues/6640#issuecomment-909362639
print("Sanitize FJR")
cmd = 'cat -v FrameworkJobReport.xml > sane; mv sane FrameworkJobReport.xml'
print(commands.getoutput(cmd))
print(subprocess.getoutput(cmd))
# parse FJR
rep = Report("cmsRun")
rep.parse('FrameworkJobReport.xml', "cmsRun")
Expand Down
82 changes: 7 additions & 75 deletions scripts/CMSRunAnalysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,82 +24,13 @@ echo "Local time : $(date)"
echo "Current system : $(uname -a)"
echo "Current processor: $(cat /proc/cpuinfo |grep name|sort|uniq)"

### source the CMSSW stuff using either OSG or LCG style entry env. or CVMFS
echo "======== CMS environment load starting at $(TZ=GMT date) ========"
if [ -f "$VO_CMS_SW_DIR"/cmsset_default.sh ]
then # LCG style --
echo "WN with a LCG style environment, thus using VO_CMS_SW_DIR=$VO_CMS_SW_DIR"
. $VO_CMS_SW_DIR/cmsset_default.sh
elif [ -f "$OSG_APP"/cmssoft/cms/cmsset_default.sh ]
then # OSG style --
echo "WN with an OSG style environment, thus using OSG_APP=$OSG_APP"
. $OSG_APP/cmssoft/cms/cmsset_default.sh CMSSW_3_3_2
elif [ -f "$CVMFS"/cms.cern.ch/cmsset_default.sh ]
then
echo "WN with CVMFS environment, thus using CVMFS=$CVMFS"
. $CVMFS/cms.cern.ch/cmsset_default.sh
elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ]
then # ok, lets call it CVMFS then
export CVMFS=/cvmfs/cms.cern.ch
echo "WN missing VO_CMS_SW_DIR/OSG_APP/CVMFS environment variable, forcing it to CVMFS=$CVMFS"
. $CVMFS/cmsset_default.sh
else
echo "Error during job bootstrap: VO_CMS_SW_DIR, OSG_APP, CVMFS or /cvmfs were not found." >&2
echo " Because of this, we can't load CMSSW. Not good." >&2
exit 11003
fi
echo -e "======== CMS environment load finished at $(TZ=GMT date) ========\n"

echo "==== Python discovery STARTING ===="
# Python library required for Python2/Python3 compatibility through "future"
PY_FUTURE_VERSION=0.18.2
# First, decide which COMP ScramArch to use based on the required OS
if [ "$REQUIRED_OS" = "rhel7" ];
then
WMA_SCRAM_ARCH=slc7_amd64_gcc630
else
WMA_SCRAM_ARCH=slc6_amd64_gcc493
fi
echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH"

suffix=etc/profile.d/init.sh
if [ -d "$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python ]
then
prefix="$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python
elif [ -d "$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python ]
then
prefix="$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python
elif [ -d "$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python ]
then
prefix="$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python
else
echo "Error during job bootstrap: job environment does not contain the init.sh script." >&2
echo " Because of this, we can't load CMSSW. Not good." >&2
exit 11004
fi
source ./submit_env.sh

compPythonPath=`echo $prefix | sed 's|/python||'`
echo "WMAgent bootstrap: COMP Python path is: $compPythonPath"
latestPythonVersion=`ls -t "$prefix"/*/"$suffix" | head -n1 | sed 's|.*/external/python/||' | cut -d '/' -f1`
pythonMajorVersion=`echo $latestPythonVersion | cut -d '.' -f1`
pythonCommand="python"${pythonMajorVersion}
echo "WMAgent bootstrap: latest python release is: $latestPythonVersion"
source "$prefix/$latestPythonVersion/$suffix"
source "$compPythonPath/py2-future/$PY_FUTURE_VERSION/$suffix"
# from ./submit_env.sh
setup_cmsset

command -v $pythonCommand > /dev/null
rc=$?
if [[ $rc != 0 ]]
then
echo "Error during job bootstrap: python isn't available on the worker node." >&2
echo " WMCore/WMAgent REQUIRES at least python2" >&2
exit 11005
else
echo "WMAgent bootstrap: found $pythonCommand at.."
echo `which $pythonCommand`
fi

echo "==== Python discovery FINISHED at $(TZ=GMT date) ===="
# from ./submit_env.sh
setup_python_comp

echo "==== Make sure $HOME is defined ===="
export HOME=${HOME:-$PWD}
Expand Down Expand Up @@ -132,10 +63,11 @@ for i in `ls`; do
echo "== DIR: $i"
done
echo "==== Local directory contents dump FINISHING ===="

echo "======== CMSRunAnalysis.py STARTING at $(TZ=GMT date) ========"
echo "Now running the CMSRunAnalysis.py job in `pwd`..."
set -x
python CMSRunAnalysis.py -r "`pwd`" "$@"
$pythonCommand CMSRunAnalysis.py -r "`pwd`" "$@"
jobrc=$?
set +x
echo "== The job had an exit code of $jobrc "
Expand Down
2 changes: 2 additions & 0 deletions scripts/TweakPSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from ast import literal_eval

from PSetTweaks.PSetTweak import PSetTweak
from Utils.Utilities import decodeBytesToUnicode


def readFileFromTarball(filename, tarball):
Expand All @@ -79,6 +80,7 @@ def readFileFromTarball(filename, tarball):
try:
f = tar_file.extractfile(filename)
content = f.read()
content = decodeBytesToUnicode(content)
break
except KeyError as er:
# Don`t exit due to KeyError, print error. EventBased and FileBased does not have run and lumis
Expand Down
109 changes: 20 additions & 89 deletions scripts/gWMS-CMSRunAnalysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
# difficult-to-impossible to run.
#

# On some sites we know there was some problems with environment cleaning
# with using 'env -i'. To overcome this issue, whenever we start a job, we have
# to save full current environment into file, and whenever it is needed we can load
# it. Be aware, that there are some read-only variables, like: BASHOPTS, BASH_VERSINFO,
# EUID, PPID, SHELLOPTS, UID, etc.
set > startup_environment.sh
sed -e 's/^/export /' startup_environment.sh > tmp_env.sh
mv tmp_env.sh startup_environment.sh
export JOBSTARTDIR=$PWD
echo "======== Startup environment - STARTING ========"

# import some auxiliary functions from a script that is intended to be shared
# with WMCore
source ./submit_env.sh

# from ./submit_env.sh
save_env

echo "======== Startup environment - FINISHING ========"

# Saving START_TIME and when job finishes, check if runtime is not lower than 20m
# If it is lower, sleep the difference. Will not sleep if CRAB3_RUNTIME_DEBUG is set.
Expand Down Expand Up @@ -87,11 +88,10 @@ echo "Hostname: $(hostname -f)"
echo "System: $(uname -a)"
echo "Arguments are $@"

# redirect stderr to stdout, so that it all goes to job_out.*, leaving job_err.* empty
# see https://stackoverflow.com/a/13088401
exec 2>&1
touch jobReport.json
touch WMArchiveReport.json

echo "SCRAM_ARCH=$SCRAM_ARCH"
CRAB_oneEventMode=0
if [ "X$_CONDOR_JOB_AD" != "X" ];
then
Expand Down Expand Up @@ -124,6 +124,8 @@ then
echo "======== HTCONDOR JOB SUMMARY at $(TZ=GMT date) FINISH ========"
fi

touch jobReport.json
touch WMArchiveReport.json
#MM: Are these two lines needed?
touch jobReport.json.$CRAB_Id
touch WMArchiveReport.json.$CRAB_Id
Expand All @@ -137,7 +139,7 @@ time sh ./CMSRunAnalysis.sh "$@" --oneEventMode=$CRAB_oneEventMode
EXIT_STATUS=$?
echo "CMSRunAnalysis.sh complete at $(TZ=GMT date) with (short) exit status $EXIT_STATUS"

echo "======== CMSRunAnalsysis.sh at $(TZ=GMT date) FINISHING ========"
echo "======== CMSRunAnalysis.sh at $(TZ=GMT date) FINISHING ========"

mv jobReport.json jobReport.json.$CRAB_Id
mv WMArchiveReport.json WMArchiveReport.json.$CRAB_Id
Expand All @@ -162,82 +164,11 @@ then
fi

echo "======== User application running completed. Prepare env. for stageout ==="
echo "======== WMAgent CMS environment load starting at $(TZ=GMT date) ========"
if [ -f "$VO_CMS_SW_DIR"/cmsset_default.sh ]
then # LCG style --
echo "WN with a LCG style environment, thus using VO_CMS_SW_DIR=$VO_CMS_SW_DIR"
. $VO_CMS_SW_DIR/cmsset_default.sh
elif [ -f "$OSG_APP"/cmssoft/cms/cmsset_default.sh ]
then # OSG style --
echo "WN with an OSG style environment, thus using OSG_APP=$OSG_APP"
. $OSG_APP/cmssoft/cms/cmsset_default.sh CMSSW_3_3_2
elif [ -f "$CVMFS"/cms.cern.ch/cmsset_default.sh ]
then
echo "WN with CVMFS environment, thus using CVMFS=$CVMFS"
. $CVMFS/cms.cern.ch/cmsset_default.sh
elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ]
then # ok, lets call it CVMFS then
export CVMFS=/cvmfs/cms.cern.ch
echo "WN missing VO_CMS_SW_DIR/OSG_APP/CVMFS environment variable, forcing it to CVMFS=$CVMFS"
. $CVMFS/cmsset_default.sh
else
echo "Error during job bootstrap: VO_CMS_SW_DIR, OSG_APP, CVMFS or /cvmfs were not found." >&2
echo " Because of this, we can't load CMSSW. Not good." >&2
exit 11003
fi
echo "WMAgent bootstrap: WMAgent thinks it found the correct CMSSW setup script"
echo -e "======== WMAgent CMS environment load finished at $(TZ=GMT date) ========\n"

echo "======== python bootstrap for stageout at $(TZ=GMT date) STARTING ========"
# use python from COMP
# Python library required for Python2/Python3 compatibility through "future"
PY_FUTURE_VERSION=0.18.2
# First, decide which COMP ScramArch to use based on the required OS
if [ "$REQUIRED_OS" = "rhel7" ];
then
WMA_SCRAM_ARCH=slc7_amd64_gcc630
else
WMA_SCRAM_ARCH=slc6_amd64_gcc493
fi
echo "Job requires OS: $REQUIRED_OS, thus setting ScramArch to: $WMA_SCRAM_ARCH"

suffix=etc/profile.d/init.sh
if [ -d "$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python ]
then
prefix="$VO_CMS_SW_DIR"/COMP/"$WMA_SCRAM_ARCH"/external/python
elif [ -d "$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python ]
then
prefix="$OSG_APP"/cmssoft/cms/COMP/"$WMA_SCRAM_ARCH"/external/python
elif [ -d "$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python ]
then
prefix="$CVMFS"/COMP/"$WMA_SCRAM_ARCH"/external/python
else
echo "Error during job bootstrap: job environment does not contain the init.sh script." >&2
echo " Because of this, we can't load CMSSW. Not good." >&2
exit 11004
fi

compPythonPath=`echo $prefix | sed 's|/python||'`
echo "WMAgent bootstrap: COMP Python path is: $compPythonPath"
latestPythonVersion=`ls -t "$prefix"/*/"$suffix" | head -n1 | sed 's|.*/external/python/||' | cut -d '/' -f1`
pythonMajorVersion=`echo $latestPythonVersion | cut -d '.' -f1`
pythonCommand="python"${pythonMajorVersion}
echo "WMAgent bootstrap: latest python release is: $latestPythonVersion"
source "$prefix/$latestPythonVersion/$suffix"
source "$compPythonPath/py2-future/$PY_FUTURE_VERSION/$suffix"
# from ./submit_env.sh
setup_cmsset

command -v $pythonCommand > /dev/null
rc=$?
if [[ $rc != 0 ]]
then
echo "Error during job bootstrap: python isn't available on the worker node." >&2
echo " WMCore/WMAgent REQUIRES at least python2" >&2
exit 11005
else
echo "WMAgent bootstrap: found $pythonCommand at.."
echo `which $pythonCommand`
fi
echo "======== python bootstrap for stageout at $(TZ=GMT date) FINISHED ========"
# from ./submit_env.sh
setup_python_comp

#echo "======== Attempting to notify HTCondor of file stageout ========"
# wrong syntax for chirping, also needs a proper classAd name. Keep commented line for a future fix
Expand All @@ -246,7 +177,7 @@ echo "======== python bootstrap for stageout at $(TZ=GMT date) FINISHED ========
echo "======== Stageout at $(TZ=GMT date) STARTING ========"
rm -f wmcore_initialized
# Note we prevent buffering of stdout/err -- this is due to observed issues in mixing of out/err for stageout plugins
PYTHONUNBUFFERED=1 python2.7 cmscp.py
PYTHONUNBUFFERED=1 $pythonCommand cmscp.py
STAGEOUT_EXIT_STATUS=$?

if [ ! -e wmcore_initialized ];
Expand Down
Loading

0 comments on commit a43a10d

Please sign in to comment.