Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

port to perlmutter/intel #3

Open
wants to merge 3 commits into
base: ihesp-hires-master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions config/cesm/config_inputdata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@
<address>https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/inputdata</address>
</server>

<server>
<protocol>wget</protocol>
<address>ftp://ftp.cgd.ucar.edu/cesm/inputdata</address>
<user>anonymous</user>
<password>[email protected]</password>
<checksum>../inputdata_checksum.dat</checksum>
</server>

<!--
<server>
<comment>grid ftp requires the globus-url-copy tool on the client side </comment>
<protocol>gftp</protocol>
Expand All @@ -28,14 +36,7 @@
<password>[email protected]</password>
<checksum>../inputdata_checksum.dat</checksum>
</server>

<server>
<protocol>wget</protocol>
<address>ftp://ftp.cgd.ucar.edu/cesm/inputdata</address>
<user>anonymous</user>
<password>[email protected]</password>
<checksum>../inputdata_checksum.dat</checksum>
</server>
-->


</inputdata>
43 changes: 11 additions & 32 deletions config/cesm/machines/config_compilers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -656,50 +656,29 @@ using a fortran linker.
</SLIBS>
</compiler>

<compiler MACH="cori-haswell" COMPILER="intel">
<compiler MACH="perlmutter" COMPILER="intel">
<NETCDF_C_PATH>$ENV{NETCDF_C_ROOT}</NETCDF_C_PATH>
<NETCDF_FORTRAN_PATH>$ENV{NETCDF_FORTRAN_ROOT}</NETCDF_FORTRAN_PATH>
<PNETCDF_PATH>$ENV{PARALLEL_NETCDF_ROOT}</PNETCDF_PATH>
<CONFIG_ARGS>
<base> --host=Linux </base>
</CONFIG_ARGS>
<CFLAGS>
<append> -xCORE-AVX2 </append>
<append> -march=core-avx2 </append>
</CFLAGS>
<FFLAGS>
<append> -xCORE-AVX2 </append>
<append> -march=core-avx2 </append>
</FFLAGS>
<SLIBS>
<append> -L$(NETCDF_DIR) -lnetcdff -Wl,--as-needed,-L$(NETCDF_DIR)/lib -lnetcdff -lnetcdf </append>
</SLIBS>
<CPPDEFS>
<append MODEL="gptl"> -DHAVE_PAPI -DHAVE_SLASHPROC </append>
</CPPDEFS>
<LDFLAGS>
<append>-mkl </append>
<append> -Wl,-rpath,$ENV{NETCDF_FORTRAN_ROOT}/lib -Wl,-rpath,$ENV{NETCDF_C_ROOT}/lib -Wl,-rpath,$ENV{PARALLEL_NETCDF_ROOT}/lib </append>
</LDFLAGS>
<!-- Bug in the intel/17.0.1 compiler requires this, remove this line when compiler is updated -->
<HAS_F2008_CONTIGUOUS>FALSE</HAS_F2008_CONTIGUOUS>
</compiler>

<compiler MACH="cori-knl" COMPILER="intel">
<CONFIG_ARGS>
<base> --host=Linux </base>
</CONFIG_ARGS>
<CFLAGS>
<append> -xMIC-AVX512 </append>
</CFLAGS>
<FFLAGS>
<append> -xMIC-AVX512 </append>
</FFLAGS>
<SLIBS>
<append> -L$(NETCDF_DIR) -lnetcdff -Wl,--as-needed,-L$(NETCDF_DIR)/lib -lnetcdff -lnetcdf </append>
<append> -L$(NETCDF_FORTRAN_ROOT)/lib -lnetcdff -L$ENV{NETCDF_C_ROOT}/lib -lnetcdf -L$(PARALLEL_NETCDF_ROOT)/lib -lpnetcdf </append>
<append MPILIB="mpich"> -qmkl=cluster </append>
</SLIBS>
<CPPDEFS>
<append MODEL="gptl"> -DHAVE_SLASHPROC</append>
<CPPDEFS>
<append MODEL="gptl"> -DHAVE_NANOTIME -DBIT64 -DHAVE_VPRINTF -DHAVE_BACKTRACE -DHAVE_SLASHPROC -DHAVE_COMM_F2C -DHAVE_TIMES -DHAVE_GETTIMEOFDAY </append>
</CPPDEFS>
<LDFLAGS>
<append>-mkl -lmemkind -zmuldefs</append>
</LDFLAGS>
<!-- Bug in the intel/17.0.1 compiler requires this, remove this line when compiler is updated -->
<HAS_F2008_CONTIGUOUS>FALSE</HAS_F2008_CONTIGUOUS>
</compiler>

<compiler MACH="derecho">
Expand Down
30 changes: 20 additions & 10 deletions config/cesm/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1904,8 +1904,10 @@ This allows using a different mpirun command to launch unit tests
<modules compiler="intel">
<command name="load">PrgEnv-intel</command>
<command name="switch">intel intel/2023.1.0</command>
<command name="use">/global/cfs/cdirs/ccsm1/sw/perlmutter/modules/intel/2023.1.0/</command>

</modules>
<modules compiler="cray">
<!-- <modules compiler="cray">
<command name="load">PrgEnv-cray</command>
<command name="switch">cce cce/16.0.1</command>
</modules>
Expand All @@ -1917,36 +1919,44 @@ This allows using a different mpirun command to launch unit tests
<command name="load">PrgEnv-nvhpc</command>
<command name="switch">nvhpc nvhpc/23.1</command>
</modules>
<modules>
-->
<modules>
<command name="swap">craype craype/2.7.23</command>
</modules>
<modules>
<command name="switch">cray-libsci/23.09.1.1</command>
</modules>
<modules>
<command name="load">cray-mpich/8.1.25</command>
</modules>
<modules mpilib="mpi-serial">
<command name="rm">cray-netcdf-hdf5parallel</command>
<command name="rm">cray-hdf5-parallel</command>
<command name="rm">cray-parallel-netcdf</command>
<command name="load">cray-hdf5/1.12.2.7</command>
<command name="load">cray-netcdf/4.9.0.7</command>
</modules>
<modules mpilib="!mpi-serial">
<command name="load">cray-hdf5-parallel/1.12.2.7</command>
<command name="load">cray-netcdf-hdf5parallel/4.9.0.7</command>
<command name="load">cray-parallel-netcdf/1.12.3.7</command>
<modules mpilib="mpich">
<command name="load">cray-mpich/8.1.25</command>
<command name="use">/global/cfs/cdirs/ccsm1/sw/perlmutter/modules/cray-mpich/8.1.25-dr5sgoi/intel/2023.1.0/</command>
<command name="load">netcdf-c/4.9.2</command>
<command name="load">netcdf-fortran/4.6.1</command>
<command name="load">parallel-netcdf/1.12.3</command>
<command name="load">parallelio/2.6.2</command>
</modules>
<modules>
<command name="load">cmake/3.24.3</command>
</modules>
</module_system>
<environment_variables>
<env name="OMP_STACKSIZE">256M</env>
<!-- <env name="OMP_PROC_BIND">spread</env>
<env name="DVS_MAXNODES">1__</env>
<env name="MPICH_MPIIO_DVS_MAXNODES">24</env>
<env name="PIO_INCDIR">$ENV{PARALLELIO_ROOT}/include</env>
<env name="PIO_LIBDIR">$ENV{PARALLELIO_ROOT}/lib</env>
<!-- <env name="OMP_PROC_BIND">spread</env>
<env name="OMP_PLACES">threads</env> -->
</environment_variables>
<resource_limits>
<resource name="RLIMIT_STACK">-1</resource>
</resource_limits>
</machine>

<machine MACH="pleiades-bro">
Expand Down
2 changes: 2 additions & 0 deletions config/xml_schemas/config_compilers_v2.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@
<xs:element name="PETSC_PATH" type="systemPath"/>
<xs:element name="PFUNIT_PATH" type="systemPath"/>
<xs:element name="PIO_FILESYSTEM_HINTS" type="xs:token"/>
<xs:element name="PIO_INCDIR" type="systemPath"/>
<xs:element name="PIO_LIBDIR" type="systemPath"/>
<xs:element name="PNETCDF_PATH" type="systemPath"/>
<xs:element name="SCC" type="systemPath"/>
<xs:element name="SCXX" type="systemPath"/>
Expand Down
3 changes: 2 additions & 1 deletion scripts/lib/CIME/Servers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pylint: disable=import-error
# pylint: disable=import-error
from distutils.spawn import find_executable

has_gftp = find_executable("globus-url-copy")
has_svn = find_executable("svn")
has_wget = find_executable("wget")
Expand Down
80 changes: 62 additions & 18 deletions scripts/lib/CIME/Servers/ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,69 @@
# pylint: disable=super-init-not-called
from CIME.XML.standard_module_setup import *
from CIME.Servers.generic_server import GenericServer
from CIME.utils import Timeout
from ftplib import FTP as FTPpy
from ftplib import all_errors as all_ftp_errors
import socket

logger = logging.getLogger(__name__)
# I think that multiple inheritance would be useful here, but I couldn't make it work
# in a py2/3 compatible way.
class FTP(GenericServer):
def __init__(self, address, user="", passwd="", server=None):
    """Log in on an already-open FTP connection and cd to the root path.

    address -- "<host>/<root path>"; everything after the first "/" is the
               directory changed into once the login succeeded.
    user    -- login name; a falsy value is replaced by "".
    passwd  -- password; a falsy value is replaced by "".
    server  -- the open connection object (ftp_login passes an ftplib.FTP).

    A rejected login, a failing cwd, or an unexpected cwd reply is logged
    as a warning and the constructor returns early without raising.
    """
    if not user:
        user = ""
    if not passwd:
        passwd = ""
    # NOTE(review): expect() is a project helper; presumably it aborts when
    # `server` is falsy, i.e. when the class is constructed directly.
    expect(server, " Must call via ftp_login function")
    root_address = address.split("/", 1)[1]
    self.ftp = server
    self._ftp_server = address

    stat = self.ftp.login(user, passwd)
    logger.debug("login stat {}".format(stat))
    if "Login successful" not in stat:
        logging.warning(
            "FAIL: Could not login to ftp server {}\n error {}".format(
                address, stat
            )
        )
        return None
    try:
        stat = self.ftp.cwd(root_address)
    except all_ftp_errors as err:
        logging.warning("ftplib returned error {}".format(err))
        return None

    logger.debug("cwd {} stat {}".format(root_address, stat))
    if "Directory successfully changed" not in stat:
        logging.warning(
            "FAIL: Could not cd to server root directory {}\n error {}".format(
                root_address, stat
            )
        )
        return None

@classmethod
def ftp_login(cls, address, user="", passwd=""):
    """Open a connection to the FTP host in `address` and log in.

    address -- "<host>/<root path>"; the host part is used to connect, the
               full string is handed on to the constructor.
    user, passwd -- credentials forwarded to the constructor.

    Returns the constructed server object, or None when the address has no
    root path, the connection attempt fails or times out, or ftplib raises
    while the object is being constructed.
    """
    ftp_server, slash, root_address = address.partition("/")
    if not slash:
        # Without a "/" there is no root path to change into; report it
        # like any other unusable server instead of raising.
        logger.warning("ftp address {} has no root path".format(address))
        return None
    logger.info("server address {} root path {}".format(ftp_server, root_address))
    try:
        with Timeout(60):
            ftp = FTPpy(ftp_server)

    except socket.error as e:
        logger.warning("ftp login timeout! {} ".format(e))
        return None
    except RuntimeError:
        # NOTE(review): presumably what Timeout raises on expiry - confirm
        # against CIME.utils.
        logger.warning("ftp login timeout!")
        return None

    try:
        return cls(address, user=user, passwd=passwd, server=ftp)
    except all_ftp_errors as e:
        logger.warning("ftp error: {}".format(e))
        # The connection is unusable; release the socket before giving up.
        ftp.close()
        return None

def fileexists(self, rel_path):
try:
Expand All @@ -42,20 +77,29 @@ def fileexists(self, rel_path):

if rel_path not in stat:
if not stat or not stat[0].startswith(rel_path):
logging.warning("FAIL: File {} not found.\nerror {}".format(rel_path, stat))
logging.warning(
"FAIL: File {} not found.\nerror {}".format(rel_path, stat)
)
return False
return True

def getfile(self, rel_path, full_path):
    """Download `rel_path` from the server into the local file `full_path`.

    Returns True when the server replies "226 Transfer complete.".
    Returns False when ftplib raises (any partially written local file is
    removed first) or when the final reply is anything else.
    """
    try:
        # The context manager closes the local file on success and on
        # error, so it is flushed before the reply is inspected and is no
        # longer open when a partial download is removed below.
        with open(full_path, "wb") as local_file:
            stat = self.ftp.retrbinary(
                "RETR {}".format(rel_path), local_file.write
            )
    except all_ftp_errors:
        if os.path.isfile(full_path):
            os.remove(full_path)
        logger.warning("ERROR from ftp server, trying next server")
        return False

    if stat != "226 Transfer complete.":
        logging.warning(
            "FAIL: Failed to retreve file '{}' from FTP repo '{}' stat={}\n".format(
                rel_path, self._ftp_server, stat
            )
        )
        return False
    return True

Expand All @@ -67,4 +111,4 @@ def getdirectory(self, rel_path, full_path):
return False

for _file in stat:
self.getfile(_file, full_path+os.sep+os.path.basename(_file))
self.getfile(_file, full_path + os.sep + os.path.basename(_file))
12 changes: 8 additions & 4 deletions scripts/lib/CIME/Servers/generic_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@

from CIME.XML.standard_module_setup import *
from socket import _GLOBAL_DEFAULT_TIMEOUT

logger = logging.getLogger(__name__)


class GenericServer(object):
    """Interface for download servers; every method raises NotImplementedError.

    Concrete servers (for example the FTP and GridFTP classes) subclass this
    and override the methods below.
    """

    def __init__(
        self, host=" ", user=" ", passwd=" ", acct=" ", timeout=_GLOBAL_DEFAULT_TIMEOUT
    ):
        raise NotImplementedError

    def fileexists(self, rel_path):
        """Returns True if rel_path exists on server"""
        raise NotImplementedError

    def getfile(self, rel_path, full_path):
        """Get file from rel_path on server and place in location full_path on client
        fail if full_path already exists on client, return True if successful"""
        raise NotImplementedError
43 changes: 32 additions & 11 deletions scripts/lib/CIME/Servers/gftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,52 @@

logger = logging.getLogger(__name__)


class GridFTP(GenericServer):
    """Server backend that shells out to the `globus-url-copy` tool."""

    def __init__(self, address, user="", passwd=""):
        """Remember `address` as the root URL; user and passwd are unused."""
        self._root_address = address

    def fileexists(self, rel_path):
        """Return True if the basename of `rel_path` shows up in a listing
        of its parent directory on the server.

        The check is a substring test against the command output, and a
        non-zero exit status of the listing command also counts as missing.
        """
        stat, out, err = run_cmd(
            "globus-url-copy -list {}".format(
                os.path.join(self._root_address, os.path.dirname(rel_path)) + os.sep
            )
        )
        if stat or os.path.basename(rel_path) not in out:
            logging.warning(
                "FAIL: File {} not found.\nstat={} error={}".format(rel_path, stat, err)
            )
            return False
        return True

    def getfile(self, rel_path, full_path):
        """Copy `rel_path` from the server to local `full_path`.

        Returns True when the copy command exits with status 0, else logs a
        warning and returns False.
        """
        stat, _, err = run_cmd(
            "globus-url-copy -v {} file://{}".format(
                os.path.join(self._root_address, rel_path), full_path
            )
        )

        if stat != 0:
            logging.warning(
                "FAIL: GridFTP repo '{}' does not have file '{}' error={}\n".format(
                    self._root_address, rel_path, err
                )
            )
            return False
        return True

    def getdirectory(self, rel_path, full_path):
        """Recursively copy directory `rel_path` from the server into local
        `full_path`.

        Returns True when the copy command exits with status 0, else logs a
        warning and returns False.
        """
        stat, _, err = run_cmd(
            "globus-url-copy -v -r {}{} file://{}{}".format(
                os.path.join(self._root_address, rel_path), os.sep, full_path, os.sep
            )
        )

        if stat != 0:
            logging.warning(
                "FAIL: GridFTP repo '{}' does not have directory '{}' error={}\n".format(
                    self._root_address, rel_path, err
                )
            )
            return False
        return True
Loading