Skip to content

Commit

Permalink
intel/ci: Add code changes to enable weekly job options for mpichsuite.
Browse files Browse the repository at this point in the history
Signed-off-by: Nikhil Nanal <[email protected]>
  • Loading branch information
nikhilnanal committed Sep 1, 2023
1 parent 936b1d3 commit 232d79a
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 66 deletions.
38 changes: 30 additions & 8 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,6 @@ pipeline {
RUN_LOCATION="${env.WORKSPACE}/${SCRIPT_LOCATION}/"
CUSTOM_WORKSPACE="${CB_HOME}/workspace/${JOB_NAME}/${env.BUILD_NUMBER}"
}

stages {
stage ('opt-out') {
steps {
Expand All @@ -258,17 +257,26 @@ pipeline {
}
}
}
stage ('prepare build') {
when { equals expected: true, actual: DO_RUN }
steps {
script {
echo "Copying build dirs."
build("builddir")
echo "Copying log dirs."
build("logdir", null, null, RELEASE)
build("mpich")
build("impi_mpich")
}
}
}
stage ('parallel-builds') {
when { equals expected: true, actual: DO_RUN }
parallel {
stage ('build') {
steps {
script {
dir (CUSTOM_WORKSPACE) {
echo "Copying build dirs."
build("builddir")
echo "Copying log dirs."
build("logdir", null, null, RELEASE)
for (mode in BUILD_MODES) {
echo "Building Libfabric $mode"
build("libfabric", "$mode")
Expand All @@ -279,6 +287,21 @@ pipeline {
}
}
}
stage ('buildmpich-libfabric') {
steps {
script {
dir("${CUSTOM_WORKSPACE}/mpich"){
checkout scm
echo "Building Libfabric reg"
slurm_batch("squirtle,totodile", "1",
"${env.LOG_DIR}/libfabric_mpich_log",
"""python$PYTHON_VERSION ${RUN_LOCATION}/build.py \
--build_item=libfabric --build_cluster=mpich """
)
}
}
}
}
stage ('build-daos') {
agent {
node {
Expand Down Expand Up @@ -456,8 +479,7 @@ pipeline {
steps {
script {
dir (RUN_LOCATION) {
def providers = [["verbs", "rxm"], ["tcp", null],
["tcp", "rxm"], ["sockets", null]]
def providers = [["tcp", null],["verbs","rxm"]]
for (mpi in MPI_TYPES) {
run_middleware(providers, "mpichtestsuite", "mpichtestsuite",
"squirtle,totodile", "2", "${mpi}")
Expand Down Expand Up @@ -691,4 +713,4 @@ pipeline {
dir("${env.WORKSPACE}@tmp") { deleteDir() }
}
}
}
}
41 changes: 35 additions & 6 deletions contrib/intel/jenkins/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,30 @@ def build_fabtests(libfab_install_path, mode):
common.run_command(['make', '-j32'])
common.run_command(['make', 'install'])

def extract_mpich(mpitype):
    """Unpack the MPICH test sources for *mpitype* into the build directory.

    The tarball is copied from ``cloudbees_config.scm_dir`` into a freshly
    created directory below ``cloudbees_config.build_dir`` and extracted
    there. Any previous copy of the destination directory is removed first.

    Args:
        mpitype: ``'mpich'`` (``mpich-4.1.tar.gz`` -> ``mpich_temp``) or
            ``'impi'`` (``mpich-test.tar.gz`` -> ``impi_mpichtest``).

    Raises:
        SystemExit: with status 1 when *mpitype* is not a supported value.
    """
    # Local import so this function does not depend on the module's
    # top-of-file import list.
    import sys

    # mpitype -> (source dir under scm_dir, destination dir, tarball name)
    layouts = {
        'mpich': ('mpich', 'mpich_temp', 'mpich-4.1.tar.gz'),
        'impi': ('impi_mpichtest', 'impi_mpichtest', 'mpich-test.tar.gz'),
    }
    if mpitype not in layouts:
        print(f"Invalid mpi type {mpitype}")
        # A bare `exit` expression does nothing; actually stop here instead
        # of failing later on unbound local variables.
        sys.exit(1)

    src_dir, dest, mpich_tar = layouts[mpitype]
    dest_dir = f'{cloudbees_config.build_dir}/{dest}'

    cwd = os.getcwd()
    if os.path.exists(f'{dest_dir}/'):
        common.run_command(['rm', '-rf', f'{dest_dir}/'])
    os.makedirs(dest_dir)
    shutil.copy(f'{cloudbees_config.scm_dir}/{src_dir}/{mpich_tar}',
                f'{dest_dir}/')

    os.chdir(f'{dest_dir}/')
    try:
        common.run_command(['tar', '-xvf', f'{dest_dir}/{mpich_tar}'])
    finally:
        # Restore the caller's working directory even if extraction fails.
        os.chdir(cwd)

def copy_build_dir(install_path):
middlewares_path = f'{install_path}/middlewares'
if (os.path.exists(middlewares_path) != True):
Expand All @@ -78,9 +102,6 @@ def copy_build_dir(install_path):
f'{middlewares_path}/shmem')
shutil.copytree(f'{cloudbees_config.build_dir}/oneccl',
f'{middlewares_path}/oneccl')

os.symlink(f'{cloudbees_config.build_dir}/mpich',
f'{middlewares_path}/mpich')
os.symlink(f'{cloudbees_config.build_dir}/impi',
f'{middlewares_path}/impi')
os.symlink(f'{cloudbees_config.build_dir}/ompi',
Expand Down Expand Up @@ -112,13 +133,13 @@ def log_dir(install_path, release=False):

parser = argparse.ArgumentParser()
parser.add_argument('--build_item', help="build libfabric or fabtests",
choices=['libfabric', 'fabtests', 'builddir', 'logdir'])
choices=['libfabric', 'fabtests', 'builddir', 'logdir','mpich', 'impi_mpich'])

parser.add_argument('--ofi_build_mode', help="select buildmode libfabric "\
"build mode", choices=['reg', 'dbg', 'dl'])

parser.add_argument('--build_cluster', help="build libfabric on specified cluster", \
choices=['daos', 'gpu'], default='default')
choices=['daos', 'dsa', 'gpu','mpich'], default='default')
parser.add_argument('--release', help="This job is likely testing a "\
"release and will be checked into a git tree.",
action='store_true')
Expand All @@ -145,11 +166,19 @@ def log_dir(install_path, release=False):
p = re.compile('mpi*')

if (build_item == 'libfabric'):
build_libfabric(libfab_install_path, ofi_build_mode, cluster, ucx)
if (cluster == 'mpich'):
libfab_install_path += "/libfabric_mpich"
build_libfabric(libfab_install_path, ofi_build_mode, cluster)
else:
build_libfabric(libfab_install_path, ofi_build_mode, cluster, ucx)

elif (build_item == 'fabtests'):
build_fabtests(libfab_install_path, ofi_build_mode)

elif (build_item == 'mpich'):
extract_mpich('mpich')
elif (build_item == 'impi_mpich'):
extract_mpich('impi')
elif (build_item == 'builddir'):
copy_build_dir(install_path)

Expand Down
11 changes: 7 additions & 4 deletions contrib/intel/jenkins/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,21 @@ def intel_mpi_benchmark(core, hosts, mpi, mode, group, user_env, log_file, util)
print(f"Skipping {mpi.upper} {imb.testname} as execute condition fails")
print('-------------------------------------------------------------------')

def mpich_test_suite(core, hosts, mpi, mode, user_env, log_file, util):
def mpich_test_suite(core, hosts, mpi, mode, user_env, log_file, util, weekly=None):

mpich_tests = tests.MpichTestSuite(jobname=jbname,buildno=bno,
testname="MpichTestSuite",core_prov=core,
fabric=fab, mpitype=mpi, hosts=hosts,
ofi_build_mode=mode, user_env=user_env,
log_file=log_file, util_prov=util)
log_file=log_file, util_prov=util, weekly=weekly)

print('-------------------------------------------------------------------')
if (mpich_tests.execute_condn == True):
print(f"Running mpichtestsuite: Spawn Tests for {core}-{util}-{fab}-{mpi}")
mpich_tests.execute_cmd("spawn")
print(f"Running mpichtestsuitefor {core}-{util}-{fab}-{mpi}")
if (mpi == "mpich"):
print("Building mpich")
mpich_tests.build_mpich()
mpich_tests.execute_cmd()
else:
print(f"Skipping {mpi.upper()} {mpich_tests.testname} as exec condn fails")
print('-------------------------------------------------------------------')
Expand Down
4 changes: 3 additions & 1 deletion contrib/intel/jenkins/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __call__(self, parser, namespace, values, option_string=None):
choices=['impi', 'mpich', 'ompi'], default='impi')
parser.add_argument('--log_file', help="Full path to log file",
default=os.environ['DEFAULT_LOG_LOCATION'], type=str)
# argparse's type=bool calls bool() on the raw string, so every non-empty
# value (including "False" and "0") would enable the weekly run. Parse the
# text explicitly instead; "--weekly True" keeps working as before.
parser.add_argument('--weekly', help="run weekly", default=False,
                    type=lambda text: text.strip().lower() in
                    ('1', 'true', 'yes', 'y', 'on'))

args = parser.parse_args()
args_core = args.prov
Expand All @@ -45,6 +46,7 @@ def __call__(self, parser, namespace, values, option_string=None):
args_device = args.device
user_env = args.user_env
log_file = args.log_file
weekly = args.weekly

if (args.ofi_build_mode):
ofi_build_mode = args.ofi_build_mode
Expand Down Expand Up @@ -131,7 +133,7 @@ def __call__(self, parser, namespace, values, option_string=None):
if (run_test == 'all' or run_test == 'mpichtestsuite'):
run.mpich_test_suite(args_core, hosts, mpi,
ofi_build_mode, user_env, log_file,
args_util)
args_util, weekly)

if (run_test == 'all' or run_test == 'IMB'):
run.intel_mpi_benchmark(args_core, hosts, mpi,
Expand Down
138 changes: 91 additions & 47 deletions contrib/intel/jenkins/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
import os

import io
sys.path.append(os.environ['CLOUDBEES_CONFIG'])

import subprocess
Expand Down Expand Up @@ -451,11 +451,11 @@ class MPICH:
def __init__(self, core_prov, hosts, libfab_installpath, nw_interface,
server, client, environ, middlewares_path, util_prov=None):

self.mpich_src = f'{middlewares_path}/mpich'
self.mpich_src = f'{cloudbees_config.build_dir}/mpich_temp'
self.core_prov = core_prov
self.hosts = hosts
self.util_prov = util_prov
self.libfab_installpath = libfab_installpath
self.libfab_installpath = f'{libfab_installpath}/libfabric_mpich'
self.nw_interface = nw_interface
self.server = server
self.client = client
Expand All @@ -472,7 +472,7 @@ def env(self):
cmd += f"export FI_PROVIDER={self.core_prov}; "
cmd += "export I_MPI_FABRICS=ofi; "
cmd += "export MPIR_CVAR_CH4_OFI_ENABLE_ATOMICS=0; "
cmd += "export MPIR_CVAR_CH4_OFI_CAPABILITY_SETS_DEBUG=1; "
cmd += "export MPIR_CVAR_CH4_OFI_CAPABILITY_SETS_DEBUG=0; "
cmd += f"export LD_LIBRARY_PATH={self.mpich_src}/lib:$LD_LIBRARY_PATH; "
cmd += f"export LD_LIBRARY_PATH={self.libfab_installpath}/lib/:"\
"$LD_LIBRARY_PATH; "
Expand Down Expand Up @@ -518,10 +518,12 @@ def __init__(self, core_prov, hosts, libfab_installpath, nw_interface,
def env(self):
cmd = f"bash -c \'source {self.impi_src}/env/vars.sh "\
"-i_mpi_ofi_internal=0; "
cmd += f"source {cloudbees_config.intel_compiler_root}/env/vars.sh; "
if (self.util_prov):
cmd += f"export FI_PROVIDER={self.core_prov}\\;{self.util_prov}; "
else:
cmd += f"export FI_PROVIDER={self.core_prov}; "
cmd += "export FI_IFACE=ib0; "
cmd += "export I_MPI_FABRICS=ofi; "
cmd += f"export LD_LIBRARY_PATH={self.impi_src}/lib:$LD_LIBRARY_PATH; "
cmd += f"export LD_LIBRARY_PATH={self.impi_src}/lib/release:"\
Expand Down Expand Up @@ -688,58 +690,100 @@ def execute_cmd(self):
class MpichTestSuite(Test):

def __init__(self, jobname, buildno, testname, core_prov, fabric,
hosts, mpitype, ofi_build_mode, user_env, log_file, util_prov=None):
hosts, mpitype, ofi_build_mode, user_env, log_file, util_prov=None, weekly=None):

super().__init__(jobname, buildno, testname, core_prov,
fabric, hosts, ofi_build_mode, user_env, log_file, mpitype,
util_prov)

self.mpichsuitepath = f'{self.middlewares_path}/{mpitype}/'\
'mpichsuite/test/mpi/'
self.mpichpath = f'{cloudbees_config.build_dir}/mpich_temp/mpich-4.1/'
self.mpichsuitepath = f'{self.mpichpath}/test/mpi/'
self.impi_mpichtestpath = f'{cloudbees_config.build_dir}/impi_mpichtest/mpich-test'
self.pwd = os.getcwd()
self.mpi_type = mpitype

def testgroup(self, testgroupname):
testpath = f'{self.mpichsuitepath}/{testgroupname}'
tests = []
with open(f'{testpath}/testlist') as file:
for line in file:
if(line[0] != '#' and line[0] != '\n'):
tests.append((line.rstrip('\n')).split(' '))

return tests

def set_options(self, nprocs, timeout=None):
    """Store the process count on the MPI object and optionally set a timeout.

    Args:
        nprocs: value assigned to ``self.mpi.n``.
        timeout: when given, exported as ``MPIEXEC_TIMEOUT`` in this
            process's environment; when ``None`` the environment is left
            untouched.
    """
    self.mpi.n = nprocs
    if timeout is None:
        return
    os.environ['MPIEXEC_TIMEOUT'] = timeout

self.weekly = weekly

def create_hostfile(self, file, hostlist):
    """Write every entry of *hostlist* to the path *file*, one per line.

    An existing file at that path is overwritten.
    """
    with open(file, "w") as out:
        out.writelines(f"{host}\n" for host in hostlist)

def update_testlists(self, filename, category):
    """Comment out every line of *filename* that equals *category*.

    Matching is an exact comparison against the whole line (without its
    line terminator); each match is rewritten with a leading ``#``. The
    file is only rewritten when at least one line matched, and a trailing
    newline present in the original file is kept.

    Args:
        filename: path of the testlist file to edit in place.
        category: exact line content to disable.
    """
    with open(filename, 'r') as file:
        content = file.read()

    lines = content.splitlines()
    updated = [f'#{line}' if line == category else line for line in lines]
    if updated == lines:
        # Nothing to disable; leave the file untouched.
        return

    text = '\n'.join(updated)
    if content.endswith('\n'):
        # '\n'.join() drops the final terminator; put it back so the last
        # entry stays a complete line.
        text += '\n'
    with open(filename, 'w') as file:
        file.write(text)

def exclude_tests(self, provider):
    """Disable the tests configured as excluded for *provider*.

    ``cloudbees_config.mpichtests_exclude[provider]`` is read as a mapping
    from a directory (relative to ``self.mpichsuitepath``) to the entries
    that are commented out in that directory's ``testlist`` file.
    """
    excluded = cloudbees_config.mpichtests_exclude[provider]
    for subdir, entries in excluded.items():
        testlist = f'{self.mpichsuitepath}/{subdir}/testlist'
        for entry in entries:
            self.update_testlists(testlist, entry)

def build_mpich(self):
    """Configure, build and install MPICH from ``self.mpichpath``.

    MPICH is configured with the ch4:ofi device against the libfabric
    installation at ``self.mpi.libfab_installpath`` and installed into
    ``<cloudbees_config.build_dir>/mpich_temp``. The working directory is
    changed to ``self.mpichpath`` for the build and restored to
    ``self.pwd`` afterwards, also when a step raises.
    """
    print("configure mpich")
    # Derive the prefix from the config rather than a hard-coded home
    # directory, matching the other mpich_temp paths in this file.
    # NOTE(review): assumes cloudbees_config.build_dir is the former
    # /home/cstbuild/cloudbees_middlewares location -- confirm.
    configure_cmd = (
        './configure '
        f'--prefix={cloudbees_config.build_dir}/mpich_temp '
        f'--with-libfabric={self.mpi.libfab_installpath} '
        '--disable-oshmem --disable-fortran --without-ch4-shmmods '
        '--with-device=ch4:ofi --without-ze'
    )
    os.chdir(self.mpichpath)
    try:
        print(configure_cmd)
        common.run_command(['./autogen.sh'])
        common.run_command(shlex.split(configure_cmd))
        # A bare `-j` lets make start an unlimited number of jobs; cap it.
        common.run_command(['make', '-j32'])
        common.run_command(['make', 'install'])
    finally:
        os.chdir(self.pwd)

@property
def execute_condn(self):
return (self.mpi_type == 'impi' or \
(self.mpi_type == 'mpich' and self.core_prov == 'verbs'))

def execute_cmd(self, testgroupname):
    """Run every test listed in the ``testlist`` of *testgroupname*.

    Each entry from ``self.testgroup()`` is read as
    ``[testname, nprocs, *args]``; an argument of the form
    ``timelimit=<value>`` is passed to ``set_options`` as the timeout for
    that test. The working directory is changed to the group directory for
    the run and restored to ``self.pwd`` afterwards, also when a test
    command raises.

    Args:
        testgroupname: sub-directory of ``self.mpichsuitepath`` to run.
    """
    print("Running Tests: " + testgroupname)
    # Remember the value present before this run so a per-test timelimit
    # does not leak into later tests that declare none.
    inherited_timeout = os.environ.get('MPIEXEC_TIMEOUT')
    os.chdir(f'{self.mpichsuitepath}/{testgroupname}')
    try:
        for test in self.testgroup(testgroupname):
            testname = test[0]
            nprocs = test[1]
            timelimit = None  # reset for every test
            for item in test[2:]:
                key, _, value = item.partition('=')
                if key == 'timelimit' and value:
                    timelimit = value
            if timelimit is None:
                if inherited_timeout is None:
                    os.environ.pop('MPIEXEC_TIMEOUT', None)
                else:
                    os.environ['MPIEXEC_TIMEOUT'] = inherited_timeout
            self.set_options(nprocs, timeout=timelimit)
            testcmd = f'./{testname}'
            # self.mpi.env opens a quoted `bash -c '` string; the trailing
            # quote closes it.
            outputcmd = shlex.split(self.mpi.env + self.mpi.cmd + testcmd + '\'')
            common.run_command(outputcmd)
    finally:
        os.chdir(self.pwd)

return ((self.mpi_type == 'impi' or \
self.mpi_type == 'mpich') and \
(self.core_prov == 'verbs' or self.core_prov == 'tcp'))
def execute_cmd(self):
    """Run the MPICH test suite for the configured MPI type.

    * ``mpich``: weekly runs configure the suite and run ``make testing``;
      other (PR) runs configure against the installed ``mpich_temp``,
      build the suite and run ``./runtests -tests=testlist``.
    * ``impi``: only weekly runs do anything; they write a hostfile and
      run ``./test.sh`` from ``self.impi_mpichtestpath``.
    """
    if self.mpi_type == 'mpich':
        self._run_mpich_suite()
    if self.mpi_type == 'impi' and self.weekly:
        self._run_impi_weekly_suite()

def _run_mpich_suite(self):
    """Configure and run the MPICH suite in ``self.mpichsuitepath``."""
    configure_cmd = './configure'
    os.chdir(self.mpichsuitepath)
    try:
        if self.weekly:
            print(f'Weekly {self.mpi_type} mpichsuite tests')
            # self.mpi.env opens a quoted `bash -c '` string; the trailing
            # quote closes it.
            common.run_command(shlex.split(self.mpi.env + configure_cmd + '\''))
            self.exclude_tests(self.core_prov)
            testcmd = 'make testing'
            common.run_command(shlex.split(self.mpi.env + testcmd + '\''))
        else:
            # PR tests
            print(f"PR {self.mpi_type} mpichsuite tests")
            configure_cmd += f' --with-mpi={cloudbees_config.build_dir}/mpich_temp'
            common.run_command(shlex.split(configure_cmd))
            # A bare `-j` lets make start an unlimited number of jobs; cap it.
            common.run_command(['make', '-j32'])
            self.exclude_tests(self.core_prov)
            testcmd = "./runtests -tests=testlist -debug -verbose"
            common.run_command(shlex.split(self.mpi.env + testcmd + '\''))
    finally:
        # Restore the working directory even when a step raises.
        os.chdir(self.pwd)

def _run_impi_weekly_suite(self):
    """Write the hostfile and run ``test.sh`` in ``self.impi_mpichtestpath``."""
    print(f'Weekly {self.mpi_type} mpichsuite tests')
    hostfile = f'{self.impi_mpichtestpath}/hostfile'
    os.chdir(self.impi_mpichtestpath)
    try:
        print(self.hosts)
        self.create_hostfile(hostfile, self.hosts)
        os.environ["I_MPI_HYDRA_HOST_FILE"] = hostfile
        # The `;` is required: without it bash treats `./test.sh` and its
        # options as further arguments to `export` and never runs the script.
        # NOTE(review): only the core provider is passed to --exclude; the
        # util provider is not part of the filter -- confirm this is intended.
        test_cmd = f'export I_MPI_HYDRA_HOST_FILE={hostfile}; '
        test_cmd += f'./test.sh --exclude lin,{self.core_prov},*,*,*,gnu'
        common.run_command(shlex.split(self.mpi.env + test_cmd + '\''))
    finally:
        os.chdir(self.pwd)

class OneCCLTests(Test):

Expand Down

0 comments on commit 232d79a

Please sign in to comment.