Skip to content

Commit

Permalink
intel/ci: Add code changes to enable weekly job options for mpichsuite.
Browse files Browse the repository at this point in the history
Signed-off-by: Nikhil Nanal<[email protected]>
  • Loading branch information
nikhilnanal committed Sep 5, 2023
1 parent 936b1d3 commit 0026221
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 67 deletions.
43 changes: 34 additions & 9 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def run_python(version, command, output=null) {

def slurm_batch(partition, node_num, output, command) {
try {
sh """timeout 3600 sbatch --partition=${partition} -N ${node_num} \
sh """timeout 7200 sbatch --partition=${partition} -N ${node_num} \
--wait -o ${output} --open-mode=append --wrap=\'env; ${command}\'
"""
} catch (Exception e) {
Expand Down Expand Up @@ -63,6 +63,9 @@ def run_middleware(providers, stage_name, test, partition, node_num, mpi=null,
if (imb_grp)
base_cmd = "${base_cmd} --imb_grp=${imb_grp}"

if (env.WEEKLY.toBoolean())
base_cmd = "${base_cmd} --weekly=${env.WEEKLY}"

for (prov in providers) {
if (prov[1]) {
echo "Running ${prov[0]}-${prov[1]} ${stage_name}"
Expand Down Expand Up @@ -235,7 +238,6 @@ pipeline {
RUN_LOCATION="${env.WORKSPACE}/${SCRIPT_LOCATION}/"
CUSTOM_WORKSPACE="${CB_HOME}/workspace/${JOB_NAME}/${env.BUILD_NUMBER}"
}

stages {
stage ('opt-out') {
steps {
Expand All @@ -258,17 +260,26 @@ pipeline {
}
}
}
stage ('prepare build') {
when { equals expected: true, actual: DO_RUN }
steps {
script {
echo "Copying build dirs."
build("builddir")
echo "Copying log dirs."
build("logdir", null, null, RELEASE)
build("mpich")
build("impi_mpich")
}
}
}
stage ('parallel-builds') {
when { equals expected: true, actual: DO_RUN }
parallel {
stage ('build') {
steps {
script {
dir (CUSTOM_WORKSPACE) {
echo "Copying build dirs."
build("builddir")
echo "Copying log dirs."
build("logdir", null, null, RELEASE)
for (mode in BUILD_MODES) {
echo "Building Libfabric $mode"
build("libfabric", "$mode")
Expand All @@ -279,6 +290,21 @@ pipeline {
}
}
}
stage ('buildmpich-libfabric') {
steps {
script {
dir("${CUSTOM_WORKSPACE}/mpich"){
checkout scm
echo "Building Libfabric reg"
slurm_batch("squirtle,totodile", "1",
"${env.LOG_DIR}/libfabric_mpich_log",
"""python$PYTHON_VERSION ${RUN_LOCATION}/build.py \
--build_item=libfabric --build_cluster=mpich """
)
}
}
}
}
stage ('build-daos') {
agent {
node {
Expand Down Expand Up @@ -456,8 +482,7 @@ pipeline {
steps {
script {
dir (RUN_LOCATION) {
def providers = [["verbs", "rxm"], ["tcp", null],
["tcp", "rxm"], ["sockets", null]]
def providers = [['tcp'],["verbs","rxm"]]
for (mpi in MPI_TYPES) {
run_middleware(providers, "mpichtestsuite", "mpichtestsuite",
"squirtle,totodile", "2", "${mpi}")
Expand Down Expand Up @@ -691,4 +716,4 @@ pipeline {
dir("${env.WORKSPACE}@tmp") { deleteDir() }
}
}
}
}
41 changes: 35 additions & 6 deletions contrib/intel/jenkins/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,30 @@ def build_fabtests(libfab_install_path, mode):
common.run_command(['make', '-j32'])
common.run_command(['make', 'install'])

def extract_mpich(mpitype):
    """Unpack the MPICH test tarball for *mpitype* under the build directory.

    The destination directory below ``cloudbees_config.build_dir`` is removed
    if it already exists and then recreated; the tarball is copied there from
    ``cloudbees_config.scm_dir`` and extracted in place. The caller's working
    directory is restored afterwards, even if extraction raises.

    Args:
        mpitype: ``'mpich'`` or ``'impi'``; selects the source directory,
            destination directory and tarball name.

    Raises:
        SystemExit: if *mpitype* is not one of the supported values.
    """
    if mpitype == 'mpich':
        src_dir = 'mpich'
        dest = 'mpich_temp'
        mpich_tar = 'mpich-4.1.tar.gz'
    elif mpitype == 'impi':
        src_dir = 'impi_mpichtest'
        dest = 'impi_mpichtest'
        mpich_tar = 'mpich-test.tar.gz'
    else:
        print(f"Invalid mpi type {mpitype}")
        # A bare `exit` only names the builtin without calling it, so the
        # function used to fall through and fail later on unbound locals.
        # Terminate explicitly with a non-zero status instead.
        raise SystemExit(1)

    dest_dir = f'{cloudbees_config.build_dir}/{dest}/'
    cwd = os.getcwd()

    # Always start from an empty destination directory.
    if os.path.exists(dest_dir):
        common.run_command(['rm', '-rf', dest_dir])
    os.makedirs(f'{cloudbees_config.build_dir}/{dest}')

    shutil.copy(f'{cloudbees_config.scm_dir}/{src_dir}/{mpich_tar}', dest_dir)

    # tar extracts relative to the current directory, so switch into the
    # destination and make sure we switch back whatever happens.
    os.chdir(dest_dir)
    try:
        common.run_command(['tar', '-xvf', f'{dest_dir}{mpich_tar}'])
    finally:
        os.chdir(cwd)

def copy_build_dir(install_path):
middlewares_path = f'{install_path}/middlewares'
if (os.path.exists(middlewares_path) != True):
Expand All @@ -78,9 +102,6 @@ def copy_build_dir(install_path):
f'{middlewares_path}/shmem')
shutil.copytree(f'{cloudbees_config.build_dir}/oneccl',
f'{middlewares_path}/oneccl')

os.symlink(f'{cloudbees_config.build_dir}/mpich',
f'{middlewares_path}/mpich')
os.symlink(f'{cloudbees_config.build_dir}/impi',
f'{middlewares_path}/impi')
os.symlink(f'{cloudbees_config.build_dir}/ompi',
Expand Down Expand Up @@ -112,13 +133,13 @@ def log_dir(install_path, release=False):

parser = argparse.ArgumentParser()
parser.add_argument('--build_item', help="build libfabric or fabtests",
choices=['libfabric', 'fabtests', 'builddir', 'logdir'])
choices=['libfabric', 'fabtests', 'builddir', 'logdir','mpich', 'impi_mpich'])

parser.add_argument('--ofi_build_mode', help="select buildmode libfabric "\
"build mode", choices=['reg', 'dbg', 'dl'])

parser.add_argument('--build_cluster', help="build libfabric on specified cluster", \
choices=['daos', 'gpu'], default='default')
choices=['daos', 'dsa', 'gpu','mpich'], default='default')
parser.add_argument('--release', help="This job is likely testing a "\
"release and will be checked into a git tree.",
action='store_true')
Expand All @@ -145,11 +166,19 @@ def log_dir(install_path, release=False):
p = re.compile('mpi*')

if (build_item == 'libfabric'):
build_libfabric(libfab_install_path, ofi_build_mode, cluster, ucx)
if (cluster == 'mpich'):
libfab_install_path += "/libfabric_mpich"
build_libfabric(libfab_install_path, ofi_build_mode, cluster)
else:
build_libfabric(libfab_install_path, ofi_build_mode, cluster, ucx)

elif (build_item == 'fabtests'):
build_fabtests(libfab_install_path, ofi_build_mode)

elif (build_item == 'mpich'):
extract_mpich('mpich')
elif (build_item == 'impi_mpich'):
extract_mpich('impi')
elif (build_item == 'builddir'):
copy_build_dir(install_path)

Expand Down
11 changes: 7 additions & 4 deletions contrib/intel/jenkins/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,21 @@ def intel_mpi_benchmark(core, hosts, mpi, mode, group, user_env, log_file, util)
print(f"Skipping {mpi.upper} {imb.testname} as execute condition fails")
print('-------------------------------------------------------------------')

def mpich_test_suite(core, hosts, mpi, mode, user_env, log_file, util):
def mpich_test_suite(core, hosts, mpi, mode, user_env, log_file, util, weekly=None):

mpich_tests = tests.MpichTestSuite(jobname=jbname,buildno=bno,
testname="MpichTestSuite",core_prov=core,
fabric=fab, mpitype=mpi, hosts=hosts,
ofi_build_mode=mode, user_env=user_env,
log_file=log_file, util_prov=util)
log_file=log_file, util_prov=util, weekly=weekly)

print('-------------------------------------------------------------------')
if (mpich_tests.execute_condn == True):
print(f"Running mpichtestsuite: Spawn Tests for {core}-{util}-{fab}-{mpi}")
mpich_tests.execute_cmd("spawn")
print(f"Running mpichtestsuitefor {core}-{util}-{fab}-{mpi}")
if (mpi == "mpich"):
print("Building mpich")
mpich_tests.build_mpich()
mpich_tests.execute_cmd()
else:
print(f"Skipping {mpi.upper()} {mpich_tests.testname} as exec condn fails")
print('-------------------------------------------------------------------')
Expand Down
4 changes: 3 additions & 1 deletion contrib/intel/jenkins/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __call__(self, parser, namespace, values, option_string=None):
choices=['impi', 'mpich', 'ompi'], default='impi')
parser.add_argument('--log_file', help="Full path to log file",
default=os.environ['DEFAULT_LOG_LOCATION'], type=str)
# argparse's type=bool treats every non-empty string as True, so
# "--weekly=False" would enable weekly mode. Parse the text explicitly:
# only the usual "true" spellings turn the option on.
parser.add_argument('--weekly', help="run weekly", default=False,
                    type=lambda value: str(value).strip().lower()
                    in ('1', 'true', 'yes', 'on'))

args = parser.parse_args()
args_core = args.prov
Expand All @@ -45,6 +46,7 @@ def __call__(self, parser, namespace, values, option_string=None):
args_device = args.device
user_env = args.user_env
log_file = args.log_file
weekly = args.weekly

if (args.ofi_build_mode):
ofi_build_mode = args.ofi_build_mode
Expand Down Expand Up @@ -131,7 +133,7 @@ def __call__(self, parser, namespace, values, option_string=None):
if (run_test == 'all' or run_test == 'mpichtestsuite'):
run.mpich_test_suite(args_core, hosts, mpi,
ofi_build_mode, user_env, log_file,
args_util)
args_util, weekly)

if (run_test == 'all' or run_test == 'IMB'):
run.intel_mpi_benchmark(args_core, hosts, mpi,
Expand Down
Loading

0 comments on commit 0026221

Please sign in to comment.