Skip to content

Commit

Permalink
contrib/intel/jenkins: Make UCX build on compute node
Browse files Browse the repository at this point in the history
UCX has version/environment issues when it is built on the
head node and run on a compute node. Changing its
build args and forcing the build onto a compute node
resolves the test failures.

Signed-off-by: Zach Dworkin <[email protected]>
  • Loading branch information
zachdworkin committed Sep 28, 2023
1 parent c9020f9 commit 0e0e4f6
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
29 changes: 21 additions & 8 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,27 @@ pipeline {
}
}
}
// Build libfabric and fabtests with the --ucx option for every build mode.
// Rather than running build.py directly in this stage, all build commands are
// joined into one shell command string and handed to slurm_batch.
stage ('build_ucx') {
steps {
script {
// Work in a dedicated "ucx" subdirectory of the custom workspace and check
// out the source there.
dir ("${CUSTOM_WORKSPACE}/ucx") {
checkout scm
// Common prefix of each command: the python interpreter selected by
// PYTHON_VERSION running build.py from RUN_LOCATION.
def prefix = "python$PYTHON_VERSION ${RUN_LOCATION}/build.py"
def opts = ""
def build_cmd = ""
// Append one build.py invocation per (mode, item) pair; commands are
// separated by "; " so they execute sequentially in a single shell line.
for (mode in BUILD_MODES) {
for (item in ["libfabric", "fabtests"]) {
opts = "--build_item=${item} --ofi_build_mode=${mode} --ucx"
build_cmd = "${build_cmd} ${prefix} ${opts}; "
}
}
// NOTE(review): slurm_batch is defined outside this view; the arguments
// look like (partition list, node count, log file path, command) --
// confirm against its definition.
slurm_batch("squirtle,totodile", "1",
"${env.LOG_DIR}/libfabric_ucx_build_log",
"${build_cmd}")
}
}
}
}
stage ('build-daos') {
agent {
node {
Expand Down Expand Up @@ -471,14 +492,6 @@ pipeline {
stage('ucx') {
steps {
script {
dir (CUSTOM_WORKSPACE) {
for (mode in BUILD_MODES) {
echo "Building Libfabric $mode"
build("libfabric", "${mode}", null, false, "--ucx")
echo "Building Fabtests $mode"
build("fabtests", "${mode}", null, false, "--ucx")
}
}
dir (RUN_LOCATION) {
run_fabtests("ucx", "totodile", "2", "ucx")
}
Expand Down
8 changes: 6 additions & 2 deletions contrib/intel/jenkins/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def build_libfabric(libfab_install_path, mode, cluster=None, ucx=None):
prov_list = common.daos_prov_list
elif (cluster == 'gpu'):
prov_list = common.gpu_prov_list
elif (ucx):
prov_list = common.ucx_prov_list
else:
prov_list = common.default_prov_list

Expand All @@ -41,7 +43,7 @@ def build_libfabric(libfab_install_path, mode, cluster=None, ucx=None):
for op in common.common_disable_list:
config_cmd.append(f'--enable-{op}=no')

if (cluster == 'default' and build_item != 'libfabric_mpich'):
if (cluster == 'default' and build_item != 'libfabric_mpich' and not ucx):
for op in common.default_enable_list:
config_cmd.append(f'--enable-{op}')

Expand Down Expand Up @@ -186,13 +188,15 @@ def log_dir(install_path, release=False):
libfab_install_path = f'{cloudbees_config.install_dir}/{jobname}/{buildno}/{ofi_build_mode}'

if (ucx):
libfab_install_path += "/ucx"
libfab_install_path += '/ucx'
workspace += '/ucx'

p = re.compile('mpi*')

if (build_item == 'libfabric'):
build_libfabric(libfab_install_path, ofi_build_mode, cluster, ucx)
elif (build_item == 'libfabric_mpich'):
workspace += '/mpich'
build_libfabric(f'{libfab_install_path}/libfabric_mpich',
ofi_build_mode, cluster)
elif (build_item == 'mpich'):
Expand Down
3 changes: 3 additions & 0 deletions contrib/intel/jenkins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ def run(self):
'psm3',
'ucx'
]
# Providers selected by build.py when a UCX build is requested.
ucx_prov_list = ['ucx']
daos_prov_list = [
'verbs',
'tcp'
Expand Down

0 comments on commit 0e0e4f6

Please sign in to comment.