Skip to content

Commit

Permalink
contrib/intel/jenkins: Make UCX build on compute node
Browse files Browse the repository at this point in the history
UCX has version/environment issues when it is built on the
head node and then run on a compute node. Changing its
build args and forcing it to build on a compute node
solves the test failures.

Signed-off-by: Zach Dworkin <[email protected]>
  • Loading branch information
zachdworkin committed Oct 2, 2023
1 parent 862428b commit 102a18d
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 15 deletions.
29 changes: 21 additions & 8 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,27 @@ pipeline {
}
}
}
// Builds the UCX flavor of libfabric and fabtests for every entry in
// BUILD_MODES. All build.py invocations are concatenated into one command
// string and handed to slurm_batch in a single call, rather than being run
// directly by this stage.
stage ('build_ucx') {
  steps {
    script {
      // The UCX build works from its own checkout under the workspace.
      dir ("${CUSTOM_WORKSPACE}/ucx") {
        checkout scm

        def build_script = "python$PYTHON_VERSION ${RUN_LOCATION}/build.py"
        def batch_cmd = ""

        // One build.py call per (mode, item) pair; each call is terminated
        // with "; " so the accumulated string runs them sequentially.
        for (mode in BUILD_MODES) {
          for (item in ["libfabric", "fabtests"]) {
            def item_opts = "--build_item=${item} --ofi_build_mode=${mode} --ucx"
            batch_cmd = "${batch_cmd} ${build_script} ${item_opts}; "
          }
        }

        // NOTE(review): slurm_batch is defined elsewhere; the arguments are
        // presumably (partitions, node count, log file, command) -- confirm
        // against its definition.
        slurm_batch("squirtle,totodile", "1",
                    "${env.LOG_DIR}/libfabric_ucx_build_log",
                    "${batch_cmd}")
      }
    }
  }
}
stage ('build-daos') {
agent {
node {
Expand Down Expand Up @@ -471,14 +492,6 @@ pipeline {
stage('ucx') {
steps {
script {
dir (CUSTOM_WORKSPACE) {
for (mode in BUILD_MODES) {
echo "Building Libfabric $mode"
build("libfabric", "${mode}", null, false, "--ucx")
echo "Building Fabtests $mode"
build("fabtests", "${mode}", null, false, "--ucx")
}
}
dir (RUN_LOCATION) {
run_fabtests("ucx", "totodile", "2", "ucx")
}
Expand Down
12 changes: 7 additions & 5 deletions contrib/intel/jenkins/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def build_libfabric(libfab_install_path, mode, cluster=None, ucx=None):
config_cmd.append('--enable-debug')
elif (mode == 'dl'):
enable_prov_val='dl'

if (cluster == 'daos'):
prov_list = common.daos_prov_list
elif (cluster == 'gpu'):
Expand All @@ -32,16 +33,16 @@ def build_libfabric(libfab_install_path, mode, cluster=None, ucx=None):
prov_list = common.default_prov_list

for prov in prov_list:
if (prov == 'ucx'):
if (ucx):
config_cmd.append('--enable-ucx=yes')
if (ucx):
config_cmd.append('--enable-ucx=yes')
break
else:
config_cmd.append(f'--enable-{prov}={enable_prov_val}')

for op in common.common_disable_list:
config_cmd.append(f'--enable-{op}=no')

if (cluster == 'default' and build_item != 'libfabric_mpich'):
if (cluster == 'default' and build_item != 'libfabric_mpich' and not ucx):
for op in common.default_enable_list:
config_cmd.append(f'--enable-{op}')

Expand Down Expand Up @@ -187,7 +188,8 @@ def log_dir(install_path, release=False):
libfab_install_path = f'{cloudbees_config.install_dir}/{jobname}/{buildno}/{ofi_build_mode}'

if (ucx):
libfab_install_path += "/ucx"
libfab_install_path += '/ucx'
workspace += '/ucx'

p = re.compile('mpi*')

Expand Down
3 changes: 1 addition & 2 deletions contrib/intel/jenkins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ def run(self):
'sockets',
'udp',
'shm',
'psm3',
'ucx'
'psm3'
]
daos_prov_list = [
'verbs',
Expand Down

0 comments on commit 102a18d

Please sign in to comment.