From 797f6300dc1c9d10d5ebbc27b2b0c6e056a1926e Mon Sep 17 00:00:00 2001 From: Nikhil Nanal Date: Mon, 13 Nov 2023 11:25:20 -0800 Subject: [PATCH] Intel/CI: Enable spawn tests on the CI cluster - remove all Jenkins stages except the mpich test suite stage - enable the spawn tests in tests.py - disable deletion of dirs in the Jenkinsfile to allow manual testing on the CI cluster - disable the mpich configure options that turn off oshmem and the ch4 shmmods, so that the rma/win_shared_put_flush_load test can be run Signed-off-by: Nikhil Nanal --- contrib/intel/jenkins/Jenkinsfile | 308 +----------------------------- contrib/intel/jenkins/build.py | 4 +- contrib/intel/jenkins/tests.py | 4 +- 3 files changed, 7 insertions(+), 309 deletions(-) diff --git a/contrib/intel/jenkins/Jenkinsfile b/contrib/intel/jenkins/Jenkinsfile index f4812809279..e43a0374986 100644 --- a/contrib/intel/jenkins/Jenkinsfile +++ b/contrib/intel/jenkins/Jenkinsfile @@ -1,6 +1,6 @@ import groovy.transform.Field -properties([disableConcurrentBuilds(abortPrevious: true)]) +/*properties([disableConcurrentBuilds(abortPrevious: true)])*/ @Field def DO_RUN=true @Field def TARGET="main" @Field def SCRIPT_LOCATION="py_scripts/contrib/intel/jenkins" @@ -361,200 +361,11 @@ pipeline { } } } - stage ('build_ucx') { - steps { - script { - dir ("${CUSTOM_WORKSPACE}/ucx") { - checkout scm - def prefix = "python$PYTHON_VERSION ${RUN_LOCATION}/build.py" - def opts = "" - def build_cmd = "" - for (mode in BUILD_MODES) { - for (item in ["libfabric", "fabtests"]) { - opts = "--build_item=${item} --ofi_build_mode=${mode} --ucx" - build_cmd = "${build_cmd} ${prefix} ${opts}; " - } - } - slurm_batch("squirtle,totodile", "1", - "${env.LOG_DIR}/libfabric_ucx_build_log", - "${build_cmd}") - } - } - } - } - stage ('build-daos') { - agent { - node { - label 'daos_head' - customWorkspace CUSTOM_WORKSPACE - } - } - steps { - script { - checkout_external_resources() - dir (CUSTOM_WORKSPACE) { - build("logdir") - build("libfabric", "reg", 
"daos") - build("fabtests", "reg") - } - } - } - } - stage ('build-gpu') { - agent { - node { - label 'ze' - customWorkspace CUSTOM_WORKSPACE - } - } - steps { - script { - checkout_external_resources() - dir (CUSTOM_WORKSPACE) { - build("logdir") - build("builddir") - build("libfabric", "reg", "gpu") - build("fabtests", "reg") - } - } - } - } } } stage('parallel-tests') { when { equals expected: true, actual: DO_RUN } parallel { - stage('MPI_verbs-rxm_IMB') { - steps { - script { - dir (RUN_LOCATION) { - def providers = [["verbs", "rxm"]] - for (def mpi in ["impi"]) { - for (imb_grp = 1; imb_grp < 4; imb_grp++) { - run_middleware(providers, "MPI", "IMB", - "squirtle,totodile", "2", "${mpi}", - "${imb_grp}") - } - } - } - } - } - } - stage('MPI_verbs-rxm_OSU') { - steps { - script { - dir (RUN_LOCATION) { - def providers = [["verbs", "rxm"]] - for (def mpi in ["impi", "mpich"]) { - run_middleware(providers, "MPI", "osu", "squirtle,totodile", - "2", "${mpi}") - } - } - } - } - } - stage('MPI_tcp') { - steps { - script { - dir (RUN_LOCATION) { - def providers = [["tcp", null]] - for (imb_grp = 1; imb_grp < 4; imb_grp++) { - run_middleware(providers, "MPI", "IMB", - "bulbasaur", "2", "impi", "${imb_grp}") - } - for (def mpi in ["impi", "mpich"]) { - run_middleware(providers, "MPI", "osu", "bulbasaur", "2", - "${mpi}") - } - } - } - } - } - stage('tcp') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("tcp", "bulbasaur", "2", "tcp") - } - } - } - } - stage('verbs-rxm') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs", - "rxm") - run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs", - "rxm", "FI_MR_CACHE_MAX_COUNT=0") - run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs", - "rxm", "FI_MR_CACHE_MONITOR=userfaultfd") - } - } - } - } - stage('verbs-rxd') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("verbs-rxd", "squirtle", "2", "verbs", - "rxd") - run_fabtests("verbs-rxd", 
"squirtle", "2", "verbs", - "rxd", "FI_MR_CACHE_MAX_COUNT=0") - run_fabtests("verbs-rxd", "squirtle", "2", "verbs", - "rxd", "FI_MR_CACHE_MONITOR=userfaultfd") - } - } - } - } - stage('udp') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("udp", "bulbasaur", "2", "udp") - } - } - } - } - stage('shm') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("shm", "bulbasaur", "1", "shm") - run_fabtests("shm", "bulbasaur", "1", "shm", null, - "FI_SHM_DISABLE_CMA=1") - } - } - } - } - stage('sockets') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("sockets", "bulbasaur", "2", "sockets") - } - } - } - } - stage('ucx') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("ucx", "totodile", "2", "ucx") - } - } - } - } - stage('psm3') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("psm3", "squirtle", "2", "psm3", null, - "PSM3_IDENTIFY=1") - } - } - } - } stage('mpichtestsuite') { steps { script { @@ -572,119 +383,6 @@ pipeline { } } } - stage('SHMEM') { - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["verbs", null], ["tcp", null], - ["sockets", null]], "SHMEM", "shmem", - "squirtle,totodile", "2") - } - } - } - } - stage ('multinode_performance') { - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["tcp", null]], "multinode_performance", - "multinode", "bulbasaur", "2") - } - } - } - } - stage ('oneCCL') { - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["tcp", "rxm"]/*, ["psm3", null]*/], "oneCCL", - "oneccl", "bulbasaur", "2") - } - } - } - } - stage ('oneCCL-GPU-v3') { - agent { node { label 'ze' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["verbs", "rxm"]], "oneCCL-GPU-v3", "onecclgpu", - "fabrics-ci", "2") - } - } - } - } - stage('daos_tcp') { - agent { node { label 'daos_tcp' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_python(PYTHON_VERSION, - """runtests.py 
--prov='tcp' --util='rxm' \ - --test=daos \ - --log_file=${env.LOG_DIR}/daos_tcp-rxm_reg""") - } - } - } - } - stage('daos_verbs') { - agent { node { label 'daos_verbs' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_python(PYTHON_VERSION, - """runtests.py --prov='verbs' --util='rxm' \ - --test=daos \ - --log_file=${env.LOG_DIR}/daos_verbs-rxm_reg""") - } - } - } - } - stage ('DMABUF-Tests') { - agent { node { label 'ze' } } - options { skipDefaultCheckout() } - steps { - script { - dir ("${env.WORKSPACE}/${SCRIPT_LOCATION}/") { - dmabuf_output = "${LOG_DIR}/DMABUF-Tests_verbs-rxm_dmabuf" - cmd = """ python3.9 runtests.py --test=dmabuf \ - --prov=verbs --util=rxm""" - slurm_batch("fabrics-ci", "1", "${dmabuf_output}_1_reg", - "${cmd}") - slurm_batch("fabrics-ci", "2", "${dmabuf_output}_2_reg", - "${cmd}") - } - } - } - } - stage ('ze-shm-v3') { - agent { node { label 'ze' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "h2d") - run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "d2d") - run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "xd2d") - } - } - } - } - stage('dsa') { - when { equals expected: true, actual: DO_RUN } - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("shm_dsa", "pikachu", "1", "shm", null, - """FI_SHM_DISABLE_CMA=1 FI_SHM_USE_DSA_SAR=1 \ - FI_LOG_LEVEL=warn""") - } - } - } - } } } stage ('Summary') { @@ -718,7 +416,7 @@ pipeline { send_mail=env.WEEKLY.toBoolean()) } } - aborted { +/* aborted { node ('daos_head') { dir ("${DELETE_LOCATION}/middlewares") { deleteDir() } } @@ -741,6 +439,6 @@ pipeline { dir("${DELETE_LOCATION}") { deleteDir() } dir("${env.WORKSPACE}") { deleteDir() } dir("${env.WORKSPACE}@tmp") { deleteDir() } - } + }*/ } } \ No newline at end of file diff --git a/contrib/intel/jenkins/build.py b/contrib/intel/jenkins/build.py index 
2856e798ae9..bc21b4c8a13 100755 --- a/contrib/intel/jenkins/build.py +++ b/contrib/intel/jenkins/build.py @@ -104,9 +104,9 @@ def build_mpich(libfab_installpath_mpich): configure_cmd = f"./configure " configure_cmd += f"--prefix={mpich_build_dir} " configure_cmd += f"--with-libfabric={libfab_installpath_mpich} " - configure_cmd += "--disable-oshmem " + #configure_cmd += "--disable-oshmem " configure_cmd += "--disable-fortran " - configure_cmd += "--without-ch4-shmmods " + #configure_cmd += "--without-ch4-shmmods " configure_cmd += "--with-device=ch4:ofi " configure_cmd += "--without-ze " print(configure_cmd) diff --git a/contrib/intel/jenkins/tests.py b/contrib/intel/jenkins/tests.py index d6ff5ff9cbb..5ff47f1ad34 100755 --- a/contrib/intel/jenkins/tests.py +++ b/contrib/intel/jenkins/tests.py @@ -688,8 +688,8 @@ def __init__(self, jobname, buildno, testname, core_prov, fabric, self.pwd = os.getcwd() self.weekly = weekly self.mpichtests_exclude = { - 'tcp' : { '.' : [('spawn','dir')], - 'rma' : [('win_shared_put_flush_load 3', 'test')], + 'tcp' : { #'.' : [('spawn','dir')], + # 'rma' : [('win_shared_put_flush_load 3', 'test')], 'threads' : [('spawn','dir')], 'threads/comm' : [('idup_nb 4','test'), ('idup_comm_gen 4','test')],