Skip to content

Commit

Permalink
Intel/CI: Enable Spawn tests to test them on CI Cluster
Browse files Browse the repository at this point in the history
- remove all Jenkins stages that are not related to the mpich test suite
- enable spawn tests from tests.py
- comment out the deletion of dirs in the Jenkinsfile to allow manual testing on the CI cluster.
- remove the configure options (--disable-oshmem, --without-ch4-shmmods) that kept shmem and shmmods out of the mpich build,
  so that the rma/win_shared_put_flush_load test can be tested

Signed-off-by: Nikhil Nanal <[email protected]>
  • Loading branch information
nikhilnanal committed Nov 13, 2023
1 parent 7ee5bc1 commit 797f630
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 309 deletions.
308 changes: 3 additions & 305 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import groovy.transform.Field

properties([disableConcurrentBuilds(abortPrevious: true)])
/*properties([disableConcurrentBuilds(abortPrevious: true)])*/
@Field def DO_RUN=true
@Field def TARGET="main"
@Field def SCRIPT_LOCATION="py_scripts/contrib/intel/jenkins"
Expand Down Expand Up @@ -361,200 +361,11 @@ pipeline {
}
}
}
stage ('build_ucx') {
steps {
script {
dir ("${CUSTOM_WORKSPACE}/ucx") {
checkout scm
def prefix = "python$PYTHON_VERSION ${RUN_LOCATION}/build.py"
def opts = ""
def build_cmd = ""
for (mode in BUILD_MODES) {
for (item in ["libfabric", "fabtests"]) {
opts = "--build_item=${item} --ofi_build_mode=${mode} --ucx"
build_cmd = "${build_cmd} ${prefix} ${opts}; "
}
}
slurm_batch("squirtle,totodile", "1",
"${env.LOG_DIR}/libfabric_ucx_build_log",
"${build_cmd}")
}
}
}
}
stage ('build-daos') {
agent {
node {
label 'daos_head'
customWorkspace CUSTOM_WORKSPACE
}
}
steps {
script {
checkout_external_resources()
dir (CUSTOM_WORKSPACE) {
build("logdir")
build("libfabric", "reg", "daos")
build("fabtests", "reg")
}
}
}
}
stage ('build-gpu') {
agent {
node {
label 'ze'
customWorkspace CUSTOM_WORKSPACE
}
}
steps {
script {
checkout_external_resources()
dir (CUSTOM_WORKSPACE) {
build("logdir")
build("builddir")
build("libfabric", "reg", "gpu")
build("fabtests", "reg")
}
}
}
}
}
}
stage('parallel-tests') {
when { equals expected: true, actual: DO_RUN }
parallel {
stage('MPI_verbs-rxm_IMB') {
steps {
script {
dir (RUN_LOCATION) {
def providers = [["verbs", "rxm"]]
for (def mpi in ["impi"]) {
for (imb_grp = 1; imb_grp < 4; imb_grp++) {
run_middleware(providers, "MPI", "IMB",
"squirtle,totodile", "2", "${mpi}",
"${imb_grp}")
}
}
}
}
}
}
stage('MPI_verbs-rxm_OSU') {
steps {
script {
dir (RUN_LOCATION) {
def providers = [["verbs", "rxm"]]
for (def mpi in ["impi", "mpich"]) {
run_middleware(providers, "MPI", "osu", "squirtle,totodile",
"2", "${mpi}")
}
}
}
}
}
stage('MPI_tcp') {
steps {
script {
dir (RUN_LOCATION) {
def providers = [["tcp", null]]
for (imb_grp = 1; imb_grp < 4; imb_grp++) {
run_middleware(providers, "MPI", "IMB",
"bulbasaur", "2", "impi", "${imb_grp}")
}
for (def mpi in ["impi", "mpich"]) {
run_middleware(providers, "MPI", "osu", "bulbasaur", "2",
"${mpi}")
}
}
}
}
}
stage('tcp') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("tcp", "bulbasaur", "2", "tcp")
}
}
}
}
stage('verbs-rxm') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs",
"rxm")
run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs",
"rxm", "FI_MR_CACHE_MAX_COUNT=0")
run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs",
"rxm", "FI_MR_CACHE_MONITOR=userfaultfd")
}
}
}
}
stage('verbs-rxd') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("verbs-rxd", "squirtle", "2", "verbs",
"rxd")
run_fabtests("verbs-rxd", "squirtle", "2", "verbs",
"rxd", "FI_MR_CACHE_MAX_COUNT=0")
run_fabtests("verbs-rxd", "squirtle", "2", "verbs",
"rxd", "FI_MR_CACHE_MONITOR=userfaultfd")
}
}
}
}
stage('udp') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("udp", "bulbasaur", "2", "udp")
}
}
}
}
stage('shm') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("shm", "bulbasaur", "1", "shm")
run_fabtests("shm", "bulbasaur", "1", "shm", null,
"FI_SHM_DISABLE_CMA=1")
}
}
}
}
stage('sockets') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("sockets", "bulbasaur", "2", "sockets")
}
}
}
}
stage('ucx') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("ucx", "totodile", "2", "ucx")
}
}
}
}
stage('psm3') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("psm3", "squirtle", "2", "psm3", null,
"PSM3_IDENTIFY=1")
}
}
}
}
stage('mpichtestsuite') {
steps {
script {
Expand All @@ -572,119 +383,6 @@ pipeline {
}
}
}
stage('SHMEM') {
steps {
script {
dir (RUN_LOCATION) {
run_middleware([["verbs", null], ["tcp", null],
["sockets", null]], "SHMEM", "shmem",
"squirtle,totodile", "2")
}
}
}
}
stage ('multinode_performance') {
steps {
script {
dir (RUN_LOCATION) {
run_middleware([["tcp", null]], "multinode_performance",
"multinode", "bulbasaur", "2")
}
}
}
}
stage ('oneCCL') {
steps {
script {
dir (RUN_LOCATION) {
run_middleware([["tcp", "rxm"]/*, ["psm3", null]*/], "oneCCL",
"oneccl", "bulbasaur", "2")
}
}
}
}
stage ('oneCCL-GPU-v3') {
agent { node { label 'ze' } }
options { skipDefaultCheckout() }
steps {
script {
dir (RUN_LOCATION) {
run_middleware([["verbs", "rxm"]], "oneCCL-GPU-v3", "onecclgpu",
"fabrics-ci", "2")
}
}
}
}
stage('daos_tcp') {
agent { node { label 'daos_tcp' } }
options { skipDefaultCheckout() }
steps {
script {
dir (RUN_LOCATION) {
run_python(PYTHON_VERSION,
"""runtests.py --prov='tcp' --util='rxm' \
--test=daos \
--log_file=${env.LOG_DIR}/daos_tcp-rxm_reg""")
}
}
}
}
stage('daos_verbs') {
agent { node { label 'daos_verbs' } }
options { skipDefaultCheckout() }
steps {
script {
dir (RUN_LOCATION) {
run_python(PYTHON_VERSION,
"""runtests.py --prov='verbs' --util='rxm' \
--test=daos \
--log_file=${env.LOG_DIR}/daos_verbs-rxm_reg""")
}
}
}
}
stage ('DMABUF-Tests') {
agent { node { label 'ze' } }
options { skipDefaultCheckout() }
steps {
script {
dir ("${env.WORKSPACE}/${SCRIPT_LOCATION}/") {
dmabuf_output = "${LOG_DIR}/DMABUF-Tests_verbs-rxm_dmabuf"
cmd = """ python3.9 runtests.py --test=dmabuf \
--prov=verbs --util=rxm"""
slurm_batch("fabrics-ci", "1", "${dmabuf_output}_1_reg",
"${cmd}")
slurm_batch("fabrics-ci", "2", "${dmabuf_output}_2_reg",
"${cmd}")
}
}
}
}
stage ('ze-shm-v3') {
agent { node { label 'ze' } }
options { skipDefaultCheckout() }
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "h2d")
run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "d2d")
run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "xd2d")
}
}
}
}
stage('dsa') {
when { equals expected: true, actual: DO_RUN }
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("shm_dsa", "pikachu", "1", "shm", null,
"""FI_SHM_DISABLE_CMA=1 FI_SHM_USE_DSA_SAR=1 \
FI_LOG_LEVEL=warn""")
}
}
}
}
}
}
stage ('Summary') {
Expand Down Expand Up @@ -718,7 +416,7 @@ pipeline {
send_mail=env.WEEKLY.toBoolean())
}
}
aborted {
/* aborted {
node ('daos_head') {
dir ("${DELETE_LOCATION}/middlewares") { deleteDir() }
}
Expand All @@ -741,6 +439,6 @@ pipeline {
dir("${DELETE_LOCATION}") { deleteDir() }
dir("${env.WORKSPACE}") { deleteDir() }
dir("${env.WORKSPACE}@tmp") { deleteDir() }
}
}*/
}
}
4 changes: 2 additions & 2 deletions contrib/intel/jenkins/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ def build_mpich(libfab_installpath_mpich):
configure_cmd = f"./configure "
configure_cmd += f"--prefix={mpich_build_dir} "
configure_cmd += f"--with-libfabric={libfab_installpath_mpich} "
configure_cmd += "--disable-oshmem "
#configure_cmd += "--disable-oshmem "
configure_cmd += "--disable-fortran "
configure_cmd += "--without-ch4-shmmods "
#configure_cmd += "--without-ch4-shmmods "
configure_cmd += "--with-device=ch4:ofi "
configure_cmd += "--without-ze "
print(configure_cmd)
Expand Down
4 changes: 2 additions & 2 deletions contrib/intel/jenkins/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,8 +688,8 @@ def __init__(self, jobname, buildno, testname, core_prov, fabric,
self.pwd = os.getcwd()
self.weekly = weekly
self.mpichtests_exclude = {
'tcp' : { '.' : [('spawn','dir')],
'rma' : [('win_shared_put_flush_load 3', 'test')],
'tcp' : { #'.' : [('spawn','dir')],
# 'rma' : [('win_shared_put_flush_load 3', 'test')],
'threads' : [('spawn','dir')],
'threads/comm' : [('idup_nb 4','test'),
('idup_comm_gen 4','test')],
Expand Down

0 comments on commit 797f630

Please sign in to comment.