From 8941a5459a9787bb08ac1aaca1354085251e8ce7 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 23 Oct 2024 15:33:00 -0700
Subject: [PATCH] Add initial LC GitLab CI (#1904)

Co-authored-by: Cole Kendrick
---
Review notes (this section is ignored by `git am` and is not part of the
commit message):

* This text was re-flowed from a whitespace-collapsed copy of the patch.
  Indentation and the position of blank context lines were inferred from the
  hunk line counts. TODO: confirm against the original commit before applying.
* .gitlab/LC/gitlab_test.sh: the CPU-only fcompare pre-build used a bare
  `make -j`, which places no limit on the number of parallel jobs. It now
  uses the same `${OMP_NUM_THREADS:-16}` bound as the main build step.
* .gitlab/LC/gitlab_test.sh: path and compiler expansions are quoted, and the
  launcher is resolved with the shell builtin `command -v` instead of `which`.
  `module load ${modules}` stays unquoted on purpose so that MODULE_LIST is
  split into one argument per module.
* CMakeLists.txt: FCOMPARE_EXE is cached as FILEPATH rather than STRING, since
  the value is a path to an executable that users may override with -D.
* No hunk changes its line count, so the diffstat and hunk headers below are
  unchanged. The blob ids on the `index` lines still describe the original
  contents of the edited files.

 .gitlab/LC/.gitlab-ci.yml     | 95 +++++++++++++++++++++++++++++++++
 .gitlab/LC/gitlab_test.sh     | 99 +++++++++++++++++++++++++++++++++++
 .gitlab/LC/runners/dane.yml   | 59 +++++++++++++++++++++
 .gitlab/LC/runners/lassen.yml | 34 ++++++++++++
 .gitlab/LC/runners/tioga.yml  | 60 +++++++++++++++++++++
 CMakeLists.txt                | 18 +++++--
 Tests/CMakeLists.txt          | 15 ++++--
 Tests/CTestList.cmake         |  4 +-
 8 files changed, 373 insertions(+), 11 deletions(-)
 create mode 100644 .gitlab/LC/.gitlab-ci.yml
 create mode 100755 .gitlab/LC/gitlab_test.sh
 create mode 100644 .gitlab/LC/runners/dane.yml
 create mode 100644 .gitlab/LC/runners/lassen.yml
 create mode 100644 .gitlab/LC/runners/tioga.yml

diff --git a/.gitlab/LC/.gitlab-ci.yml b/.gitlab/LC/.gitlab-ci.yml
new file mode 100644
index 000000000..c42d6f952
--- /dev/null
+++ b/.gitlab/LC/.gitlab-ci.yml
@@ -0,0 +1,95 @@
+variables:
+  CUSTOM_CI_BUILDS_DIR: "/usr/workspace/$$USER/erf_gitlab_runner"
+
+  GIT_STRATEGY: fetch
+  GIT_SUBMODULE_STRATEGY: recursive
+  GIT_DEPTH: 1
+  GIT_SUBMODULE_DEPTH: 1
+
+  DEFAULT_BRANCH: llnl/development
+
+  ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID}
+  ALLOC_QUEUE: pci
+  ALLOC_TIME: 30
+  ALLOC_BANK: accatm
+
+  TEST_SCRIPT: .gitlab/LC/gitlab_test.sh
+
+  # Uncomment to disable testing on particular system
+  #ON_LASSEN: "OFF"
+  #ON_DANE: "OFF"
+  #ON_TIOGA: "OFF"
+
+stages:
+  - style
+  - allocate
+  - build
+  - release
+
+workflow:
+  rules:
+    # skip running branch pipelines if a MR is open for the branch
+    - if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push"
+      when: never
+    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
+    - if: $CI_PIPELINE_SOURCE == 'web'
+    - if: $CI_COMMIT_TAG
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+    # test the upstream branch
+    - if: $CI_COMMIT_BRANCH == 'development'
+    # branches starting with "gitlab"
+    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
+
+include:
+  # This include is required for LC with Gitlab 17+
+  # Refer to https://hpc.llnl.gov/technical-bulletins/bulletin-568
+  - project: 'lc-templates/id_tokens'
+    file: 'id_tokens.yml'
+  - .gitlab/LC/runners/lassen.yml
+  - .gitlab/LC/runners/dane.yml
+  - .gitlab/LC/runners/tioga.yml
+
+# Define actual CI jobs here:
+check_style:
+  extends: .on_dane
+  stage: style
+  rules:
+    # always run the style check on any push event
+    - if: $CI_PIPELINE_SOURCE == "push"
+    - when: on_success
+  script:
+    - echo "Running check_tabs.sh"
+    - .github/workflows/style/check_tabs.sh
+    - echo "Running check_trailing_whitespaces.sh"
+    - .github/workflows/style/check_trailing_whitespaces.sh
+
+dane_gcc_12_1_1:
+  variables:
+    MODULE_LIST: cmake gcc/12.1.1
+  extends: .job_on_dane
+
+lassen_gcc_12_2_1:
+  variables:
+    MODULE_LIST: cmake/3.23.1 gcc/12.2.1
+  extends: .job_on_lassen
+
+lassen_gcc_12_2_1_cuda:
+  variables:
+    MODULE_LIST: cmake/3.23.1 gcc/12.2.1 cuda/12.2.2
+    ERF_ENABLE_CUDA: "ON"
+    # NOTE: c++ and cc are used here over mpicxx/mpicc due to cmake issue finding mpi with cuda?
+    CMAKE_CXX_COMPILER: c++
+    CMAKE_C_COMPILER: cc
+    CUDA_ARCH: "70"
+    ERF_TEST_FCOMPARE_RTOL: "1.0e-8"
+    ERF_TEST_FCOMPARE_ATOL: "1.0e-9"
+  extends: .job_on_lassen
+
+tioga_hip_5.7.1:
+  variables:
+    MODULE_LIST: cmake/3.24.2 rocm/6.1.2 rocmcc/6.1.2-cce-18.0.0-magic craype-accel-amd-gfx90a
+    ERF_ENABLE_HIP: "ON"
+    AMD_ARCH: "gfx90a"
+    # NOTE: Running with Debug build type causes AMD linking errors with AMReX plotfiles=ON
+    BUILD_TYPE: "RelWithDebInfo"
+  extends: .job_on_tioga
diff --git a/.gitlab/LC/gitlab_test.sh b/.gitlab/LC/gitlab_test.sh
new file mode 100755
index 000000000..9ca01fbc1
--- /dev/null
+++ b/.gitlab/LC/gitlab_test.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+modules=${MODULE_LIST:-""}
+mpiexec_executable=${MPIEXEC_EXECUTABLE:-"srun"}
+# If using flux, append "run" after the flux executable path
+if [[ "${mpiexec_executable}" == "flux" ]]
+then
+    mpiexec_executable="$(command -v "${mpiexec_executable}") run"
+    flux jobs
+    flux resource list
+else
+    mpiexec_executable="$(command -v "${mpiexec_executable}")"
+fi
+
+mpiexec_preflags=${MPIEXEC_PREFLAGS:-""}
+host=$(hostname)
+build_type=${BUILD_TYPE:-"Debug"}
+
+ERF_ENABLE_CUDA=${ERF_ENABLE_CUDA:-"OFF"}
+
+basehost=${host//[[:digit:]]/}
+
+echo "${host}"
+
+build_dir=build_${host}_${CI_PIPELINE_ID}_$(date +%F_%H_%M_%S)
+
+if [[ -n ${modules} ]]
+then
+    module load ${modules}  # unquoted on purpose: one argument per module
+fi
+
+# Temporary workaround for CUDA builds:
+# AMReX fcompare seems to not work as expected if compiled with CUDA.
+# This builds a CPU version first and uses that fcompare executable during the
+# testing for the CUDA build
+if [[ "${ERF_ENABLE_CUDA}" == "ON" ]]
+then
+    echo "====================================================="
+    echo "Building CPU version first to get fcompare executable"
+    echo "====================================================="
+    mkdir "${build_dir}_cpu"
+    cd "${build_dir}_cpu"
+    pwd
+
+    cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \
+          -DCMAKE_CXX_COMPILER:STRING="${CMAKE_CXX_COMPILER:-mpicxx}" \
+          -DCMAKE_C_COMPILER:STRING="${CMAKE_C_COMPILER:-mpicc}" \
+          -DCMAKE_Fortran_COMPILER:STRING="${CMAKE_Fortran_COMPILER:-mpifort}" \
+          -DCMAKE_BUILD_TYPE:STRING=Release \
+          -DERF_DIM:STRING=3 \
+          -DERF_ENABLE_MPI:BOOL=ON \
+          -DERF_ENABLE_CUDA:BOOL=OFF \
+          -DERF_ENABLE_TESTS:BOOL=OFF \
+          -DERF_ENABLE_FCOMPARE:BOOL=ON \
+          -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \
+          -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON ..
+    make -j ${OMP_NUM_THREADS:-16} fcompare  # bounded: a bare -j sets no job limit
+
+    FCOMPARE_EXE="$(pwd)/Submodules/AMReX/Tools/Plotfile/amrex_fcompare"
+
+    cd ../
+
+    echo "====================================================="
+    echo "Using fcompare executable at: ${FCOMPARE_EXE}"
+    echo "====================================================="
+fi
+
+mkdir "${build_dir}"
+cd "${build_dir}"
+pwd
+
+cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \
+      -DCMAKE_CXX_COMPILER:STRING="${CMAKE_CXX_COMPILER:-mpicxx}" \
+      -DCMAKE_C_COMPILER:STRING="${CMAKE_C_COMPILER:-mpicc}" \
+      -DCMAKE_Fortran_COMPILER:STRING="${CMAKE_Fortran_COMPILER:-mpifort}" \
+      -DMPIEXEC_EXECUTABLE="${mpiexec_executable}" \
+      -DMPIEXEC_PREFLAGS:STRING="${mpiexec_preflags}" \
+      -DCMAKE_BUILD_TYPE:STRING="${build_type}" \
+      -DERF_DIM:STRING=3 \
+      -DERF_ENABLE_MPI:BOOL=ON \
+      -DERF_ENABLE_CUDA:BOOL="${ERF_ENABLE_CUDA}" \
+      -DAMReX_CUDA_ARCH:STRING="${CUDA_ARCH:-""}" \
+      -DERF_ENABLE_HIP:BOOL="${ERF_ENABLE_HIP:-"OFF"}" \
+      -DAMReX_AMD_ARCH:STRING="${AMD_ARCH:-""}" \
+      -DERF_ENABLE_TESTS:BOOL=ON \
+      -DERF_TEST_NRANKS:STRING="${ERF_TEST_NRANKS:-4}" \
+      -DERF_ENABLE_FCOMPARE:BOOL=ON \
+      -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \
+      -DFCOMPARE_EXE="${FCOMPARE_EXE:-"$(pwd)/Submodules/AMReX/Tools/Plotfile/amrex_fcompare"}" \
+      -DERF_TEST_FCOMPARE_RTOL="${ERF_TEST_FCOMPARE_RTOL:-"5.0e-9"}" \
+      -DERF_TEST_FCOMPARE_ATOL="${ERF_TEST_FCOMPARE_ATOL:-"2.0e-10"}" \
+      -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \
+      ..
+make -j ${OMP_NUM_THREADS:-16}
+ctest -VV --output-on-failure
diff --git a/.gitlab/LC/runners/dane.yml b/.gitlab/LC/runners/dane.yml
new file mode 100644
index 000000000..5dfa29f33
--- /dev/null
+++ b/.gitlab/LC/runners/dane.yml
@@ -0,0 +1,59 @@
+.retry:
+  retry:
+    max: 2
+    when:
+      - runner_system_failure
+
+.on_dane:
+  extends:
+    - .retry
+  tags:
+    - dane
+    - shell
+  rules:
+    - if: '$ON_DANE == "OFF"'
+      when: never
+    # test the upstream branch
+    - if: $CI_COMMIT_BRANCH == 'development'
+    # branches starting with "gitlab"
+    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
+    - if: $CI_PIPELINE_SOURCE == "push"
+      when: never
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_COMMIT_BRANCH == $DEFAULT_BRANCH
+    - if: '$CI_JOB_NAME =~ /release_resources_dane/'
+      when: always
+    - when: on_success
+
+allocate_resources_dane:
+  variables:
+    GIT_STRATEGY: none
+  extends:
+    - .on_dane
+  stage: allocate
+  script:
+    - salloc -N 1 --reservation=ci -A ${ALLOC_BANK} --time=${ALLOC_TIME} --no-shell --job-name=${ALLOC_NAME}
+
+release_resources_dane:
+  variables:
+    GIT_STRATEGY: none
+  extends:
+    - .on_dane
+  stage: release
+  script:
+    - export JOBID=$(squeue -h --name=${ALLOC_NAME} --format=%A)
+    - ([[ -n "${JOBID}" ]] && scancel ${JOBID})
+  when: always
+
+.job_on_dane:
+  extends: .on_dane
+  stage: build
+  needs: ["allocate_resources_dane"]
+  variables:
+    MPIEXEC_EXECUTABLE: srun
+    MPIEXEC_PREFLAGS: "--cpu-bind=cores -v"
+  script:
+    - echo "JOB NAME ${ALLOC_NAME}"
+    - export JOBID=$(squeue -h --name=${ALLOC_NAME} --format=%A)
+    - echo "SLURM ID ${JOBID}"
+    - srun $( [[ -n "${JOBID}" ]] && echo "--jobid=${JOBID}" ) -N 1 -t ${ALLOC_TIME} -v --overlap ${TEST_SCRIPT}
diff --git a/.gitlab/LC/runners/lassen.yml b/.gitlab/LC/runners/lassen.yml
new file mode 100644
index 000000000..bdefb3f85
--- /dev/null
+++ b/.gitlab/LC/runners/lassen.yml
@@ -0,0 +1,34 @@
+.retry:
+  retry:
+    max: 2
+    when:
+      - runner_system_failure
+
+.on_lassen:
+  extends:
+    - .retry
+  tags:
+    - lassen
+    - shell
+  rules:
+    - if: '$ON_LASSEN == "OFF"'
+      when: never
+    # test the upstream branch
+    - if: $CI_COMMIT_BRANCH == 'development'
+    # branches starting with "gitlab"
+    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
+    - if: $CI_PIPELINE_SOURCE == "push"
+      when: never
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_COMMIT_BRANCH == $DEFAULT_BRANCH
+    - when: on_success
+
+.job_on_lassen:
+  extends: .on_lassen
+  stage: build
+  needs: []
+  variables:
+    MPIEXEC_EXECUTABLE: jsrun
+    MPIEXEC_PREFLAGS: "-a 1 -c 1 -g 1"
+  script:
+    - bsub -q ${ALLOC_QUEUE} -W ${ALLOC_TIME} -G ${ALLOC_BANK} -J ${ALLOC_NAME} -nnodes 1 -Is ${TEST_SCRIPT}
diff --git a/.gitlab/LC/runners/tioga.yml b/.gitlab/LC/runners/tioga.yml
new file mode 100644
index 000000000..eecf7f9a3
--- /dev/null
+++ b/.gitlab/LC/runners/tioga.yml
@@ -0,0 +1,60 @@
+.retry:
+  retry:
+    max: 2
+    when:
+      - runner_system_failure
+
+.on_tioga:
+  extends:
+    - .retry
+  tags:
+    - tioga
+    - shell
+  rules:
+    - if: '$ON_TIOGA == "OFF"'
+      when: never
+    # test the upstream branch
+    - if: $CI_COMMIT_BRANCH == 'development'
+    # branches starting with "gitlab"
+    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
+    - if: $CI_PIPELINE_SOURCE == "push"
+      when: never
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_COMMIT_BRANCH == $DEFAULT_BRANCH
+    - if: '$CI_JOB_NAME =~ /release_resources_tioga/'
+      when: always
+    - when: on_success
+
+allocate_resources_tioga:
+  variables:
+    GIT_STRATEGY: none
+  extends:
+    - .on_tioga
+  stage: allocate
+  script:
+    - flux alloc -N 1 -q ${ALLOC_QUEUE} -t=${ALLOC_TIME} --bg --exclusive --job-name=${ALLOC_NAME}
+
+release_resources_tioga:
+  variables:
+    GIT_STRATEGY: none
+  extends:
+    - .on_tioga
+  stage: release
+  script:
+    - export JOBID=$(flux jobs -n --name=${ALLOC_NAME} --format="{id}")
+    - ([[ -n "${JOBID}" ]] && flux cancel ${JOBID})
+  when: always
+
+.job_on_tioga:
+  extends: .on_tioga
+  stage: build
+  needs: ["allocate_resources_tioga"]
+  variables:
+    # Note: "flux" gets expanded to "flux run" inside build script
+    MPIEXEC_EXECUTABLE: flux
+    MPIEXEC_PREFLAGS: "-c 1 -g 1 -o mpi-spectrum -o cpu-affinity=per-task -o gpu-affinity=per-task -vv"
+  script:
+    - echo "JOB NAME ${ALLOC_NAME}"
+    - export JOBID=$(flux jobs -n --name=${ALLOC_NAME} --format="{id}")
+    - echo "FLUX ID ${JOBID}"
+    - flux proxy $( [[ -n "${JOBID}" ]] && echo "${JOBID}" ) flux run -N 1 -n 1 -c 16 -vv ${TEST_SCRIPT}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1e59c3c5b..03fb265ad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,8 +78,13 @@ if (${ERF_USE_INTERNAL_AMREX})

   ########################### AMReX #####################################
   add_subdirectory(${AMREX_SUBMOD_LOCATION})
-  set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/amrex_fcompare
-    CACHE INTERNAL "Path to fcompare executable for regression tests")
+  if(WIN32)
+    set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/*/amrex_fcompare.exe
+      CACHE FILEPATH "Path to fcompare executable for regression tests")
+  else()
+    set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/amrex_fcompare
+      CACHE FILEPATH "Path to fcompare executable for regression tests")
+  endif()
 else()
   set(CMAKE_PREFIX_PATH ${AMREX_DIR} ${CMAKE_PREFIX_PATH})
   list(APPEND AMREX_COMPONENTS
@@ -109,8 +114,13 @@ else()

   find_package(AMReX CONFIG REQUIRED COMPONENTS ${AMREX_COMPONENTS})
   message(STATUS "Found AMReX = ${AMReX_DIR}")
-  set(FCOMPARE_EXE ${AMReX_DIR}/../../../bin/amrex_fcompare
-    CACHE INTERNAL "Path to fcompare executable for regression tests")
+  if(WIN32)
+    set(FCOMPARE_EXE ${AMReX_DIR}/../../../*/amrex_fcompare.exe
+      CACHE FILEPATH "Path to fcompare executable for regression tests")
+  else()
+    set(FCOMPARE_EXE ${AMReX_DIR}/../../../bin/amrex_fcompare
+      CACHE FILEPATH "Path to fcompare executable for regression tests")
+  endif()
 endif()

 ########################## NETCDF ##################################
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 1e1dbaf82..4a5aa706a 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -1,7 +1,12 @@
-if(WIN32)
-  set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/*/amrex_fcompare.exe CACHE INTERNAL "Path to fcompare executable for regression tests")
-else()
-  set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/amrex_fcompare CACHE INTERNAL "Path to fcompare executable for regression tests")
-endif()
+# Additional testing options
 set(ERF_TEST_NRANKS 2 CACHE STRING "Number of MPI ranks to use for each test")
+set(ERF_TEST_FCOMPARE_RTOL "2.0e-10" CACHE STRING "fcompare relative tolerance")
+set(ERF_TEST_FCOMPARE_ATOL "2.0e-10" CACHE STRING "fcompare absolute tolerance")
+
+message(STATUS "ERF testing configuration summary:")
+message(STATUS "  Number of ranks = ${ERF_TEST_NRANKS}")
+message(STATUS "  fcompare executable = ${FCOMPARE_EXE}")
+message(STATUS "  comparison relative tolerance = ${ERF_TEST_FCOMPARE_RTOL}")
+message(STATUS "  comparison absolute tolerance = ${ERF_TEST_FCOMPARE_ATOL}")
+
 include(${CMAKE_CURRENT_SOURCE_DIR}/CTestList.cmake)
diff --git a/Tests/CTestList.cmake b/Tests/CTestList.cmake
index 62428e1f5..231bdf855 100644
--- a/Tests/CTestList.cmake
+++ b/Tests/CTestList.cmake
@@ -43,7 +43,7 @@ function(add_test_r TEST_NAME TEST_EXE PLTFILE)
     endif()

     set(TEST_EXE ${CMAKE_BINARY_DIR}/Exec/${TEST_EXE})
-    set(FCOMPARE_TOLERANCE "-r 2e-10 --abs_tol 2.0e-10")
+    set(FCOMPARE_TOLERANCE "-r ${ERF_TEST_FCOMPARE_RTOL} --abs_tol ${ERF_TEST_FCOMPARE_ATOL}")
     set(FCOMPARE_FLAGS "--abort_if_not_all_found -a ${FCOMPARE_TOLERANCE}")
     set(test_command sh -c "${MPI_COMMANDS} ${TEST_EXE} ${CURRENT_TEST_BINARY_DIR}/${TEST_NAME}.i ${RUNTIME_OPTIONS} > ${TEST_NAME}.log && ${MPI_FCOMP_COMMANDS} ${FCOMPARE_EXE} ${FCOMPARE_FLAGS} ${PLOT_GOLD} ${CURRENT_TEST_BINARY_DIR}/${PLTFILE}")

@@ -85,7 +85,7 @@ function(add_test_0 TEST_NAME TEST_EXE PLTFILE)
     set(TEST_EXE ${CMAKE_BINARY_DIR}/Exec/${TEST_EXE})
     set(FCOMPARE_TOLERANCE "-r 1e-14 --abs_tol 1.0e-14")
     set(FCOMPARE_FLAGS "-a ${FCOMPARE_TOLERANCE}")
-    set(test_command sh -c "${MPI_COMMANDS} ${TEST_EXE} ${CURRENT_TEST_BINARY_DIR}/${TEST_NAME}.i erf.input_sounding_file=${CURRENT_TEST_BINARY_DIR}/input_sounding > ${TEST_NAME}.log && ${FCOMPARE_EXE} ${FCOMPARE_FLAGS} ${CURRENT_TEST_BINARY_DIR}/plt00000 ${CURRENT_TEST_BINARY_DIR}/${PLTFILE}")
+    set(test_command sh -c "${MPI_COMMANDS} ${TEST_EXE} ${CURRENT_TEST_BINARY_DIR}/${TEST_NAME}.i erf.input_sounding_file=${CURRENT_TEST_BINARY_DIR}/input_sounding > ${TEST_NAME}.log && ${MPI_FCOMP_COMMANDS} ${FCOMPARE_EXE} ${FCOMPARE_FLAGS} ${CURRENT_TEST_BINARY_DIR}/plt00000 ${CURRENT_TEST_BINARY_DIR}/${PLTFILE}")
     add_test(${TEST_NAME} ${test_command})
     set_tests_properties(${TEST_NAME}