Skip to content

Commit

Permalink
Add initial LC GitLab CI (erf-model#1904)
Browse files Browse the repository at this point in the history
Co-authored-by: Cole Kendrick <[email protected]>
  • Loading branch information
gardner48 and ckendrick authored Oct 23, 2024
1 parent be89c40 commit 8941a54
Show file tree
Hide file tree
Showing 8 changed files with 373 additions and 11 deletions.
95 changes: 95 additions & 0 deletions .gitlab/LC/.gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
variables:
  # Script executed by every build job on the compute node.
  TEST_SCRIPT: .gitlab/LC/gitlab_test.sh

  # Branch that the per-machine runner rules compare CI_COMMIT_BRANCH against.
  DEFAULT_BRANCH: llnl/development

  # Scheduler allocation settings consumed by the runner templates.
  ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID}
  ALLOC_QUEUE: pci
  ALLOC_TIME: 30
  ALLOC_BANK: accatm

  # Shallow fetch of the repository and all of its submodules.
  GIT_STRATEGY: fetch
  GIT_DEPTH: 1
  GIT_SUBMODULE_STRATEGY: recursive
  GIT_SUBMODULE_DEPTH: 1

  # Per-user directory in which the runner keeps its builds.
  CUSTOM_CI_BUILDS_DIR: "/usr/workspace/$$USER/erf_gitlab_runner"

  # Uncomment to disable testing on a particular system
  #ON_LASSEN: "OFF"
  #ON_DANE: "OFF"
  #ON_TIOGA: "OFF"

# Pipeline stages, in execution order.
stages: [style, allocate, build, release]

# Decides whether a pipeline is created at all; the first matching rule wins.
workflow:
  rules:
    # A push to a branch that already has an open MR does not get a separate
    # branch pipeline.
    - when: never
      if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push"
    # Merge request pipelines.
    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
    # Pipelines started from the web UI.
    - if: $CI_PIPELINE_SOURCE == 'web'
    # Tag pipelines.
    - if: $CI_COMMIT_TAG
    # The project's default branch.
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    # The upstream branch.
    - if: $CI_COMMIT_BRANCH == 'development'
    # Any branch whose name starts with "gitlab".
    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/

include:
  # Required on LC with GitLab 17+; see
  # https://hpc.llnl.gov/technical-bulletins/bulletin-568
  - project: 'lc-templates/id_tokens'
    file: 'id_tokens.yml'
  # Per-machine runner templates (.on_<machine> / .job_on_<machine>).
  - local: .gitlab/LC/runners/lassen.yml
  - local: .gitlab/LC/runners/dane.yml
  - local: .gitlab/LC/runners/tioga.yml

# Define actual CI jobs here:
# Source style checks (tabs and trailing whitespace), run on a dane runner.
check_style:
  stage: style
  extends: .on_dane
  # These rules replace the ones inherited from .on_dane so that the style
  # check also runs on push events.
  rules:
    - if: $CI_PIPELINE_SOURCE == "push"
    - when: on_success
  script:
    - echo "Running check_tabs.sh"
    - .github/workflows/style/check_tabs.sh
    - echo "Running check_trailing_whitespaces.sh"
    - .github/workflows/style/check_trailing_whitespaces.sh

# CPU build and test on dane with GCC 12.1.1.
dane_gcc_12_1_1:
  extends: .job_on_dane
  variables:
    MODULE_LIST: cmake gcc/12.1.1

# CPU build and test on lassen with GCC 12.2.1.
lassen_gcc_12_2_1:
  extends: .job_on_lassen
  variables:
    MODULE_LIST: cmake/3.23.1 gcc/12.2.1

# CUDA build and test on lassen with GCC 12.2.1 and CUDA 12.2.2.
lassen_gcc_12_2_1_cuda:
  extends: .job_on_lassen
  variables:
    MODULE_LIST: cmake/3.23.1 gcc/12.2.1 cuda/12.2.2
    ERF_ENABLE_CUDA: "ON"
    CUDA_ARCH: "70"
    # NOTE: c++ and cc are used here instead of mpicxx/mpicc, apparently
    # because of a cmake issue finding mpi with cuda?
    CMAKE_CXX_COMPILER: c++
    CMAKE_C_COMPILER: cc
    # fcompare tolerances passed to the test script for this job.
    ERF_TEST_FCOMPARE_RTOL: "1.0e-8"
    ERF_TEST_FCOMPARE_ATOL: "1.0e-9"

# HIP build and test on tioga.
# NOTE(review): the job name says 5.7.1 but the modules below load
# rocm/6.1.2 -- confirm which version is intended before renaming the job.
tioga_hip_5.7.1:
  extends: .job_on_tioga
  variables:
    MODULE_LIST: cmake/3.24.2 rocm/6.1.2 rocmcc/6.1.2-cce-18.0.0-magic craype-accel-amd-gfx90a
    ERF_ENABLE_HIP: "ON"
    AMD_ARCH: "gfx90a"
    # NOTE: a Debug build type causes AMD linking errors with AMReX plotfiles=ON
    BUILD_TYPE: "RelWithDebInfo"
99 changes: 99 additions & 0 deletions .gitlab/LC/gitlab_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env bash
#
# Configure, build, and test ERF for a single CI job.
#
# Environment read by this script (defaults in parentheses):
#   MODULE_LIST         modules to load before building ("")
#   MPIEXEC_EXECUTABLE  MPI launcher name ("srun"); "flux" becomes "<path> run"
#   MPIEXEC_PREFLAGS    extra launcher flags handed to CMake ("")
#   BUILD_TYPE          CMAKE_BUILD_TYPE for the tested build ("Debug")
#   ERF_ENABLE_CUDA     "ON" enables the CUDA build ("OFF")
#   CI_PIPELINE_ID      required; used in the build directory name

set -o errexit
set -o nounset
set -o pipefail

modules=${MODULE_LIST:-""}
mpiexec_executable=${MPIEXEC_EXECUTABLE:-"srun"}
# Resolve the launcher to its full path with the `command -v` builtin (no
# dependency on the external `which`). If using flux, append "run" after the
# flux executable path.
if [[ "${mpiexec_executable}" == "flux" ]]
then
    mpiexec_executable="$(command -v "${mpiexec_executable}") run"
    flux jobs
    flux resource list
else
    mpiexec_executable="$(command -v "${mpiexec_executable}")"
fi

mpiexec_preflags=${MPIEXEC_PREFLAGS:-""}
host=$(hostname)
build_type=${BUILD_TYPE:-"Debug"}

ERF_ENABLE_CUDA=${ERF_ENABLE_CUDA:-"OFF"}

echo "${host}"

# Unique per host, pipeline, and start time.
build_dir=build_${host}_${CI_PIPELINE_ID}_$(date +%F_%H_%M_%S)

if [[ -n ${modules} ]]
then
    # Intentionally unquoted: MODULE_LIST is a space-separated list of modules.
    module load ${modules}
fi

# Temporary workaround for CUDA builds:
# AMReX fcompare seems to not work as expected if compiled with CUDA.
# This builds a CPU version first and uses that fcompare executable during the
# testing for the CUDA build
if [[ "${ERF_ENABLE_CUDA}" == "ON" ]]
then
    echo "====================================================="
    echo "Building CPU version first to get fcompare executable"
    echo "====================================================="
    mkdir "${build_dir}_cpu"
    cd "${build_dir}_cpu"
    pwd

    cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \
          -DCMAKE_CXX_COMPILER:STRING="${CMAKE_CXX_COMPILER:-mpicxx}" \
          -DCMAKE_C_COMPILER:STRING="${CMAKE_C_COMPILER:-mpicc}" \
          -DCMAKE_Fortran_COMPILER:STRING="${CMAKE_Fortran_COMPILER:-mpifort}" \
          -DCMAKE_BUILD_TYPE:STRING=Release \
          -DERF_DIM:STRING=3 \
          -DERF_ENABLE_MPI:BOOL=ON \
          -DERF_ENABLE_CUDA:BOOL=OFF \
          -DERF_ENABLE_TESTS:BOOL=OFF \
          -DERF_ENABLE_FCOMPARE:BOOL=ON \
          -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \
          -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON ..
    # Bound the job count the same way as the main build; a bare `-j` places
    # no limit on the number of parallel jobs.
    make -j "${OMP_NUM_THREADS:-16}" fcompare

    # Picked up by the main configure step below via -DFCOMPARE_EXE.
    FCOMPARE_EXE="$(pwd)/Submodules/AMReX/Tools/Plotfile/amrex_fcompare"

    cd ../

    echo "====================================================="
    echo "Using fcompare executable at: ${FCOMPARE_EXE}"
    echo "====================================================="
fi

# Main build: configure with tests enabled, compile, then run the test suite.
mkdir "${build_dir}"
cd "${build_dir}"
pwd

# FCOMPARE_EXE falls back to the in-tree fcompare of this build unless the
# CUDA workaround above already produced a CPU-built one.
cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \
      -DCMAKE_CXX_COMPILER:STRING="${CMAKE_CXX_COMPILER:-mpicxx}" \
      -DCMAKE_C_COMPILER:STRING="${CMAKE_C_COMPILER:-mpicc}" \
      -DCMAKE_Fortran_COMPILER:STRING="${CMAKE_Fortran_COMPILER:-mpifort}" \
      -DMPIEXEC_EXECUTABLE="${mpiexec_executable}" \
      -DMPIEXEC_PREFLAGS:STRING="${mpiexec_preflags}" \
      -DCMAKE_BUILD_TYPE:STRING="${build_type}" \
      -DERF_DIM:STRING=3 \
      -DERF_ENABLE_MPI:BOOL=ON \
      -DERF_ENABLE_CUDA:BOOL="${ERF_ENABLE_CUDA}" \
      -DAMReX_CUDA_ARCH:STRING="${CUDA_ARCH:-}" \
      -DERF_ENABLE_HIP:BOOL="${ERF_ENABLE_HIP:-OFF}" \
      -DAMReX_AMD_ARCH:STRING="${AMD_ARCH:-}" \
      -DERF_ENABLE_TESTS:BOOL=ON \
      -DERF_TEST_NRANKS:STRING="${ERF_TEST_NRANKS:-4}" \
      -DERF_ENABLE_FCOMPARE:BOOL=ON \
      -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \
      -DFCOMPARE_EXE="${FCOMPARE_EXE:-$(pwd)/Submodules/AMReX/Tools/Plotfile/amrex_fcompare}" \
      -DERF_TEST_FCOMPARE_RTOL="${ERF_TEST_FCOMPARE_RTOL:-5.0e-9}" \
      -DERF_TEST_FCOMPARE_ATOL="${ERF_TEST_FCOMPARE_ATOL:-2.0e-10}" \
      -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \
      ..
make -j "${OMP_NUM_THREADS:-16}"
ctest -VV --output-on-failure
59 changes: 59 additions & 0 deletions .gitlab/LC/runners/dane.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Shared retry policy: at most two retries, and only for runner system failures.
.retry:
  retry:
    when: [runner_system_failure]
    max: 2

# Base template for every job that runs on a dane shell runner.
.on_dane:
  extends: [.retry]
  tags: [dane, shell]
  rules:
    # Setting ON_DANE to "OFF" disables all dane jobs.
    - when: never
      if: '$ON_DANE == "OFF"'
    # The upstream branch.
    - if: $CI_COMMIT_BRANCH == 'development'
    # Any branch whose name starts with "gitlab".
    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
    # Every other push pipeline is skipped.
    - when: never
      if: $CI_PIPELINE_SOURCE == "push"
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - if: $CI_COMMIT_BRANCH == $DEFAULT_BRANCH
    # The release job runs regardless of earlier job status.
    - when: always
      if: '$CI_JOB_NAME =~ /release_resources_dane/'
    - when: on_success

# Requests a one-node allocation named ${ALLOC_NAME}; --no-shell means salloc
# does not start a shell in it. No repository checkout is needed for this step.
allocate_resources_dane:
  stage: allocate
  extends: [.on_dane]
  variables:
    GIT_STRATEGY: none
  script:
    - salloc -N 1 --reservation=ci -A ${ALLOC_BANK} --time=${ALLOC_TIME} --no-shell --job-name=${ALLOC_NAME}

# Cancels the allocation created by allocate_resources_dane, if it still exists.
# Runs even when earlier jobs failed (when: always) and needs no checkout.
release_resources_dane:
  variables:
    GIT_STRATEGY: none
  extends:
    - .on_dane
  stage: release
  script:
    # Look up the Slurm job id by the allocation's job name.
    - export JOBID=$(squeue -h --name=${ALLOC_NAME} --format=%A)
    # Use an if-statement rather than `[[ ... ]] && scancel`: the latter exits
    # non-zero when no allocation is found and would fail this cleanup job.
    - 'if [[ -n "${JOBID}" ]]; then scancel ${JOBID}; fi'
  when: always

# Template for build/test jobs on dane. Runs TEST_SCRIPT through srun, attached
# to the allocation made by allocate_resources_dane when its job id can be found.
.job_on_dane:
  stage: build
  extends: .on_dane
  needs:
    - "allocate_resources_dane"
  variables:
    MPIEXEC_EXECUTABLE: srun
    MPIEXEC_PREFLAGS: "--cpu-bind=cores -v"
  script:
    - echo "JOB NAME ${ALLOC_NAME}"
    # Find the allocation by job name; JOBID is empty if none is found.
    - export JOBID=$(squeue -h --name=${ALLOC_NAME} --format=%A)
    - echo "SLURM ID ${JOBID}"
    # --jobid is only passed when an allocation id was found.
    - srun $( [[ -n "${JOBID}" ]] && echo "--jobid=${JOBID}" ) -N 1 -t ${ALLOC_TIME} -v --overlap ${TEST_SCRIPT}
34 changes: 34 additions & 0 deletions .gitlab/LC/runners/lassen.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Shared retry policy: at most two retries, and only for runner system failures.
.retry:
  retry:
    when: [runner_system_failure]
    max: 2

# Base template for every job that runs on a lassen shell runner.
.on_lassen:
  extends: [.retry]
  tags: [lassen, shell]
  rules:
    # Setting ON_LASSEN to "OFF" disables all lassen jobs.
    - when: never
      if: '$ON_LASSEN == "OFF"'
    # The upstream branch.
    - if: $CI_COMMIT_BRANCH == 'development'
    # Any branch whose name starts with "gitlab".
    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
    # Every other push pipeline is skipped.
    - when: never
      if: $CI_PIPELINE_SOURCE == "push"
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - if: $CI_COMMIT_BRANCH == $DEFAULT_BRANCH
    - when: on_success

# Template for build/test jobs on lassen. There is no separate allocate job
# here (needs is empty): TEST_SCRIPT is submitted directly through bsub.
.job_on_lassen:
  stage: build
  extends: .on_lassen
  needs: []
  variables:
    MPIEXEC_EXECUTABLE: jsrun
    MPIEXEC_PREFLAGS: "-a 1 -c 1 -g 1"
  script:
    - bsub -q ${ALLOC_QUEUE} -W ${ALLOC_TIME} -G ${ALLOC_BANK} -J ${ALLOC_NAME} -nnodes 1 -Is ${TEST_SCRIPT}
60 changes: 60 additions & 0 deletions .gitlab/LC/runners/tioga.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Shared retry policy: at most two retries, and only for runner system failures.
.retry:
  retry:
    when: [runner_system_failure]
    max: 2

# Base template for every job that runs on a tioga shell runner.
.on_tioga:
  extends: [.retry]
  tags: [tioga, shell]
  rules:
    # Setting ON_TIOGA to "OFF" disables all tioga jobs.
    - when: never
      if: '$ON_TIOGA == "OFF"'
    # The upstream branch.
    - if: $CI_COMMIT_BRANCH == 'development'
    # Any branch whose name starts with "gitlab".
    - if: $CI_COMMIT_BRANCH =~ /^gitlab.*/
    # Every other push pipeline is skipped.
    - when: never
      if: $CI_PIPELINE_SOURCE == "push"
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - if: $CI_COMMIT_BRANCH == $DEFAULT_BRANCH
    # The release job runs regardless of earlier job status.
    - when: always
      if: '$CI_JOB_NAME =~ /release_resources_tioga/'
    - when: on_success

# Requests an exclusive one-node flux allocation named ${ALLOC_NAME} in the
# background (--bg). No repository checkout is needed for this step.
allocate_resources_tioga:
  stage: allocate
  extends: [.on_tioga]
  variables:
    GIT_STRATEGY: none
  script:
    - flux alloc -N 1 -q ${ALLOC_QUEUE} -t=${ALLOC_TIME} --bg --exclusive --job-name=${ALLOC_NAME}

# Cancels the allocation created by allocate_resources_tioga, if it still exists.
# Runs even when earlier jobs failed (when: always) and needs no checkout.
release_resources_tioga:
  variables:
    GIT_STRATEGY: none
  extends:
    - .on_tioga
  stage: release
  script:
    # Look up the flux job id by the allocation's job name.
    - export JOBID=$(flux jobs -n --name=${ALLOC_NAME} --format="{id}")
    # Use an if-statement rather than `[[ ... ]] && flux cancel`: the latter
    # exits non-zero when no allocation is found and would fail this cleanup job.
    - 'if [[ -n "${JOBID}" ]]; then flux cancel ${JOBID}; fi'
  when: always

# Template for build/test jobs on tioga. Runs TEST_SCRIPT with `flux run`
# through a `flux proxy` to the allocation made by allocate_resources_tioga.
.job_on_tioga:
  stage: build
  extends: .on_tioga
  needs:
    - "allocate_resources_tioga"
  variables:
    # Note: the build script expands "flux" to "<flux path> run"
    MPIEXEC_EXECUTABLE: flux
    MPIEXEC_PREFLAGS: "-c 1 -g 1 -o mpi-spectrum -o cpu-affinity=per-task -o gpu-affinity=per-task -vv"
  script:
    - echo "JOB NAME ${ALLOC_NAME}"
    # Find the allocation by job name; JOBID is empty if none is found.
    - export JOBID=$(flux jobs -n --name=${ALLOC_NAME} --format="{id}")
    - echo "FLUX ID ${JOBID}"
    - flux proxy $( [[ -n "${JOBID}" ]] && echo "${JOBID}" ) flux run -N 1 -n 1 -c 16 -vv ${TEST_SCRIPT}
18 changes: 14 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,13 @@ if (${ERF_USE_INTERNAL_AMREX})
########################### AMReX #####################################

add_subdirectory(${AMREX_SUBMOD_LOCATION})
set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/amrex_fcompare
CACHE INTERNAL "Path to fcompare executable for regression tests")
# Default path of the fcompare executable when AMReX is built as a submodule.
# It is a STRING cache entry, so a value already in the cache (for example from
# -DFCOMPARE_EXE=... on the command line) is not overwritten by these defaults.
# NOTE(review): set() stores the "*" in the Windows path literally; it is not
# expanded here. Presumably it stands for a per-configuration subdirectory --
# confirm that whatever consumes FCOMPARE_EXE resolves it.
if(WIN32)
set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/*/amrex_fcompare.exe
CACHE STRING "Path to fcompare executable for regression tests")
else()
set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/amrex_fcompare
CACHE STRING "Path to fcompare executable for regression tests")
endif()
else()
set(CMAKE_PREFIX_PATH ${AMREX_DIR} ${CMAKE_PREFIX_PATH})
list(APPEND AMREX_COMPONENTS
Expand Down Expand Up @@ -109,8 +114,13 @@ else()
find_package(AMReX CONFIG REQUIRED
COMPONENTS ${AMREX_COMPONENTS})
message(STATUS "Found AMReX = ${AMReX_DIR}")
set(FCOMPARE_EXE ${AMReX_DIR}/../../../bin/amrex_fcompare
CACHE INTERNAL "Path to fcompare executable for regression tests")
# Default path of the fcompare executable for an externally installed AMReX,
# derived relative to AMReX_DIR. It is a STRING cache entry, so a value already
# in the cache (for example from -DFCOMPARE_EXE=...) is not overwritten.
# NOTE(review): set() stores the "*" in the Windows path literally; it is not
# expanded here -- confirm that whatever consumes FCOMPARE_EXE resolves it.
if(WIN32)
set(FCOMPARE_EXE ${AMReX_DIR}/../../../*/amrex_fcompare.exe
CACHE STRING "Path to fcompare executable for regression tests")
else()
set(FCOMPARE_EXE ${AMReX_DIR}/../../../bin/amrex_fcompare
CACHE STRING "Path to fcompare executable for regression tests")
endif()
endif()

########################## NETCDF ##################################
Expand Down
Loading

0 comments on commit 8941a54

Please sign in to comment.