From ceb45ee1ea5988df15ff8b588046c6b0529c0bae Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Wed, 11 Oct 2023 10:47:10 -0700 Subject: [PATCH 01/26] Set of files to be modified for containergpucuda --- configs/containergpucuda/compilers.yaml | 13 +++++ configs/containergpucuda/config.yaml | 2 + configs/containergpucuda/packages.yaml | 58 +++++++++++++++++++ exawind_containergpucuda.yaml | 9 +++ scripts/create-exawind-snapshot.sh | 6 ++ .../cmd/manager_cmds/find_machine.py | 3 + 6 files changed, 91 insertions(+) create mode 100644 configs/containergpucuda/compilers.yaml create mode 100644 configs/containergpucuda/config.yaml create mode 100644 configs/containergpucuda/packages.yaml create mode 100644 exawind_containergpucuda.yaml diff --git a/configs/containergpucuda/compilers.yaml b/configs/containergpucuda/compilers.yaml new file mode 100644 index 00000000..10686fe8 --- /dev/null +++ b/configs/containergpucuda/compilers.yaml @@ -0,0 +1,13 @@ +compilers: +- compiler: + spec: gcc@11.4.0 + paths: + cc: /usr/bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + flags: {} + operating_system: ubuntu22.04 + target: any + modules: [] + extra_rpaths: [] diff --git a/configs/containergpucuda/config.yaml b/configs/containergpucuda/config.yaml new file mode 100644 index 00000000..874ad986 --- /dev/null +++ b/configs/containergpucuda/config.yaml @@ -0,0 +1,2 @@ +config: + build_jobs: 32 diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml new file mode 100644 index 00000000..23fee1e2 --- /dev/null +++ b/configs/containergpucuda/packages.yaml @@ -0,0 +1,58 @@ +packages: +# Global settings + all: + compiler: + - gcc@11.4.0 + providers: + mpi: [mpich] + blas: [netlib-lapack] + lapack: [netlib-lapack] + variants: build_type=Release +# External packages from nvidia base image (Ubuntu-22.04) + mpi: + require: mpich +# Package preferences to be built by Spack for correct Exawind +# Nota bene: use libtool from Spack 
for correct linking + ascent: + variants: ~fortran~openmp + amr-wind: + variants: +tiny_profile + conduit: + variants: ~fortran~hdf5_compat + boost: + version: [1.78.0] + variants: cxxstd=17 + cmake: + version: [3.26.3] + variants: build_type=Release + trilinos: + require: + - any_of: ["@13.4.0", "@develop"] + hdf5: + version: [1.10.7] + variants: +cxx+hl + libtool: + version: [2.4.7] + masa: + variants: ~fortran~python + netcdf-c: + require: '@4.7.4' + variants: +parallel-netcdf maxdims=65536 maxvars=524288 + openfast: + version: [master] + variants: +cxx + parallel-netcdf: + version: [1.12.2] + variants: ~fortran + perl: + require: '@5.34.1' + tioga: + version: [develop] + hypre: + require: '@develop' + variants: ~fortran + hypre2: + require: '@develop' + variants: ~fortran + yaml-cpp: + version: [0.6.3] diff --git a/exawind_containergpucuda.yaml b/exawind_containergpucuda.yaml new file mode 100644 index 00000000..d2510c2e --- /dev/null +++ b/exawind_containergpucuda.yaml @@ -0,0 +1,9 @@ +spack: + include: + - include.yaml + concretizer: + unify: false + reuse: false + view: false + specs: + - 'exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu' diff --git a/scripts/create-exawind-snapshot.sh b/scripts/create-exawind-snapshot.sh index 2dcef826..9b2f0aa9 100755 --- a/scripts/create-exawind-snapshot.sh +++ b/scripts/create-exawind-snapshot.sh @@ -1,4 +1,7 @@ #!/bin/bash -l + hwloc-nox \ + libhwloc-dev \ + libhwloc15 \ # # Copyright (c) 2022, National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. 
@@ -52,6 +55,9 @@ elif [[ "${SPACK_MANAGER_MACHINE}" == "summit" ]]; then elif [[ "${SPACK_MANAGER_MACHINE}" == "perlmutter" ]]; then NUM_CORES=8 cmd "nice -n19 spack manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" +elif [[ "${SPACK_MANAGER_MACHINE}" == "containergpucuda" ]]; then + NUM_CORES=8 + cmd "nice -n19 spack manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" elif [[ "${SPACK_MANAGER_MACHINE}" == "snl-hpc" ]]; then # TODO we should probably launch the install through slurm and exit on this one cmd "nice -n19 spack manager snapshot -s exawind+hypre+openfast amr-wind+hypre+openfast" diff --git a/spack-scripting/scripting/cmd/manager_cmds/find_machine.py b/spack-scripting/scripting/cmd/manager_cmds/find_machine.py index 34cc495e..087e93eb 100644 --- a/spack-scripting/scripting/cmd/manager_cmds/find_machine.py +++ b/spack-scripting/scripting/cmd/manager_cmds/find_machine.py @@ -97,6 +97,9 @@ def is_e4s(): "perlmutter": MachineData( lambda: os.environ["NERSC_HOST"] == "perlmutter", "perlmutter-p1.nersc.gov" ), + "containergpucuda": MachineData( + lambda: os.environ["CONTAINER_BUILD"] == "gpucuda", "containgpucuda.nodomain.gov" + ), # General "darwin": MachineData(lambda: sys.platform == "darwin", "darwin.nodomain.gov"), } From 9b8a663b2f084f2225eb4b20082f69a0448481ef Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Tue, 17 Oct 2023 14:54:24 -0700 Subject: [PATCH 02/26] Dockerfile-containergpucuda: basic instance --- .../Dockerfile-containergpucuda | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda new file mode 100644 index 00000000..816b5c3a --- /dev/null +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -0,0 +1,85 @@ +MAINTAINER Philip Sakievich, Sandia National Laboratories +MAINTAINER Jon Rood, NREL + +# NVIDIA base images: https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags + +ARG REGISTRY=nvcr.io/nvidia +ARG IMAGE=cuda +ARG TAG=12.2.0-devel-ubuntu22.04 + +FROM ${REGISTRY}/${IMAGE}:${TAG} + +# Make bash the default $SHELL +SHELL ["/bin/bash", "-c"] + +# Install Spack Prereqs: https://spack.readthedocs.io/en/latest/getting_started.html#system-prerequisites + +RUN apt-get update -y && \ + apt-get -y upgrade && \ + apt-get -y install \ + autoconf \ + automake \ + clangd \ + curl \ + emacs-nox \ + file \ + flex \ + gcc \ + gcc-multilib \ + gcc-doc \ + g++ \ + gfortran \ + gfortran-multilib \ + gfortran-doc \ + git \ + git-doc \ + git-man \ + libffi-dev \ + libfmt-dev \ + libgmp-dev \ + libjpeg-dev \ + libmpc-dev \ + libx11-dev \ + lsb-release \ + m4 \ + nano \ + python3 \ + python3-distutils \ + python3-venv \ + unzip \ + vim \ + wget \ + wget2 \ + zip \ + zlib1g-dev + +RUN apt clean -y + +# Exawind GPU snapshot +WORKDIR /exawind-entry +#RUN git clone --recursive https://github.com/sandialabs/spack-manager +# Pre-merge fork +#RUN git clone --recursive https://github.com/ajpowelsnl/spack-manager +# Needed by "create-exawind-snapshot.sh" +ENV SPACK_MANAGER_MACHINE=containergpucuda +ENV CONTAINER_BUILD=gpucuda +ENV SPACK_MANAGER=/exawind-entry/spack-manager + +#WORKDIR 
/exawind-entry/spack-manager + +# Pre-merge branch from ajpowelsnl/spack-manager fork +#RUN git checkout gpucontainer + +# Snapshot will be generated upon running container +#RUN echo "export SPACK_MANAGER=$SPACK_MANAGER" >> /etc/bash.bashrc && \ +# echo "source $SPACK_MANAGER/start.sh && spack-start" >> /etc/bash.bashrc && \ +# echo "spack external find" >> /etc/bash.bashrc && \ +# echo "spack config add packages:all:target:[$uarch]" >> /etc/bash.bashrc && \ +# echo "$SPACK_MANAGER/scripts/create-exawind-snapshot.sh" >> /etc/bash.bashrc && \ +# echo "spack clean -a" >> /etc/bash.bashrc + +# Verify .bashrc +# RUN ["/bin/bash", "-c", "tail -n 6 /etc/bash.bashrc"] + +WORKDIR /exawind-entry +CMD [ "/bin/bash" ] From 163c584c8aa67f30f34b0f2123848a99591b22ff Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Wed, 18 Oct 2023 13:38:39 -0700 Subject: [PATCH 03/26] First draft of GPU container (Perlmutter) --- configs/containergpucuda/compilers.yaml | 4 +- configs/containergpucuda/packages.yaml | 37 ++++++++++++++++++- .../Dockerfile-containergpucuda | 9 +++-- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/configs/containergpucuda/compilers.yaml b/configs/containergpucuda/compilers.yaml index 10686fe8..6068cc28 100644 --- a/configs/containergpucuda/compilers.yaml +++ b/configs/containergpucuda/compilers.yaml @@ -1,6 +1,6 @@ compilers: - compiler: - spec: gcc@11.4.0 + spec: nvcc@=12.2.91 paths: cc: /usr/bin/gcc cxx: /usr/bin/g++ @@ -11,3 +11,5 @@ compilers: target: any modules: [] extra_rpaths: [] + +# See example: https://spack.readthedocs.io/en/latest/gpu_configuration.html diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index 23fee1e2..14418469 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -8,9 +8,44 @@ packages: blas: [netlib-lapack] lapack: [netlib-lapack] variants: build_type=Release -# External packages from nvidia base image (Ubuntu-22.04) mpi: 
require: mpich +# GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml + mpich: + require: + - one_of: ["+cuda", "+rocm"] +# Use CUDA resources from NVIDIA base image: +# See example: https://spack.readthedocs.io/en/latest/gpu_configuration.html + cuda: + buildable: false + externals: + - spec: cuda@12.2.91 + prefix: /usr/local/cuda + cusparse: + buildable: false + externals: + - spec: cuda@12.2.91 + prefix: /usr/local/cuda + cublas: + buildable: false + externals: + - spec: cuda@12.2.91 + prefix: /usr/local/cuda + cusolver: + buildable: false + externals: + - spec: cuda@12.2.91 + prefix: /usr/local/cuda + cufft: + buildable: false + externals: + - spec: cuda@12.2.91 + prefix: /usr/local/cuda +# Use Ubuntu libncurses-dev, b/c Spack version fails + ncurses: + externals: + - spec: ncurses@6.3 + prefix: /usr # Package preferences to be built by Spack for correct Exawind # Nota bene: use libtool from Spack for correct linking ascent: diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index 816b5c3a..f4bd3fae 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -39,6 +39,9 @@ RUN apt-get update -y && \ libgmp-dev \ libjpeg-dev \ libmpc-dev \ + libnccl2 \ + libnccl-dev \ + libncurses-dev \ libx11-dev \ lsb-release \ m4 \ @@ -59,16 +62,16 @@ RUN apt clean -y WORKDIR /exawind-entry #RUN git clone --recursive https://github.com/sandialabs/spack-manager # Pre-merge fork -#RUN git clone --recursive https://github.com/ajpowelsnl/spack-manager +RUN git clone --recursive https://github.com/ajpowelsnl/spack-manager # Needed by "create-exawind-snapshot.sh" ENV SPACK_MANAGER_MACHINE=containergpucuda ENV CONTAINER_BUILD=gpucuda ENV SPACK_MANAGER=/exawind-entry/spack-manager -#WORKDIR /exawind-entry/spack-manager 
+WORKDIR /exawind-entry/spack-manager # Pre-merge branch from ajpowelsnl/spack-manager fork -#RUN git checkout gpucontainer +RUN git checkout gpucontainer # Snapshot will be generated upon running container #RUN echo "export SPACK_MANAGER=$SPACK_MANAGER" >> /etc/bash.bashrc && \ From 72d73302873a2cb96cad9ef94ca54b7d7947fa03 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Thu, 19 Oct 2023 08:42:42 -0700 Subject: [PATCH 04/26] create-exawind-snapshot.sh: clean up, add `-d` to gpucontainer --- .../Dockerfile-containergpucuda | 9 ++++++--- scripts/create-exawind-snapshot.sh | 7 ++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index f4bd3fae..76285524 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -34,9 +34,13 @@ RUN apt-get update -y && \ git \ git-doc \ git-man \ + hwloc-nox \ libffi-dev \ libfmt-dev \ libgmp-dev \ + libhwloc-common \ + libhwloc-dev \ + libhwloc15 \ libjpeg-dev \ libmpc-dev \ libnccl2 \ @@ -76,13 +80,12 @@ RUN git checkout gpucontainer # Snapshot will be generated upon running container #RUN echo "export SPACK_MANAGER=$SPACK_MANAGER" >> /etc/bash.bashrc && \ # echo "source $SPACK_MANAGER/start.sh && spack-start" >> /etc/bash.bashrc && \ -# echo "spack external find" >> /etc/bash.bashrc && \ -# echo "spack config add packages:all:target:[$uarch]" >> /etc/bash.bashrc && \ +# echo "spack external find --all" >> /etc/bash.bashrc && \ # echo "$SPACK_MANAGER/scripts/create-exawind-snapshot.sh" >> /etc/bash.bashrc && \ # echo "spack clean -a" >> /etc/bash.bashrc # Verify .bashrc # RUN ["/bin/bash", "-c", "tail -n 6 /etc/bash.bashrc"] -WORKDIR /exawind-entry +#WORKDIR /exawind-entry CMD [ "/bin/bash" ] diff --git a/scripts/create-exawind-snapshot.sh b/scripts/create-exawind-snapshot.sh 
index 9b2f0aa9..c2b04e69 100755 --- a/scripts/create-exawind-snapshot.sh +++ b/scripts/create-exawind-snapshot.sh @@ -1,7 +1,4 @@ -#!/bin/bash -l - hwloc-nox \ - libhwloc-dev \ - libhwloc15 \ +#!/bin/bash # # Copyright (c) 2022, National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. @@ -57,7 +54,7 @@ elif [[ "${SPACK_MANAGER_MACHINE}" == "perlmutter" ]]; then cmd "nice -n19 spack manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" elif [[ "${SPACK_MANAGER_MACHINE}" == "containergpucuda" ]]; then NUM_CORES=8 - cmd "nice -n19 spack manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" + cmd "nice -n19 spack -d manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" elif [[ "${SPACK_MANAGER_MACHINE}" == "snl-hpc" ]]; then # TODO we should probably launch the install through slurm and exit on this one cmd "nice -n19 spack manager snapshot -s exawind+hypre+openfast amr-wind+hypre+openfast" From 51e356dc51ad1ba4406a8eb488d477bcc4dabff1 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Thu, 19 Oct 2023 10:25:54 -0700 Subject: [PATCH 05/26] exawind_containergpucuda.yaml: cuda container build --- env-templates/exawind_containergpucuda.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 env-templates/exawind_containergpucuda.yaml diff --git a/env-templates/exawind_containergpucuda.yaml b/env-templates/exawind_containergpucuda.yaml new file mode 100644 index 00000000..d05720c3 --- /dev/null +++ b/env-templates/exawind_containergpucuda.yaml @@ -0,0 +1,9 @@ +spack: + include: + - include.yaml + concretizer: + unify: false + reuse: false + view: false + specs: + - 'exawind+hypre+amr_wind_gpu+nalu_wind_gpu+cuda' From b26cfdb4bab27d9a08260ad7b5f5928d23354fbe Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Thu, 19 Oct 2023 10:39:47 -0700 Subject: [PATCH 06/26] GPU configs udpates --- configs/containergpucuda/compilers.yaml | 2 +- configs/containergpucuda/packages.yaml | 32 ++----------------------- 2 files changed, 3 insertions(+), 31 deletions(-) diff --git a/configs/containergpucuda/compilers.yaml b/configs/containergpucuda/compilers.yaml index 6068cc28..7bf2d53f 100644 --- a/configs/containergpucuda/compilers.yaml +++ b/configs/containergpucuda/compilers.yaml @@ -1,6 +1,6 @@ compilers: - compiler: - spec: nvcc@=12.2.91 + spec: gcc@11.4.0 paths: cc: /usr/bin/gcc cxx: /usr/bin/g++ diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index 14418469..ad76ed30 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -7,40 +7,12 @@ packages: mpi: [mpich] blas: [netlib-lapack] lapack: [netlib-lapack] - variants: build_type=Release + variants: build_type=Release cuda_arch=80 mpi: require: mpich # GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml mpich: - require: - - one_of: ["+cuda", "+rocm"] -# Use CUDA resources from NVIDIA base image: -# See example: https://spack.readthedocs.io/en/latest/gpu_configuration.html - cuda: - buildable: false - externals: - - spec: cuda@12.2.91 - prefix: /usr/local/cuda - cusparse: - buildable: false - externals: - - spec: cuda@12.2.91 - prefix: /usr/local/cuda - cublas: - buildable: false - externals: - - spec: cuda@12.2.91 - prefix: /usr/local/cuda - cusolver: - buildable: false - externals: - - spec: cuda@12.2.91 - prefix: /usr/local/cuda - cufft: - buildable: false - externals: - - spec: cuda@12.2.91 - prefix: /usr/local/cuda + require: "+cuda" # Use Ubuntu libncurses-dev, b/c Spack version fails ncurses: externals: From e87dd67c9773b75433bb69033fcb44c909163a3e Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Thu, 19 Oct 2023 10:44:30 -0700 Subject: [PATCH 07/26] create-exawind-snapshot.sh: rm NUM_CORES --- scripts/create-exawind-snapshot.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/create-exawind-snapshot.sh b/scripts/create-exawind-snapshot.sh index c2b04e69..536f7ce9 100755 --- a/scripts/create-exawind-snapshot.sh +++ b/scripts/create-exawind-snapshot.sh @@ -53,7 +53,6 @@ elif [[ "${SPACK_MANAGER_MACHINE}" == "perlmutter" ]]; then NUM_CORES=8 cmd "nice -n19 spack manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" elif [[ "${SPACK_MANAGER_MACHINE}" == "containergpucuda" ]]; then - NUM_CORES=8 cmd "nice -n19 spack -d manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" elif [[ "${SPACK_MANAGER_MACHINE}" == "snl-hpc" ]]; then # TODO we should probably launch the install through slurm and exit on this one From 6dea5984f020eaa95c0c022b3d295d6b80d233c0 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Thu, 19 Oct 2023 15:17:29 -0700 Subject: [PATCH 08/26] Restore num jobs, NUM_CORES=8 --- configs/containergpucuda/config.yaml | 2 +- .../Dockerfile-containergpucuda | 12 ++++++++++-- scripts/create-exawind-snapshot.sh | 1 + 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/configs/containergpucuda/config.yaml b/configs/containergpucuda/config.yaml index 874ad986..b7797366 100644 --- a/configs/containergpucuda/config.yaml +++ b/configs/containergpucuda/config.yaml @@ -1,2 +1,2 @@ config: - build_jobs: 32 + build_jobs: 20 diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index 76285524..f8b58ff7 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -15,11 +15,15 @@ SHELL ["/bin/bash", "-c"] # Install Spack Prereqs: 
https://spack.readthedocs.io/en/latest/getting_started.html#system-prerequisites RUN apt-get update -y && \ - apt-get -y upgrade && \ - apt-get -y install \ + apt-get upgrade -y + +RUN apt-get install -y \ autoconf \ automake \ + bzip2 \ + ca-certificates \ clangd \ + coreutils \ curl \ emacs-nox \ file \ @@ -34,7 +38,9 @@ RUN apt-get update -y && \ git \ git-doc \ git-man \ + gnupg2 \ hwloc-nox \ + libbz2-dev \ libffi-dev \ libfmt-dev \ libgmp-dev \ @@ -49,6 +55,7 @@ RUN apt-get update -y && \ libx11-dev \ lsb-release \ m4 \ + make \ nano \ python3 \ python3-distutils \ @@ -57,6 +64,7 @@ RUN apt-get update -y && \ vim \ wget \ wget2 \ + xz-utils \ zip \ zlib1g-dev diff --git a/scripts/create-exawind-snapshot.sh b/scripts/create-exawind-snapshot.sh index 536f7ce9..444f9fc6 100755 --- a/scripts/create-exawind-snapshot.sh +++ b/scripts/create-exawind-snapshot.sh @@ -54,6 +54,7 @@ elif [[ "${SPACK_MANAGER_MACHINE}" == "perlmutter" ]]; then cmd "nice -n19 spack manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" elif [[ "${SPACK_MANAGER_MACHINE}" == "containergpucuda" ]]; then cmd "nice -n19 spack -d manager snapshot -m -s exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu" + NUM_CORES=8 elif [[ "${SPACK_MANAGER_MACHINE}" == "snl-hpc" ]]; then # TODO we should probably launch the install through slurm and exit on this one cmd "nice -n19 spack manager snapshot -s exawind+hypre+openfast amr-wind+hypre+openfast" From e3f07adfffc91f72da7318021209661fb1f9e6b9 Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Fri, 20 Oct 2023 10:28:20 -0700 Subject: [PATCH 09/26] Apply yaksa-cuda.patch in spack repo --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index ee68baf2..e3abe4ce 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit ee68baf254ce8f401704ef1a62b77057487d4a12 +Subproject commit e3abe4cefecaf01f0bf34137a231e312580f4bea From 5ba6683c134274bf652928d052f9d914033c6075 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Fri, 20 Oct 2023 12:22:44 -0700 Subject: [PATCH 10/26] cuda_arch=70 for lassen --- configs/containergpucuda/packages.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index ad76ed30..dc49ddf0 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -7,7 +7,7 @@ packages: mpi: [mpich] blas: [netlib-lapack] lapack: [netlib-lapack] - variants: build_type=Release cuda_arch=80 + variants: build_type=Release cuda_arch=70 mpi: require: mpich # GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml From ef9592ab48e04ffea6a62dee5ede0011392cb5a3 Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Mon, 30 Oct 2023 11:15:13 -0700 Subject: [PATCH 11/26] update dependencies' build --- configs/containergpucuda/packages.yaml | 10 +++++++++- .../Dockerfile-containergpucuda | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index dc49ddf0..cb3521c5 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -13,11 +13,19 @@ packages: # GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml mpich: require: "+cuda" -# Use Ubuntu libncurses-dev, b/c Spack version fails +# Use Ubuntu libncurses-dev, etc., b/c Spack version fails ncurses: externals: - spec: ncurses@6.3 prefix: /usr + gdbm: + externals: + - spec: gdbm@1.23 + prefix: /usr + gdbm6: + externals: + - spec: gdbm6@1.23 + prefix: /usr # Package preferences to be built by Spack for correct Exawind # Nota bene: use libtool from Spack for correct linking ascent: diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index f8b58ff7..abc91e26 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -43,6 +43,8 @@ RUN apt-get install -y \ libbz2-dev \ libffi-dev \ libfmt-dev \ + libgdbm-dev \ + libgdbm6 \ libgmp-dev \ libhwloc-common \ libhwloc-dev \ From ab0e26c865513b964614dd4917e860bb01f649d4 Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Mon, 30 Oct 2023 14:49:45 -0700 Subject: [PATCH 12/26] Adjust base image to cuda-11.8.0 --- configs/containergpucuda/packages.yaml | 2 +- exawind_containergpucuda.yaml | 9 --------- .../Dockerfile-containergpucuda | 6 +++--- spack | 2 +- 4 files changed, 5 insertions(+), 14 deletions(-) delete mode 100644 exawind_containergpucuda.yaml diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index cb3521c5..96b76ecd 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -7,7 +7,7 @@ packages: mpi: [mpich] blas: [netlib-lapack] lapack: [netlib-lapack] - variants: build_type=Release cuda_arch=70 + variants: build_type=Release cuda_arch=80 mpi: require: mpich # GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml diff --git a/exawind_containergpucuda.yaml b/exawind_containergpucuda.yaml deleted file mode 100644 index d2510c2e..00000000 --- a/exawind_containergpucuda.yaml +++ /dev/null @@ -1,9 +0,0 @@ -spack: - include: - - include.yaml - concretizer: - unify: false - reuse: false - view: false - specs: - - 'exawind%gcc+hypre+cuda+amr_wind_gpu+nalu_wind_gpu' diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index abc91e26..b507d7e0 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -5,7 +5,9 @@ MAINTAINER Jon Rood, NREL ARG REGISTRY=nvcr.io/nvidia ARG IMAGE=cuda -ARG TAG=12.2.0-devel-ubuntu22.04 +ARG TAG=11.8.0-devel-ubuntu22.04 +#ARG TAG=12.1.0-devel-ubuntu22.04 +#ARG TAG=12.2.0-devel-ubuntu22.04 FROM ${REGISTRY}/${IMAGE}:${TAG} @@ -51,8 +53,6 @@ RUN apt-get install -y \ libhwloc15 \ libjpeg-dev \ libmpc-dev \ - libnccl2 \ - libnccl-dev \ libncurses-dev \ libx11-dev \ lsb-release \ diff --git a/spack b/spack 
index e3abe4ce..74031c23 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit e3abe4cefecaf01f0bf34137a231e312580f4bea +Subproject commit 74031c2386b5d33e76e3a5c20a8fec7707822b77 From b9ddae4d05d49c642e257f2541637346b3dc7c76 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Mon, 30 Oct 2023 14:51:02 -0700 Subject: [PATCH 13/26] Dockerfile-containergpucuda: rm nccl --- .../exawind_container_gpucuda/Dockerfile-containergpucuda | 2 -- 1 file changed, 2 deletions(-) diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index b507d7e0..8db56357 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -6,8 +6,6 @@ MAINTAINER Jon Rood, NREL ARG REGISTRY=nvcr.io/nvidia ARG IMAGE=cuda ARG TAG=11.8.0-devel-ubuntu22.04 -#ARG TAG=12.1.0-devel-ubuntu22.04 -#ARG TAG=12.2.0-devel-ubuntu22.04 FROM ${REGISTRY}/${IMAGE}:${TAG} From 7bfbbcd51a710139ca61d73d59b65fc206d1c062 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Tue, 31 Oct 2023 14:33:12 -0700 Subject: [PATCH 14/26] Spack patch for yaksa --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index 74031c23..84508e25 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 74031c2386b5d33e76e3a5c20a8fec7707822b77 +Subproject commit 84508e256dfa1e382c33d7d1cfcaeccf06999c1e From 77bf1fd4fca193b78d5e9f5e01538277700a05aa Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Tue, 31 Oct 2023 14:45:57 -0700 Subject: [PATCH 15/26] Spack yaska patch: from wyphan --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index 84508e25..eea34c21 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 84508e256dfa1e382c33d7d1cfcaeccf06999c1e +Subproject commit eea34c2113590df4257987fd5ecd75010b3d8b9c From 6b1d70eff5d7c67fedce7accc50dd0e8a69b48c4 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Tue, 31 Oct 2023 14:57:45 -0700 Subject: [PATCH 16/26] rm incorrect yaksa patch --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index eea34c21..a6c0138c 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit eea34c2113590df4257987fd5ecd75010b3d8b9c +Subproject commit a6c0138cd499951beca82e149ab612c58727b10a From bfcfc4969a7e008dbc7902704bd43bbb52871a94 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Tue, 31 Oct 2023 15:03:34 -0700 Subject: [PATCH 17/26] spack: yaksa patch do-over w/ wyphan --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index a6c0138c..09fe3347 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit a6c0138cd499951beca82e149ab612c58727b10a +Subproject commit 09fe33477f6279f833133e681b056d8cd009581a From 6f067a0e7b618e7b8651adfb72e426d2f8e8ba6f Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Tue, 31 Oct 2023 15:19:57 -0700 Subject: [PATCH 18/26] Rescue plan for yaska patch --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index 09fe3347..4822cde3 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 09fe33477f6279f833133e681b056d8cd009581a +Subproject commit 4822cde38da77fc5c05531fafc4c5a00f1342f55 From f9b6f5dee0803c7ce10c9ec2857e8234b961a045 Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Tue, 31 Oct 2023 15:43:52 -0700 Subject: [PATCH 19/26] More rescue: reset spack to ee68baf254ce8f401704ef1a62b77057487d4a12 --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index 4822cde3..ee68baf2 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 4822cde38da77fc5c05531fafc4c5a00f1342f55 +Subproject commit ee68baf254ce8f401704ef1a62b77057487d4a12 From 5c46ea240fe936354f4dc7506ceba0a9d525a3d4 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Tue, 31 Oct 2023 16:29:35 -0700 Subject: [PATCH 20/26] Add sha (6c1868f8ae) with yaska-0.3 --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index ee68baf2..6c1868f8 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit ee68baf254ce8f401704ef1a62b77057487d4a12 +Subproject commit 6c1868f8ae48e9547cfc66cd902ee7bd32f29148 From c0014643210f15dba3475d3cd985f9e17363df85 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Wed, 1 Nov 2023 09:18:57 -0700 Subject: [PATCH 21/26] Patched Spack on fork/branch --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index 6c1868f8..68f7f95f 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 6c1868f8ae48e9547cfc66cd902ee7bd32f29148 +Subproject commit 68f7f95fb1b628c922f3f5684f8e4c9b21b0106e From 981a5d479e29966031872db96e252a10891ce881 Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Wed, 1 Nov 2023 09:38:47 -0700 Subject: [PATCH 22/26] spack/patch_yaksa: branch spack from develop --- spack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spack b/spack index 68f7f95f..de72189f 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 68f7f95fb1b628c922f3f5684f8e4c9b21b0106e +Subproject commit de72189fec9fa67325a93cb64158f41ec51c697b From 5f079fd75b9efec0fcdc6c17a95eb2d83964b62a Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Thu, 2 Nov 2023 08:27:23 -0700 Subject: [PATCH 23/26] Building GPU container, but failing (CUDA) runtime --- .../Dockerfile-containergpucuda | 52 ++++++++++++++----- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index 8db56357..8daf6524 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -1,11 +1,11 @@ -MAINTAINER Philip Sakievich, Sandia National Laboratories -MAINTAINER Jon Rood, NREL +LABEL maintainer="Philip Sakievich, Sandia National Laboratories " # NVIDIA base images: https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags ARG REGISTRY=nvcr.io/nvidia ARG IMAGE=cuda -ARG TAG=11.8.0-devel-ubuntu22.04 +#ARG TAG=11.8.0-devel-ubuntu22.04 +ARG TAG=12.2.0-devel-ubuntu22.04 FROM ${REGISTRY}/${IMAGE}:${TAG} @@ -14,10 +14,10 @@ SHELL ["/bin/bash", "-c"] # Install Spack Prereqs: https://spack.readthedocs.io/en/latest/getting_started.html#system-prerequisites -RUN apt-get update -y && \ - apt-get upgrade -y +RUN apt-get update -yqq && \ + apt-get upgrade -yqq -RUN apt-get install -y \ +RUN apt-get install -yqq \ autoconf \ automake \ bzip2 \ @@ -72,6 +72,7 @@ RUN apt clean -y # Exawind GPU snapshot WORKDIR /exawind-entry +# #RUN git clone --recursive https://github.com/sandialabs/spack-manager # Pre-merge fork RUN git clone --recursive https://github.com/ajpowelsnl/spack-manager @@ -82,18 +83,45 @@ ENV SPACK_MANAGER=/exawind-entry/spack-manager WORKDIR /exawind-entry/spack-manager +# Nota bene: commented code is needed, but does not work in container env # Pre-merge branch from ajpowelsnl/spack-manager fork -RUN git checkout gpucontainer +# RUN git checkout gpucontainer + +# Temp. 
code: Use branch of Spack w/ patch +# DOESN'T BUILD CORRECTLY +#RUN cd spack +#RUN git remote add amy_fork https://github.com/ajpowelsnl/spack.git +#RUN git fetch amy_fork +#RUN git checkout amy_fork/spack/patch_yaksa + # Snapshot will be generated upon running container -#RUN echo "export SPACK_MANAGER=$SPACK_MANAGER" >> /etc/bash.bashrc && \ -# echo "source $SPACK_MANAGER/start.sh && spack-start" >> /etc/bash.bashrc && \ -# echo "spack external find --all" >> /etc/bash.bashrc && \ -# echo "$SPACK_MANAGER/scripts/create-exawind-snapshot.sh" >> /etc/bash.bashrc && \ -# echo "spack clean -a" >> /etc/bash.bashrc +RUN echo "pwd" >> /etc/bash.bashrc && \ + echo "cd spack" >> /etc/bash.bashrc && \ + echo "git remote add amy_fork https://github.com/ajpowelsnl/spack.git" >> /etc/bash.bashrc && \ + echo "git fetch amy_fork" >> /etc/bash.bashrc && \ + echo "git checkout amy_fork/spack/patch_yaksa" >> /etc/bash.bashrc && \ + echo "cd .." >> /etc/bash.bashrc && \ + echo "pwd" >> /etc/bash.bashrc && \ + echo "git checkout gpucontainer" >> /etc/bash.bashrc && \ + echo "export SPACK_MANAGER=$SPACK_MANAGER" >> /etc/bash.bashrc && \ + echo "source $SPACK_MANAGER/start.sh && spack-start" >> /etc/bash.bashrc && \ + echo "spack external find --all" >> /etc/bash.bashrc && \ + echo "$SPACK_MANAGER/scripts/create-exawind-snapshot.sh" >> /etc/bash.bashrc && \ + echo "spack clean --all" >> /etc/bash.bashrc && \ + echo "spack env activate -d snapshots/exawind/containergpucuda/$(date +%Y-%m-%d)" >> /etc/bash.bashrc && \ + echo "spack load exawind" >> /etc/bash.bashrc # Verify .bashrc # RUN ["/bin/bash", "-c", "tail -n 6 /etc/bash.bashrc"] +# Verify executable: +# 66 spack env activate -d snapshots/exawind/containergpucuda/2023-11-01/ +# 67 spack load exawind +# 68 which exawind +# 69 exawind --help + + + #WORKDIR /exawind-entry CMD [ "/bin/bash" ] From ecd75fd297609f8e3056d3300548720ccb874345 Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Fri, 3 Nov 2023 10:42:48 -0700 Subject: [PATCH 24/26] add ubuntu mpich; Spack-pinned version problematic --- configs/containergpucuda/packages.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index 96b76ecd..0b9be6c0 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -13,7 +13,12 @@ packages: # GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml mpich: require: "+cuda" -# Use Ubuntu libncurses-dev, etc., b/c Spack version fails +# Use Ubuntu libncurses-dev, etc., b/c Spack version fails +# Spack-pinned version of mpich builds fail + mpich: + externals: + - spec: mpich@4.0 + prefix: /usr ncurses: externals: - spec: ncurses@6.3 From c4f20297003e7bc409d6417f64844ddc419acc7f Mon Sep 17 00:00:00 2001 From: "Amy J. Powell" Date: Fri, 3 Nov 2023 11:05:11 -0700 Subject: [PATCH 25/26] fix up external mpich block --- configs/containergpucuda/packages.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/configs/containergpucuda/packages.yaml b/configs/containergpucuda/packages.yaml index 0b9be6c0..d878b7c3 100644 --- a/configs/containergpucuda/packages.yaml +++ b/configs/containergpucuda/packages.yaml @@ -11,13 +11,14 @@ packages: mpi: require: mpich # GPU-aware MPICH; See - https://spack.readthedocs.io/en/latest/build_settings.html#package-settings-packages-yaml - mpich: - require: "+cuda" +# mpich: +# require: "+cuda" # Use Ubuntu libncurses-dev, etc., b/c Spack version fails # Spack-pinned version of mpich builds fail mpich: externals: - spec: mpich@4.0 + require: "+cuda" prefix: /usr ncurses: externals: From 76c7ee0d99f69462c50fd2e1d024c538923d4dae Mon Sep 17 00:00:00 2001 From: "Amy J. 
Powell" Date: Fri, 3 Nov 2023 12:20:46 -0700 Subject: [PATCH 26/26] Dockerfile-containergpucuda: container file for PR --- .../Dockerfile-containergpucuda | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda index 8daf6524..4d47d497 100644 --- a/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda +++ b/hpc_containers/exawind_container_gpucuda/Dockerfile-containergpucuda @@ -4,8 +4,8 @@ LABEL maintainer="Philip Sakievich, Sandia National Laboratories