Skip to content

Commit

Permalink
Merge branch 'development' into MonoAdv
Browse files Browse the repository at this point in the history
  • Loading branch information
AMLattanzi authored Jul 23, 2024
2 parents bd9eda8 + 144d35c commit 060bf63
Show file tree
Hide file tree
Showing 60 changed files with 1,497 additions and 939 deletions.
55 changes: 55 additions & 0 deletions Build/erf_containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# ERF (https://github.com/erf-model/ERF) containerfile for NERSC Perlmutter
# Paul Lin, LBNL/NERSC
# May 2024
#
# Builds MPICH and then an MPI- and CUDA-enabled Release build of ERF with the
# GNU 12 compilers, on top of NVIDIA's CUDA 12.2.0 / Ubuntu 22.04 devel image.

FROM nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04

WORKDIR /app

# Directory that will contain the ERF clone (override with --build-arg base_dir=...)
ARG base_dir=/app/erf

# Compilers and build utilities. The apt package lists are deleted in the same
# RUN step so that they are not stored in the resulting image layer.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    g++-12 \
    gcc-12 \
    gfortran-12 \
    git \
    libtool \
    make \
    tar \
    autoconf \
    automake \
    wget \
    python3 \
    cmake && \
    rm -rf /var/lib/apt/lists/*

# MPICH to be swapped out later for Cray MPI
ARG mpich_version=4.2.2
ARG mpich_dir=mpich-${mpich_version}

# Download, build and install MPICH with the GNU 12 compilers, then remove the
# source tree and tarball in the same step to keep the layer small.
RUN wget https://www.mpich.org/static/downloads/$mpich_version/$mpich_dir.tar.gz && \
    tar xzf $mpich_dir.tar.gz && \
    cd $mpich_dir && \
    ./configure CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12 F77=/usr/bin/gfortran-12 FC=/usr/bin/gfortran-12 && \
    make -j8 && \
    make install && \
    make clean && \
    cd .. && \
    rm -rf $mpich_dir $mpich_dir.tar.gz

# -p: create missing parent directories and do not fail if base_dir already
# exists (e.g. when base_dir is overridden to a directory such as /app).
RUN mkdir -p ${base_dir}

# Name of the CMake build directory created inside the ERF clone
ARG build_dir=MyBuild

# Clone ERF (with submodules) and build it through the MPICH compiler wrappers.
# CMAKE_CUDA_ARCHITECTURES=80 targets CUDA compute capability 8.0.
RUN cd ${base_dir} && git clone --recursive https://github.com/erf-model/ERF.git && \
    cd ERF && mkdir ${build_dir} && cd ${build_dir} && \
    cmake \
    -DCMAKE_C_COMPILER=mpicc \
    -DCMAKE_CXX_COMPILER=mpicxx \
    -DCMAKE_Fortran_COMPILER=mpif90 \
    -DCMAKE_BUILD_TYPE:STRING=Release \
    -DCMAKE_CUDA_ARCHITECTURES=80 \
    -DERF_ENABLE_MPI:BOOL=ON \
    -DERF_ENABLE_CUDA=ON \
    .. && \
    make -j8

10 changes: 9 additions & 1 deletion CMake/BuildERFExe.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ function(build_erf_lib erf_lib_name)
${SRC_DIR}/Microphysics/Kessler/Init_Kessler.cpp
${SRC_DIR}/Microphysics/Kessler/Kessler.cpp
${SRC_DIR}/Microphysics/Kessler/Update_Kessler.cpp
${SRC_DIR}/WindFarmParametrization/Fitch/AdvanceFitch.cpp
${SRC_DIR}/WindFarmParametrization/EWP/AdvanceEWP.cpp
${SRC_DIR}/WindFarmParametrization/SimpleActuatorDisk/AdvanceSimpleAD.cpp
${SRC_DIR}/LandSurfaceModel/SLM/SLM.cpp
${SRC_DIR}/LandSurfaceModel/MM5/MM5.cpp
)
Expand Down Expand Up @@ -233,7 +236,12 @@ function(build_erf_lib erf_lib_name)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/Microphysics)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/Microphysics/Null)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/Microphysics/SAM)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/Microphysics/Kessler)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/Microphysics/Kessler)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/WindFarmParametrization)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/WindFarmParametrization/Null)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/WindFarmParametrization/Fitch)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/WindFarmParametrization/EWP)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/WindFarmParametrization/SimpleActuatorDisk)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/LandSurfaceModel)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/LandSurfaceModel/Null)
target_include_directories(${erf_lib_name} PUBLIC ${SRC_DIR}/LandSurfaceModel/SLM)
Expand Down
8 changes: 7 additions & 1 deletion Docs/sphinx_doc/Inputs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,11 @@ methods for defining how the terrain-fitted coordinates given the topography:
- Sullivan Terrain Following (name TBD):
The influence of the terrain decreases with the cube of height.

A custom surface definition may be provided through the ``erf.terrain_file_name`` parameter.
The specified input text file should have three space-delimited columns for x, y, and z coordinates,
which will dictate the location of surface *nodes*. All surface nodes within the computational
domain must be specified within the text file, but may be specified in any order.

List of Parameters
------------------

Expand All @@ -1073,7 +1078,8 @@ List of Parameters
| | following | 1, | |
| | | 2 | |
+-----------------------------+--------------------+--------------------+------------+

| **erf.terrain_file_name** | filename | String | NONE |
+-----------------------------+--------------------+--------------------+------------+

Examples of Usage
-----------------
Expand Down
174 changes: 174 additions & 0 deletions Docs/sphinx_doc/containers.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
.. role:: cpp(code)
:language: c++

.. _containers:

ERF containers on Perlmutter
============================

The following is a brief introduction to ERF and containers on the NERSC Perlmutter platform.

For more details, please see `NERSC's detailed containers documentation <https://docs.nersc.gov/development/containers>`_, which also includes containers tutorials.

Container images can be built on one's desktop/laptop using a standard container framework such as Docker, Podman (Pod Manager), etc. or directly on a Perlmutter login node using ``podman-hpc``. ``podman-hpc`` is a NERSC-developed wrapper that extends the capabilities of Podman for HPC. Containers are run on Perlmutter using either ``podman-hpc`` or ``shifter`` (also developed at NERSC). NERSC has a good `podman-hpc tutorial <https://docs.nersc.gov/development/containers/podman-hpc/podman-beginner-tutorial>`_.

Example ERF containerfile
~~~~~~~~~~~~~~~~~~~~~~~~~

The following is an example ERF containerfile with filename ``erf_containerfile``:

.. code:: shell
1 FROM nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04
2
3 WORKDIR /app
4 ARG base_dir=/app/erf
5
6 RUN apt-get update -y && \
7 DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
8 g++-12 \
9 gcc-12 \
10 gfortran-12 \
11 git \
12 libtool \
13 make \
14 tar \
15 autoconf \
16 automake \
17 wget \
18 python3 \
19 cmake
20
21 # MPICH to be swapped out later for Cray MPI
22 ARG mpich_version=4.2.2
23 ARG mpich_dir=mpich-${mpich_version}
24
25 RUN wget https://www.mpich.org/static/downloads/$mpich_version/$mpich_dir.tar.gz && \
26 tar xzf $mpich_dir.tar.gz && \
27 cd $mpich_dir && \
28 ./configure CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12 F77=/usr/bin/gfortran-12 FC=/usr/bin/gfortran-12 && \
29 make -j8 && \
30 make install && \
31 make clean && \
32 cd .. && \
33 rm -rf $mpich_dir $mpich_dir.tar.gz
34
35 RUN mkdir ${base_dir}
36
37 ARG build_dir=MyBuild
38
39 RUN cd ${base_dir} && git clone --recursive https://github.com/erf-model/ERF.git && \
40 cd ERF && mkdir ${build_dir} && cd ${build_dir} && \
41 cmake \
42 -DCMAKE_C_COMPILER=mpicc \
43 -DCMAKE_CXX_COMPILER=mpicxx \
44 -DCMAKE_Fortran_COMPILER=mpif90 \
45 -DCMAKE_BUILD_TYPE:STRING=Release \
46 -DCMAKE_CUDA_ARCHITECTURES=80 \
47 -DERF_ENABLE_MPI:BOOL=ON \
48 -DERF_ENABLE_CUDA=ON \
49 .. && \
50 make -j8
Line numbers were added for instructional purposes and should not appear in the actual containerfile.
The containerfile is available at https://github.com/erf-model/ERF/blob/development/Build/erf_containerfile

* Line 1 downloads a container base image from NVIDIA's container registry that contains the Ubuntu 22.04 operating system and CUDA 12.2.0
* Line 3 sets the working directory
* Line 4 sets a value for the variable ``base_dir``
* Lines 6-19 download the GNU 12 compilers and all the necessary utilities for building ERF in the container
* Lines 21-33 download the MPICH source code and build it. At runtime this MPICH will get replaced by Perlmutter's Cray MPI
* Lines 39-50 clone the ERF repo and build it with cmake/make



Build the container on Perlmutter using ``podman-hpc`` and using the containerfile ``erf_containerfile`` with name ``erf`` and tag ``1.00`` (``-t <name>:<tag>``)

.. code:: shell
podman-hpc build -t erf:1.00 -f erf_containerfile
In order to use this image in a job or access it from any other login node, one needs to migrate the image onto the $SCRATCH filesystem by issuing the following command:

.. code:: shell
podman-hpc migrate erf:1.00
``podman-hpc images`` will display the following

.. code:: shell
user@perlmutter:login12:~> podman-hpc images
REPOSITORY TAG IMAGE ID CREATED SIZE R/O
localhost/erf 1.00 893605c3ee9b 5 hours ago 16.1 GB true
localhost/erf 1.00 893605c3ee9b 5 hours ago 16.1 GB false
Note that ``localhost`` will not be needed for podman-hpc commands.

Run container on Perlmutter
~~~~~~~~~~~~~~~~~~~~~~~~~~~

Submit the following slurm batch script in order to use the image in a job

.. code:: shell
#!/bin/bash
#SBATCH --account=<proj>
#SBATCH --constraint=gpu
#SBATCH --job-name=erf
#SBATCH --nodes=1
#SBATCH --time=0:05:00
#SBATCH -q regular
srun -N 1 -n 4 -c 32 --ntasks-per-node=4 --gpus-per-node=4 ./device_wrapper \
podman-hpc run --rm --mpi --gpu -v /pscratch/sd/u/user/erf/abl:/run -w /run erf:1.00 \
/app/erf/ERF/MyBuild/Exec/ABL/erf_abl inputs_smagorinsky amrex.use_gpu_aware_mpi=0
``device_wrapper`` script:

.. code:: shell
#!/bin/bash
# device_wrapper: select a GPU for this task, then run the wrapped command.
# The visible CUDA device index is 3 minus the value of SLURM_LOCALID.
export CUDA_VISIBLE_DEVICES=$((3-$SLURM_LOCALID))
# "$@" forwards every argument exactly as received; an unquoted $* would
# re-split arguments containing whitespace and expand glob characters.
exec "$@"
Arguments for ``podman-hpc run`` used above

* ``--rm`` removes the container after exit
* ``--mpi`` enables Cray MPI support (swaps MPICH in the container for Perlmutter's Cray MPI)
* ``--gpu`` enables NVIDIA GPU support
* ``-v /pscratch/sd/u/user/erf/abl:/run`` mounts ``/pscratch/sd/u/user/erf/abl`` on Perlmutter onto ``/run`` in the container
* ``-w /run`` makes the ``/run`` directory inside the container the working directory, i.e. any output from the ERF run will be written to the ``/run`` directory in the container, which will appear in the ``/pscratch/sd/u/user/erf/abl`` directory on Perlmutter.
* ``erf:1.00`` container name and tag
* ``/app/erf/ERF/MyBuild/Exec/ABL/erf_abl`` ERF binary in container

The remaining arguments are the normal ERF command line arguments.

Please issue ``podman-hpc --help`` for the help page and ``podman-hpc run --help`` for the ``podman-hpc run`` help page.

Container image libraries provide a convenient way to store and share images.
The best known one is probably Docker Hub. NERSC provides a private registry to its users via `registry.nersc.gov <https://docs.nersc.gov/development/containers/registry>`_.

``shifter`` is a NERSC-developed tool that provides an alternative method for running containers on Perlmutter. `NERSC's containers documentation <https://docs.nersc.gov/development/containers>`_ provides an introduction to shifter including a tutorial.

Common Issues
~~~~~~~~~~~~~

* Using ``podman`` rather than ``podman-hpc`` on Perlmutter (best to always use ``podman-hpc``)
* Re-issuing ``podman-hpc migrate <name>:<tag>`` with a ``<name>:<tag>`` that was already used in an earlier ``podman-hpc migrate`` command. To keep the same name, change the ``<tag>`` to one that has not been used previously. To reuse an identical ``<name>:<tag>``, first issue ``podman-hpc rmsqi <name>:<tag>`` to delete the old image. Otherwise one could potentially end up with errors such as

.. code:: shell
Error: read-only image store assigns the same name to multiple images
and will have to resort to `various methods <https://docs.nersc.gov/development/containers/podman-hpc/overview/#troubleshooting>`_ to get out of the bad configuration state.

* The default containerfile is a file called ``Containerfile`` (case sensitive). When that file is being used, one can replace ``-f erf_containerfile`` with a period:

.. code:: shell
podman-hpc build -t erf:1.00 .
Note that for this case, the period is mandatory. Here it does not denote the end of a sentence.
6 changes: 6 additions & 0 deletions Docs/sphinx_doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,9 @@ In addition to this documentation, there is API documentation for ERF generated
:maxdepth: 1
Applications_Requirements.rst
.. toctree::
:caption: CONTAINERS
:maxdepth: 1
containers.rst
50 changes: 27 additions & 23 deletions Exec/DevTests/ABL_perturbation_inflow/toc_inout_inputs
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
# ------------------ INPUTS TO MAIN PROGRAM -------------------
stop_time = 20.0
stop_time = 60.0
#max_step = 5

erf.no_substepping = 1
amrex.fpe_trap_invalid = 1
fabarray.mfiter_tile_size = 1024 1024 1024

# PROBLEM SIZE & GEOMETRY
geometry.prob_extent = 40 2.5 10
#geometry.prob_extent = 2. 0.25 1.
amr.n_cell = 128 16 64
#Larger problem
geometry.prob_extent = 40 5 10
amr.n_cell = 256 32 64

# Quick debug problem
#geometry.prob_extent = 20 5 10
#amr.n_cell = 64 16 32

geometry.is_periodic = 0 1 0

xlo.type = "Inflow"
xhi.type = "Outflow"
zhi.type = "SlipWall"
#zlo.type = "NoSlipWall"
zlo.type = "Most"

zlo.type = "Most"
erf.most.flux_type = "custom"
erf.most.ustar = 0.0395 # z=10.
#erf.most.ustar = 0.395 # z=1.0
Expand All @@ -27,17 +31,16 @@ erf.most.qstar = 0. # qv flux

xlo.density = 1.0
xlo.theta = 300.0
#xlo.velocity = 1.0 0.0 0.0
xlo.dirichlet_file = "input_ReTau395Ana_inflow.txt"

# TIME STEP CONTROL
erf.cfl = 0.4
erf.cfl = 0.5
erf.dynamicViscosity = 0.001
erf.use_gravity = true
erf.buoyancy_type = 2

# DIAGNOSTICS & VERBOSITY
erf.sum_interval = 0 # timesteps between computing mass
erf.pert_interval = 5 # timesteps between perturbation output message XXX
erf.sum_interval = 1 # timesteps between computing mass
erf.pert_interval = 1 # timesteps between perturbation output message XXX
erf.v = 0 # verbosity in ERF.cpp XXX
amr.v = 0 # verbosity in Amr.cpp

Expand All @@ -46,8 +49,8 @@ amr.max_level = 0 # maximum level number allowed

# PLOTFILES
erf.plot_file_1 = plt # prefix of plotfile name
#erf.plot_per_1 = 0.1
erf.plot_int_1 = 5
erf.plot_per_1 = 0.05
#erf.plot_int_1 = 5
erf.plot_vars_1 = density rhoadv_0 x_velocity y_velocity z_velocity pressure temp theta

# CHECKPOINT FILES
Expand All @@ -56,28 +59,29 @@ erf.plot_vars_1 = density rhoadv_0 x_velocity y_velocity z_velocity pressure

# SOLVER CHOICE
erf.alpha_T = 0.0
erf.alpha_C = 1.0

erf.alpha_C = 0.0
erf.molec_diff_type = "None"
erf.les_type = "Smagorinsky"
erf.Cs = 0.1

# Initial condition for the entire field
erf.init_type = "input_sounding"
erf.input_sounding_file = "input_ReTau395Ana_sounding.txt"
erf.use_gravity = true
erf.buoyancy_type = 2

# Turbulent inflow generation
erf.perturbation_type = "source"
erf.perturbation_direction = 1 0 0
erf.perturbation_layers = 3
erf.perturbation_offset = 3

erf.perturbation_box_dims = 4 8 8
erf.perturbation_box_dims = 8 8 4
erf.perturbation_nondimensional = 0.042 # Ri
erf.perturbation_T_infinity = 300.0
erf.perturbation_T_intensity = 0.05
erf.perturbation_T_intensity = 0.1

# Initial condition for the entire field
#erf.init_type = "uniform"
erf.init_type = "input_sounding"
erf.input_sounding_file = "input_ReTau395Ana_sounding.txt"

# PROBLEM PARAMETERS
#prob.U_0_Pert_Mag = 1.0
#prob.V_0_Pert_Mag = 1.0
#prob.W_0_Pert_Mag = 0.0
prob.rho_0 = 1.0
prob.T_0 = 300.0
Loading

0 comments on commit 060bf63

Please sign in to comment.