From 9c857e476498afadae611053f965c16e2ed19a04 Mon Sep 17 00:00:00 2001
From: Benedict Geihe
Date: Tue, 17 Dec 2024 12:59:31 +0100
Subject: [PATCH] edit folder for esiwace project

---
 esiwace/README.md                                   | 116 ++++++++++++++++++
 .../elixir_euler_taylor_green_vortex.jl             |  74 +++++++++++
 esiwace/jobscripts/single_node.sh                   |  16 +++
 esiwace/run.jl                                      |  66 ++++++++++
 4 files changed, 272 insertions(+)
 create mode 100644 esiwace/README.md
 create mode 100644 esiwace/elixirs/elixir_euler_taylor_green_vortex.jl
 create mode 100644 esiwace/jobscripts/single_node.sh
 create mode 100644 esiwace/run.jl

diff --git a/esiwace/README.md b/esiwace/README.md
new file mode 100644
index 00000000000..32265cf8b4e
--- /dev/null
+++ b/esiwace/README.md
@@ -0,0 +1,116 @@
# ESiWACE3 Trixi.jl service

## Instructions for the terrabyte cluster

You need to get an account at https://docs.terrabyte.lrz.de/services/identity/get-account/
and set up two-factor authentication.

### Login
```shell
ssh login.terrabyte.lrz.de
```

### Set up t8code
**TODO: change to project directory, then this step can be skipped**
1. Load modules:
   ```shell
   module load gcc/11.2.0
   module load openmpi/4.1.2-gcc11
   module load hdf5/1.10.7-gcc11
   ```
2. Change to the scratch folder:
   ```shell
   cd $SCRATCH
   ```
3. Clone the repository:
   ```shell
   git clone --branch 'v3.0.1' --depth 1 https://github.com/DLR-AMR/t8code.git
   cd t8code
   git submodule init
   git submodule update
   ```
4. Build using CMake:
   ```shell
   module add cmake
   mkdir build
   cd build
   cmake \
     -DCMAKE_C_COMPILER=mpicc \
     -DCMAKE_CXX_COMPILER=mpicxx \
     -DCMAKE_BUILD_TYPE=Release \
     -DCMAKE_INSTALL_PREFIX="$SCRATCH/install/t8code" \
     -DT8CODE_BUILD_TESTS=OFF \
     -DT8CODE_BUILD_TUTORIALS=OFF \
     -DT8CODE_BUILD_EXAMPLES=OFF \
     -DT8CODE_BUILD_BENCHMARKS=OFF \
     -DT8CODE_ENABLE_MPI=ON \
     ..
   nice make -j8
   nice make install -j8
   ```

## Set up Julia
Julia is not available on the cluster, so it has to be installed manually.
1. If there is no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one:
   ```shell
   touch $HOME/.bashrc
   ```
2. Use the official Julia installer:
   ```shell
   curl -fsSL https://install.julialang.org | sh
   ```
   Accept the defaults. Once finished, you will be told to source your `.bashrc` or to log in again.
3. Julia should now be available:
   ```shell
   julia --version
   ```
4. Install the 1.11 release channel:
   ```shell
   juliaup add 1.11
   ```

## Set up Trixi.jl
1. Clone the repository and switch to the GPU development branch:
   ```shell
   git clone https://github.com/benegee/Trixi.jl.git
   cd Trixi.jl
   git switch lc/gpu-develop
   ```
2. Go to the `esiwace` directory. Necessary environment settings are collected in
   `profile`. Edit this file as necessary and source it (a hypothetical sketch is
   shown after this list):
   ```shell
   . profile
   ```
3. The Julia project is configured by several files: `Project.toml` lists dependencies,
   `Manifest.toml` lists exact version numbers for all required packages, and
   `LocalPreferences.toml` contains advanced configuration options.
   It should only be necessary to adapt `LocalPreferences.toml` to reflect the t8code
   installation path.
4. Open Julia via the `$JL` command and instantiate the project:
   ```shell
   $JL --project -e 'using Pkg; Pkg.instantiate()'
   ```
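The `profile` file referenced in step 2 is not included in this patch. Purely as a
hypothetical sketch (assuming the module versions from the t8code section above and
juliaup's channel selector for the `JL` shortcut used throughout this README), it might
look like the following; adapt it to the actual file in the repository:
```shell
# hypothetical sketch of `profile` -- the actual file in the repository may differ

# modules matching the t8code build (see "Set up t8code" above)
module load gcc/11.2.0
module load openmpi/4.1.2-gcc11
module load hdf5/1.10.7-gcc11

# shortcut for the Julia binary used in this README and in the jobscripts
export JL="julia +1.11"
```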
## Precompile Trixi.jl
1. Make sure that everything is precompiled by running the following:
   ```shell
   $JL --project -e 'using OrdinaryDiffEq, Trixi'
   ```
2. To test CUDA, first log in to a GPU node:
   ```shell
   salloc --cluster=hpda2 --partition=hpda2_compute_gpu --nodes=1 --ntasks-per-node=1 --gres=gpu:4 --time=00:30:00
   ```
   Then start Julia:
   ```shell
   $JL --project -e 'using CUDA; CUDA.versioninfo()'
   ```

## Launch
1. SLURM jobscripts are found in `jobscripts`. Edit them as necessary. At a minimum, you
   have to specify your email address.
2. The actual simulation is configured in `run.jl` and based on the Trixi.jl elixir files
   in `elixirs`.
3. Send the job to the queue:
   ```shell
   sbatch jobscripts/single_node.sh
   ```
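4. Optionally, the setup can be verified without going through the queue by running the
   elixir interactively on a GPU node (allocated as in the precompilation section above).
   The one-liner below is only a sketch; it mirrors the warm-up step in `run.jl` by
   overriding the final time so that only a tiny integration is performed:
   ```shell
   $JL --project -e 'using Trixi; trixi_include("elixirs/elixir_euler_taylor_green_vortex.jl", tspan=(0.0, 1e-14))'
   ```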
diff --git a/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl b/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl
new file mode 100644
index 00000000000..1d204b43680
--- /dev/null
+++ b/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl
@@ -0,0 +1,74 @@
using OrdinaryDiffEq
using Trixi
using CUDA
CUDA.allowscalar(false)

###############################################################################
# semidiscretization of the compressible Euler equations

equations = CompressibleEulerEquations3D(1.4)

function initial_condition_taylor_green_vortex(x, t,
                                               equations::CompressibleEulerEquations3D)
    A = 1.0  # magnitude of speed
    Ms = 0.1 # maximum Mach number

    rho = 1.0
    v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3])
    v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3])
    v3 = 0.0
    p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms
    p = p + 1.0 / 16.0 * A^2 * rho * (cos(2 * x[1]) * cos(2 * x[3]) +
                                      2 * cos(2 * x[2]) + 2 * cos(2 * x[1]) +
                                      cos(2 * x[2]) * cos(2 * x[3]))

    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
end

initial_condition = initial_condition_taylor_green_vortex

#volume_flux = flux_ranocha
volume_flux = flux_lax_friedrichs
solver = DGSEM(polydeg=5, surface_flux=volume_flux,
               volume_integral=VolumeIntegralFluxDifferencing(volume_flux))

coordinates_min = (-1.0, -1.0, -1.0) .* pi
coordinates_max = ( 1.0,  1.0,  1.0) .* pi

initial_refinement_level = 1
trees_per_dimension = (4, 4, 4)

mesh = P4estMesh(trees_per_dimension, polydeg=1,
                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
                 periodicity=true, initial_refinement_level=initial_refinement_level)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)


###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0, 1000.0)
ode = semidiscretize(semi, tspan; adapt_to=CuArray)

summary_callback = SummaryCallback()

stepsize_callback = StepsizeCallback(cfl=0.1)

callbacks = CallbackSet(summary_callback, stepsize_callback)


###############################################################################
# run the simulation

maxiters = 200

# disable warnings when maxiters is reached
sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
            dt=1.0, # will be overwritten by the stepsize callback
            save_everystep=false, callback=callbacks,
            maxiters=maxiters, verbose=false);

# print the timer summary
summary_callback()

finalize(mesh)

diff --git a/esiwace/jobscripts/single_node.sh b/esiwace/jobscripts/single_node.sh
new file mode 100644
index 00000000000..a0cf8b6742f
--- /dev/null
+++ b/esiwace/jobscripts/single_node.sh
@@ -0,0 +1,16 @@
#!/bin/bash -x
#SBATCH --cluster=hpda2
#SBATCH --partition=hpda2_compute_gpu
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --gres=gpu:4
#SBATCH --mail-user=
#SBATCH --mail-type=all
#SBATCH --export=NONE
#SBATCH --output=stdout.%j
#SBATCH --error=stderr.%j
#SBATCH --time=00:30:00

source profile

srun $JL --threads=1 --project=. run.jl

diff --git a/esiwace/run.jl b/esiwace/run.jl
new file mode 100644
index 00000000000..21b5e6d5975
--- /dev/null
+++ b/esiwace/run.jl
@@ -0,0 +1,66 @@
using Trixi
using MPI
using TimerOutputs
using CUDA

function main(elixir_path)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    isroot = rank == 0

    # pin rank to device?
    #if machine == "jedi"
    #    CUDA.device!(rank % 4)
    #end
    print("Rank $rank has device: $(CUDA.device())\n")

    # setup
    maxiters = 400

    if isroot
        println("Warming up...")
    end

    # run the elixir with a tiny final time to trigger compilation (warm-up)
    duration_precompile = @elapsed trixi_include(elixir_path,
                                                 tspan=(0.0, 1e-14))

    if isroot
        println("Finished warm-up in $duration_precompile seconds\n")
        println("Starting simulation...")
    end

    # start the real simulation
    duration_elixir = @elapsed trixi_include(elixir_path, maxiters=maxiters)

    # store metrics (on every rank!)
    metrics = Dict{String, Float64}("elapsed time" => duration_elixir)

    # read TimerOutputs timings (nanoseconds -> seconds)
    timer = Trixi.timer()
    metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer)
    metrics["rhs! time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"])

    # compute performance index (PID): wall time per degree of freedom and rhs! call,
    # scaled by the number of MPI ranks
    nrhscalls = Trixi.ncalls(semi.performance_counter)
    walltime = 1.0e-9 * take!(semi.performance_counter)
    metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls)

    # gather metrics from all ranks on rank 0
    gathered_metrics = MPI.gather(metrics, comm)

    if isroot
        # reduce each metric over the ranks (minimum) and write it to a file
        open("metrics.out", "w") do io
            for (key, _) in gathered_metrics[1]
                println(io, key, ": ", mapreduce(x -> x[key], min, gathered_metrics))
            end
        end
    end
end

# hardcoded elixir
elixir_path = joinpath(@__DIR__(), "elixirs/elixir_euler_taylor_green_vortex.jl")

main(elixir_path)
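# Usage: this script is launched through SLURM (cf. jobscripts/single_node.sh), e.g.
#     srun $JL --threads=1 --project=. run.jl
# Rank 0 collects the metrics from all ranks and writes the minimum of each metric
# to metrics.out.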