forked from trixi-framework/Trixi.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
272 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# ESiWACE3 Trixi.jl service | ||
|
||
## Instructions for terrabyte cluster | ||
|
||
You need to get an account at https://docs.terrabyte.lrz.de/services/identity/get-account/ | ||
and set up two-factor authentication. | ||
|
||
### Login | ||
```shell | ||
ssh login.terrabyte.lrz.de | ||
``` | ||
|
||
### Set up t8code | ||
**TODO: change to project directory, then this step can be skipped** | ||
1. Load modules | ||
```shell | ||
module load gcc/11.2.0 | ||
module load openmpi/4.1.2-gcc11 | ||
module load hdf5/1.10.7-gcc11 | ||
``` | ||
2. Change to scratch folder | ||
```shell | ||
cd $SCRATCH | ||
``` | ||
3. Clone the repository | ||
```shell | ||
git clone --branch 'v3.0.1' --depth 1 https://github.com/DLR-AMR/t8code.git | ||
cd t8code | ||
git submodule init | ||
git submodule update | ||
``` | ||
4. Build using cmake: | ||
```shell | ||
module add cmake | ||
mkdir build | ||
cd build | ||
cmake \ | ||
-DCMAKE_C_COMPILER=mpicc \ | ||
-DCMAKE_CXX_COMPILER=mpicxx \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_INSTALL_PREFIX="$SCRATCH/install/t8code" \ | ||
-DT8CODE_BUILD_TESTS=OFF \ | ||
-DT8CODE_BUILD_TUTORIALS=OFF \ | ||
-DT8CODE_BUILD_EXAMPLES=OFF \ | ||
-DT8CODE_BUILD_BENCHMARKS=OFF \ | ||
-DT8CODE_ENABLE_MPI=ON \ | ||
.. | ||
nice make -j8 | ||
nice make install -j8 | ||
``` | ||
|
||
## Set up Julia | ||
Julia is not available on the cluster. We need to install it manually. | ||
1. If there is no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one: | ||
``` | ||
touch $HOME/.bashrc | ||
``` | ||
2. Use the official Julia installer: | ||
```shell | ||
curl -fsSL https://install.julialang.org | sh | ||
``` | ||
Accept the defaults. Once finished you will be told to source your `.bashrc` or re-login. | ||
3. Julia should now be available | ||
```shell | ||
julia --version | ||
``` | ||
4. Install the 1.11 branch | ||
```shell | ||
juliaup add 1.11 | ||
``` | ||
|
||
## Set up Trixi.jl | ||
1. Clone the repository | ||
```shell | ||
git clone https://github.com/benegee/Trixi.jl.git | ||
cd Trixi.jl | ||
git switch lc/gpu-develop | ||
``` | ||
2. Go to the `esiwace` directory. We collect the necessary environment settings in | ||
`profile`. Edit this file as necessary and source it: | ||
```shell | ||
. profile | ||
``` | ||
3. The Julia project is configured by several files: `Project.toml` lists dependencies, | ||
`Manifest.toml` lists exact version numbers for all required packages, and | ||
`LocalPreferences.toml` contains advanced configuration options. | ||
It should only be necessary to adapt `LocalPreferences.toml` to reflect the t8code | ||
installation path. | ||
4. Open Julia via the `$JL` command and instantiate the project: | ||
```shell | ||
$JL --project -e 'using Pkg; Pkg.instantiate()' | ||
``` | ||
|
||
|
||
## Precompile Trixi.jl | ||
1. Make sure that everything is precompiled by running the following: | ||
```shell | ||
$JL --project -e 'using OrdinaryDiffEq, Trixi' | ||
``` | ||
2. To test CUDA, first log in to a GPU node: | ||
```shell | ||
salloc --cluster=hpda2 --partition=hpda2_compute_gpu --nodes=1 --ntasks-per-node=1 --gres=gpu:4 --time=00:30:00 | ||
``` | ||
Then start Julia: | ||
```shell | ||
$JL --project -e 'using CUDA; CUDA.versioninfo()' | ||
``` | ||
|
||
|
||
## Launch | ||
1. SLURM jobscripts are found in `jobscripts`. Edit them as necessary. At the very least, you have to | ||
specify your mail address. | ||
2. The actual simulation is configured in `run.jl` and is based on a Trixi.jl elixir file in `elixirs`. | ||
3. Send job to queue: | ||
```shell | ||
sbatch jobscript/single_node.sh | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
using OrdinaryDiffEq | ||
using Trixi | ||
using CUDA | ||
CUDA.allowscalar(false) | ||
|
||
###############################################################################
# semidiscretization of the compressible Euler equations

# 3D compressible Euler equations. The constructor argument is the ratio of
# specific heats; it is read back as `equations.gamma` by the initial condition.
equations = CompressibleEulerEquations3D(1.4)
|
||
"""
    initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D)

Return the conservative state of a Taylor-Green vortex at position `x`.

The velocity field has magnitude `A = 1` and no component in the third
direction; the density is constant (`rho = 1`). The background pressure is
scaled with `equations.gamma` so that the maximum Mach number is `Ms = 0.1`,
and a position-dependent pressure perturbation is added on top of it.
The time `t` is accepted for interface compatibility but is not used.

The primitive state `(rho, v1, v2, v3, p)` is converted with `prim2cons`
before it is returned.
"""
function initial_condition_taylor_green_vortex(x, t,
                                               equations::CompressibleEulerEquations3D)
    # Derive all constants from the coordinate type instead of hard-coding
    # Float64 literals; for Float64 coordinates the values are bit-identical.
    RealT = eltype(x)
    A = one(RealT)             # magnitude of speed
    Ms = convert(RealT, 0.1)   # maximum Mach number

    rho = one(RealT)
    v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3])
    v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3])
    v3 = zero(RealT)

    # background pressure, scaled to obtain the maximum Mach number `Ms`
    p = (A / Ms)^2 * rho / equations.gamma
    # position-dependent pressure perturbation
    p = p +
        one(RealT) / 16 * A^2 * rho *
        (cos(2 * x[1]) * cos(2 * x[3]) +
         2 * cos(2 * x[2]) + 2 * cos(2 * x[1]) + cos(2 * x[2]) * cos(2 * x[3]))

    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
end
|
||
initial_condition = initial_condition_taylor_green_vortex

# The same numerical flux is used for the surface terms and inside the
# flux-differencing volume integral. `flux_ranocha` is the alternative that
# was tried here; switch the assignment below to use it.
volume_flux = flux_lax_friedrichs
solver = DGSEM(polydeg=5, surface_flux=volume_flux,
               volume_integral=VolumeIntegralFluxDifferencing(volume_flux))

# periodic cube [-pi, pi]^3
coordinates_min = (-1.0, -1.0, -1.0) .* pi
coordinates_max = ( 1.0,  1.0,  1.0) .* pi

initial_refinement_level = 1
trees_per_dimension = (4, 4, 4)

mesh = P4estMesh(trees_per_dimension, polydeg=1,
                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
                 periodicity=true, initial_refinement_level=initial_refinement_level)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)


###############################################################################
# ODE solvers, callbacks etc.

# NOTE: `tspan` and `maxiters` are kept as plain top-level assignments because
# the driver script overrides them by name through `trixi_include`.
tspan = (0.0, 1000.0)
# `adapt_to=CuArray` requests the ODE problem's data as CUDA arrays
ode = semidiscretize(semi, tspan; adapt_to=CuArray)

summary_callback = SummaryCallback()

stepsize_callback = StepsizeCallback(cfl=0.1)

callbacks = CallbackSet(summary_callback, stepsize_callback)


###############################################################################
# run the simulation

# The run is bounded by the number of time steps rather than by `tspan`.
maxiters = 200

# `verbose=false` disables the warning that is otherwise emitted when
# `maxiters` is reached.
sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
            dt=1.0, # presumably only a placeholder that the stepsize callback replaces
            save_everystep=false, callback=callbacks,
            maxiters=maxiters, verbose=false);

# print the timer summary
summary_callback()

finalize(mesh)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash -x
# SLURM job script: one GPU node, four tasks, four GPUs.
# Replace <mail_addr> with your own mail address before submitting.
#SBATCH --cluster=hpda2
#SBATCH --partition=hpda2_compute_gpu
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --gres=gpu:4
#SBATCH --mail-user=<mail_addr>
#SBATCH --mail-type=all
#SBATCH --export=NONE
#SBATCH --output=stdout.%j
#SBATCH --error=stderr.%j
#SBATCH --time=00:30:00

# The submission environment is not exported (--export=NONE), so everything
# the job needs, including $JL, has to come from `profile`.
source profile

# Abort with a clear message if `profile` did not define $JL instead of
# handing an empty command to srun. $JL is intentionally left unquoted so
# that it may contain additional launcher arguments.
srun ${JL:?JL is unset - edit and source profile first} --threads=1 --project=. run.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
using Trixi | ||
using MPI | ||
using TimerOutputs | ||
using CUDA | ||
|
||
"""
    main(elixir_path; maxiters = 400, metrics_file = "metrics.out")

Run the elixir at `elixir_path` twice via `trixi_include` and record timings.

The first run uses `tspan = (0.0, 1e-14)` and only serves to trigger
compilation. The second run is limited to `maxiters` time steps and is the
one that is measured. Afterwards each rank collects

- `"elapsed time"`: wall time of the second `trixi_include` call,
- `"total time"` and `"rhs! time"`: values read from `Trixi.timer()`,
- `"PID"`: `walltime * nranks / (ndofsglobal * nrhscalls)` computed from the
  performance counter of `semi`,

and the root rank writes the minimum of every metric over all ranks to
`metrics_file`.

# Keywords
- `maxiters`: number of time steps of the measured run (default `400`).
- `metrics_file`: path of the output file written by rank 0
  (default `"metrics.out"`).
"""
function main(elixir_path; maxiters = 400, metrics_file = "metrics.out")
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    isroot = rank == 0

    # TODO: decide whether ranks have to be pinned to devices explicitly on
    # some machines, e.g. with `CUDA.device!(rank % 4)`.
    print("Rank $rank has device: $(CUDA.device())\n")

    if isroot
        println("Warming up...")
    end

    # start simulation with tiny final time to trigger precompilation
    duration_precompile = @elapsed trixi_include(elixir_path,
                                                 tspan = (0.0, 1e-14))

    if isroot
        println("Finished warm-up in $duration_precompile seconds\n")
        println("Starting simulation...")
    end

    # start the real simulation
    duration_elixir = @elapsed trixi_include(elixir_path, maxiters = maxiters)

    # store metrics (on every rank!)
    metrics = Dict{String, Float64}("elapsed time" => duration_elixir)

    # read TimerOutputs timings; the factor 1.0e-9 converts ns to s
    timer = Trixi.timer()
    metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer)
    metrics["rhs! time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"])

    # compute performance index
    # NOTE(review): `semi` is not a local variable. It is expected to be the
    # global that the elixir assigns, which relies on `trixi_include`
    # evaluating the elixir in the module this function lives in - confirm.
    # The number of calls must be read before `take!`, which consumes the
    # counter.
    nrhscalls = Trixi.ncalls(semi.performance_counter)
    walltime = 1.0e-9 * take!(semi.performance_counter)
    metrics["PID"] = walltime * Trixi.mpi_nranks() /
                     (Trixi.ndofsglobal(semi) * nrhscalls)

    # gather metrics from all ranks
    gathered_metrics = MPI.gather(metrics, comm)

    if isroot
        # reduce every metric to its minimum over all ranks
        open(metrics_file, "w") do io
            for key in keys(first(gathered_metrics))
                println(io, key, ": ", minimum(m -> m[key], gathered_metrics))
            end
        end
    end

    return nothing
end
|
||
# hardcoded elixir, resolved relative to the directory of this script
elixir_path = joinpath(@__DIR__, "elixirs", "elixir_euler_taylor_green_vortex.jl")

main(elixir_path)