From 95a021d7534053cdf44ace56efd67d3fc65828df Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Apr 2024 13:04:10 +0200
Subject: [PATCH 01/89] Wrap from_abaqus routines.

---
 src/meshes/p4est_mesh.jl | 44 ++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl
index abe9d9345b5..61713867e0f 100644
--- a/src/meshes/p4est_mesh.jl
+++ b/src/meshes/p4est_mesh.jl
@@ -387,11 +387,43 @@ function P4estMesh{NDIMS}(meshfile::String;
                             p4est_partition_allow_for_coarsening)
 end
 
+# Build the `p4est` forest for a HOHQMesh-generated Abaqus file. All parsing is delegated
+# to `p4est_connectivity_from_hohqmesh_abaqus`, which `T8codeMesh` reuses as well.
+function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
+                                         n_dimensions, RealT)
+    parsed = p4est_connectivity_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
+                                                     n_dimensions, RealT)
+    conn, coordinates, reference_nodes, names = parsed
+
+    # Only the connectivity is needed to create the forest itself.
+    forest = new_p4est(conn, initial_refinement_level)
+
+    return forest, coordinates, reference_nodes, names
+end
+
+# Build the `p4est` forest for a standard (non-HOHQMesh) Abaqus file. All parsing is
+# delegated to `p4est_connectivity_from_standard_abaqus`, which `T8codeMesh` reuses
+# as well.
+function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg,
+                                         initial_refinement_level, n_dimensions, RealT,
+                                         boundary_symbols)
+    parsed = p4est_connectivity_from_standard_abaqus(meshfile, mapping, polydeg,
+                                                     initial_refinement_level,
+                                                     n_dimensions, RealT,
+                                                     boundary_symbols)
+    conn, coordinates, reference_nodes, names = parsed
+
+    # Only the connectivity is needed to create the forest itself.
+    forest = new_p4est(conn, initial_refinement_level)
+
+    return forest, coordinates, reference_nodes, names
+end
+
 # Create the mesh connectivity, mapped node coordinates within each tree, reference nodes in [-1,1]
 # and a list of boundary names for the `P4estMesh`. High-order boundary curve information as well as
 # the boundary names on each tree are provided by the `meshfile` created by
 # [`HOHQMesh.jl`](https://github.com/trixi-framework/HOHQMesh.jl).
-function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
+function p4est_connectivity_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
                                          n_dimensions, RealT)
     # Create the mesh connectivity using `p4est`
     connectivity = read_inp_p4est(meshfile, Val(n_dimensions))
@@ -440,16 +472,14 @@ function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
         file_idx += 1
     end
 
-    p4est = new_p4est(connectivity, initial_refinement_level)
-
-    return p4est, tree_node_coordinates, nodes, boundary_names
+    return connectivity, tree_node_coordinates, nodes, boundary_names
 end
 
 # Create the mesh connectivity, mapped node coordinates within each tree, reference nodes in [-1,1]
 # and a list of boundary names for the `P4estMesh`. The tree node coordinates are computed according to
 # the `mapping` passed to this function using polynomial interpolants of degree `polydeg`. All boundary
 # names are given the name `:all`.
-function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg,
+function p4est_connectivity_from_standard_abaqus(meshfile, mapping, polydeg,
                                          initial_refinement_level, n_dimensions, RealT,
                                          boundary_symbols)
     # Create the mesh connectivity using `p4est`
@@ -474,8 +504,6 @@ function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg,
     calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, vertices,
                                 tree_to_vertex)
 
-    p4est = new_p4est(connectivity, initial_refinement_level)
-
     if boundary_symbols === nothing
         # There's no simple and generic way to distinguish boundaries without any information given.
         # Name all of them :all.
@@ -495,7 +523,7 @@ function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg,
                                            Val(n_dimensions))
     end
 
-    return p4est, tree_node_coordinates, nodes, boundary_names
+    return connectivity, tree_node_coordinates, nodes, boundary_names
 end
 
 function parse_elements(meshfile, n_trees, n_dims)

From 178ec183cc607e6a5c0ce074cfc6cf1483d12de8 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Apr 2024 13:05:17 +0200
Subject: [PATCH 02/89] Implement geometry data transfer from t8code to Trixi.

---
 src/Trixi.jl               |   2 +
 src/auxiliary/auxiliary.jl |  11 +
 src/meshes/t8code_mesh.jl  | 451 ++++++++++++++++++++++++-------------
 test/test_t8code_3d.jl     |   2 +-
 4 files changed, 304 insertions(+), 162 deletions(-)

diff --git a/src/Trixi.jl b/src/Trixi.jl
index 883f8d66f07..8c4a987ebd5 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -288,6 +288,8 @@ export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!,
        adapt_to_mesh_level,
        iplot, iplot!
 
+export GmshFile, AbaqusFile
+
 function __init__()
     init_mpi()
 
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index 972a748c56b..34e7ccbbfdb 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -370,4 +370,15 @@ function download(src_url, file_path)
 
     return file_path
 end
+
+abstract type MeshFile{NDIMS} end
+# Path to a Gmsh mesh file (`.msh`); the mesh dimension is carried by `NDIMS`.
+struct GmshFile{NDIMS} <: MeshFile{NDIMS}
+    path::String
+end
+# Path to an Abaqus mesh file (`.inp`); the mesh dimension is carried by `NDIMS`.
+struct AbaqusFile{NDIMS} <: MeshFile{NDIMS}
+    path::String
+end
+
 end # @muladd
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index cb2ac787e14..72f7cf337b1 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -26,7 +26,7 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
     nmpiinterfaces :: Int
     nmpimortars    :: Int
 
-    function T8codeMesh{NDIMS}(forest, tree_node_coordinates, nodes,
+    function T8codeMesh{NDIMS}(forest :: Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
                                current_filename) where {NDIMS}
         is_parallel = mpi_isparallel() ? True() : False()
@@ -100,6 +100,122 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::T8codeMesh)
     end
 end
 
+"""
+    T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = 1, mapping = nothing)
+
+Create a `T8codeMesh`.
+
+# Arguments
+- `forest`: Pointer to a committed forest.
+- `boundary_names`: List of boundary names.
+- `polydeg::Integer`: Polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+- `mapping`: a function of `NDIMS` variables to describe the mapping that transforms
+             the imported mesh to the physical domain. Use `nothing` for the identity map.
+"""
+function T8codeMesh{NDIMS, RealT}(forest :: Ptr{t8_forest}, boundary_names; polydeg = 1, mapping = nothing) where {NDIMS, RealT}
+    # In t8code, the reference space is [0,1]; rescale the Lobatto nodes accordingly.
+    basis = LobattoLegendreBasis(RealT, polydeg)
+    nodes = 0.5 .* (basis.nodes .+ 1.0)
+
+    cmesh = t8_forest_get_cmesh(forest)
+    num_trees = t8_forest_get_num_global_trees(forest)
+
+    tree_node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS,
+                                                    ntuple(_ -> length(nodes), NDIMS)...,
+                                                    num_trees)
+
+    coords_ref = Vector{Cdouble}(undef, 3)
+
+    # Calculate node coordinates of reference mesh.
+    if NDIMS == 2
+        num_corners = 4 # quadrilateral
+
+        # Testing for negative element volumes.
+        verts = zeros(3, num_corners)
+        for itree in 1:num_trees
+            veptr = t8_cmesh_get_tree_vertices(cmesh, itree-1)
+
+            # Note, `verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))`
+            # sometimes does not work since `veptr` is not necessarily properly
+            # aligned to 8 bytes.
+            for icorner in 1:num_corners
+                verts[1, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 1)
+                verts[2, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 2)
+            end
+
+            # Check if tree's node ordering is right-handed or print a warning.
+            let z = zero(eltype(verts)), o = one(eltype(verts))
+                u = verts[:, 2] - verts[:, 1]
+                v = verts[:, 3] - verts[:, 1]
+                w = [z, z, o]
+
+                # Triple product gives signed volume of spanned parallelepiped.
+                vol = dot(cross(u, v), w)
+
+                if vol < z
+                    @warn "Discovered negative volumes in `cmesh`: vol = $vol"
+                end
+            end
+
+            # Query geometry data from t8code.
+            for j in eachindex(nodes), i in eachindex(nodes)
+                coords_ref[1] = nodes[i]
+                coords_ref[2] = nodes[j]
+                coords_ref[3] = 0.0
+                t8_geometry_evaluate(cmesh, itree-1, coords_ref, 1, @view(tree_node_coordinates[:, i, j, itree]))
+            end
+        end
+
+    elseif NDIMS == 3
+        num_corners = 8 # hexahedron
+
+        # Testing for negative element volumes.
+        verts = zeros(3, num_corners)
+        for itree in 1:num_trees
+            veptr = t8_cmesh_get_tree_vertices(cmesh, itree-1)
+
+            # Note, `verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))`
+            # sometimes does not work since `veptr` is not necessarily properly
+            # aligned to 8 bytes.
+            for icorner in 1:num_corners
+                verts[1, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 1)
+                verts[2, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 2)
+                verts[3, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 3)
+            end
+
+            # Check if tree's node ordering is right-handed or print a warning.
+            let z = zero(eltype(verts))
+                u = verts[:, 2] - verts[:, 1]
+                v = verts[:, 3] - verts[:, 1]
+                w = verts[:, 5] - verts[:, 1]
+
+                # Triple product gives signed volume of spanned parallelepiped.
+                vol = dot(cross(u, v), w)
+
+                if vol < z
+                    @warn "Discovered negative volumes in `cmesh`: vol = $vol"
+                end
+            end
+
+            # Query geometry data from t8code.
+            for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes)
+                coords_ref[1] = nodes[i]
+                coords_ref[2] = nodes[j]
+                coords_ref[3] = nodes[k]
+                t8_geometry_evaluate(cmesh, itree-1, coords_ref, 1, @view(tree_node_coordinates[:, i, j, k, itree]))
+            end
+        end
+    end
+
+    # Apply user defined mapping.
+    map_node_coordinates!(tree_node_coordinates, mapping)
+
+    return T8codeMesh{NDIMS}(forest, tree_node_coordinates, basis.nodes,
+                             boundary_names, "")
+end
+
 """
     T8codeMesh(trees_per_dimension; polydeg, mapping=identity,
                RealT=Float64, initial_refinement_level=0, periodicity=true)
@@ -187,57 +303,10 @@ function T8codeMesh(trees_per_dimension; polydeg = 1,
     forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
                                    mpi_comm())
 
-    basis = LobattoLegendreBasis(RealT, polydeg)
-    nodes = basis.nodes
-
-    num_trees = t8_cmesh_get_num_trees(cmesh)
-
-    tree_node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS,
-                                                    ntuple(_ -> length(nodes), NDIMS)...,
-                                                    num_trees)
-
-    # Get cell length in reference mesh: Omega_ref = [-1,1]^NDIMS.
-    dx = [2 / n for n in trees_per_dimension]
-
     # Non-periodic boundaries.
     boundary_names = fill(Symbol("---"), 2 * NDIMS, prod(trees_per_dimension))
 
-    if mapping === nothing
-        mapping_ = coordinates2mapping(ntuple(_ -> -1.0, NDIMS), ntuple(_ -> 1.0, NDIMS))
-    else
-        mapping_ = mapping
-    end
-
-    for itree in 1:num_trees
-        veptr = t8_cmesh_get_tree_vertices(cmesh, itree - 1)
-        verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))
-
-        # Calculate node coordinates of reference mesh.
-        if NDIMS == 2
-            cell_x_offset = (verts[1, 1] - 0.5 * (trees_per_dimension[1] - 1)) * dx[1]
-            cell_y_offset = (verts[2, 1] - 0.5 * (trees_per_dimension[2] - 1)) * dx[2]
-
-            for j in eachindex(nodes), i in eachindex(nodes)
-                tree_node_coordinates[:, i, j, itree] .= mapping_(cell_x_offset +
-                                                                  dx[1] * nodes[i] / 2,
-                                                                  cell_y_offset +
-                                                                  dx[2] * nodes[j] / 2)
-            end
-        elseif NDIMS == 3
-            cell_x_offset = (verts[1, 1] - 0.5 * (trees_per_dimension[1] - 1)) * dx[1]
-            cell_y_offset = (verts[2, 1] - 0.5 * (trees_per_dimension[2] - 1)) * dx[2]
-            cell_z_offset = (verts[3, 1] - 0.5 * (trees_per_dimension[3] - 1)) * dx[3]
-
-            for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes)
-                tree_node_coordinates[:, i, j, k, itree] .= mapping_(cell_x_offset +
-                                                                     dx[1] * nodes[i] / 2,
-                                                                     cell_y_offset +
-                                                                     dx[2] * nodes[j] / 2,
-                                                                     cell_z_offset +
-                                                                     dx[3] * nodes[k] / 2)
-            end
-        end
-
+    for itree in 1:t8_forest_get_num_global_trees(forest)
         if !periodicity[1]
             boundary_names[1, itree] = :x_neg
             boundary_names[2, itree] = :x_pos
@@ -256,8 +325,11 @@ function T8codeMesh(trees_per_dimension; polydeg = 1,
         end
     end
 
-    return T8codeMesh{NDIMS}(forest, tree_node_coordinates, nodes,
-                             boundary_names, "")
+    # Note, `p*est_connectivity_new_brick` covers a domain of `[0,nx] x [0,ny] x ....`.
+    # Hence, transform mesh coordinates to reference space [-1,1]^NDIMS before applying user defined mapping.
+    mapping_(xyz...) = mapping((x * 2.0/tpd - 1.0 for (x,tpd) in zip(xyz, trees_per_dimension))...)
+
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg, mapping = mapping_)
 end
 
 """
@@ -295,106 +367,10 @@ function T8codeMesh(cmesh::Ptr{t8_cmesh};
     forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
                                    mpi_comm())
 
-    basis = LobattoLegendreBasis(RealT, polydeg)
-    nodes = basis.nodes
-
-    num_trees = t8_cmesh_get_num_trees(cmesh)
-
-    tree_node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS,
-                                                    ntuple(_ -> length(nodes), NDIMS)...,
-                                                    num_trees)
-
-    nodes_in = [-1.0, 1.0]
-    matrix = polynomial_interpolation_matrix(nodes_in, nodes)
-
-    num_local_trees = t8_cmesh_get_num_local_trees(cmesh)
-
-    if NDIMS == 2
-        data_in = Array{RealT, 3}(undef, 2, 2, 2)
-        tmp1 = zeros(RealT, 2, length(nodes), length(nodes_in))
-        verts = zeros(3, 4)
-
-        for itree in 0:(num_local_trees - 1)
-            veptr = t8_cmesh_get_tree_vertices(cmesh, itree)
-
-            # Note, `verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))`
-            # sometimes does not work since `veptr` is not necessarily properly
-            # aligned to 8 bytes.
-            for icorner in 1:4
-                verts[1, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 1)
-                verts[2, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 2)
-            end
-
-            # Check if tree's node ordering is right-handed or print a warning.
-            let z = zero(eltype(verts)), o = one(eltype(verts))
-                u = verts[:, 2] - verts[:, 1]
-                v = verts[:, 3] - verts[:, 1]
-                w = [z, z, o]
-
-                # Triple product gives signed volume of spanned parallelepiped.
-                vol = dot(cross(u, v), w)
-
-                if vol < z
-                    @warn "Discovered negative volumes in `cmesh`: vol = $vol"
-                end
-            end
+    # There's no simple and generic way to distinguish boundaries, yet. Name all of them :all.
+    boundary_names = fill(:all, 2 * NDIMS, t8_cmesh_get_num_trees(cmesh))
 
-            # Tree vertices are stored in z-order.
-            @views data_in[:, 1, 1] .= verts[1:2, 1]
-            @views data_in[:, 2, 1] .= verts[1:2, 2]
-            @views data_in[:, 1, 2] .= verts[1:2, 3]
-            @views data_in[:, 2, 2] .= verts[1:2, 4]
-
-            # Interpolate corner coordinates to specified nodes.
-            multiply_dimensionwise!(view(tree_node_coordinates, :, :, :, itree + 1),
-                                    matrix, matrix,
-                                    data_in,
-                                    tmp1)
-        end
-
-    elseif NDIMS == 3
-        data_in = Array{RealT, 4}(undef, 3, 2, 2, 2)
-        tmp1 = zeros(RealT, 3, length(nodes), length(nodes_in), length(nodes_in))
-        verts = zeros(3, 8)
-
-        for itree in 0:(num_trees - 1)
-            veptr = t8_cmesh_get_tree_vertices(cmesh, itree)
-
-            # Note, `verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))`
-            # sometimes does not work since `veptr` is not necessarily properly
-            # aligned to 8 bytes.
-            for icorner in 1:8
-                verts[1, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 1)
-                verts[2, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 2)
-                verts[3, icorner] = unsafe_load(veptr, (icorner - 1) * 3 + 3)
-            end
-
-            # Tree vertices are stored in z-order.
-            @views data_in[:, 1, 1, 1] .= verts[1:3, 1]
-            @views data_in[:, 2, 1, 1] .= verts[1:3, 2]
-            @views data_in[:, 1, 2, 1] .= verts[1:3, 3]
-            @views data_in[:, 2, 2, 1] .= verts[1:3, 4]
-
-            @views data_in[:, 1, 1, 2] .= verts[1:3, 5]
-            @views data_in[:, 2, 1, 2] .= verts[1:3, 6]
-            @views data_in[:, 1, 2, 2] .= verts[1:3, 7]
-            @views data_in[:, 2, 2, 2] .= verts[1:3, 8]
-
-            # Interpolate corner coordinates to specified nodes.
-            multiply_dimensionwise!(view(tree_node_coordinates, :, :, :, :, itree + 1),
-                                    matrix, matrix, matrix,
-                                    data_in,
-                                    tmp1)
-        end
-    end
-
-    map_node_coordinates!(tree_node_coordinates, mapping)
-
-    # There's no simple and generic way to distinguish boundaries. Name all of them :all.
-    boundary_names = fill(:all, 2 * NDIMS, num_trees)
-
-    return T8codeMesh{NDIMS}(forest, tree_node_coordinates, nodes,
-                             boundary_names, "")
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg, mapping = mapping)
 end
 
 """
@@ -446,35 +422,188 @@ function T8codeMesh(conn::Ptr{p8est_connectivity}; kwargs...)
 end
 
 """
-    T8codeMesh(meshfile::String, ndims; kwargs...)
+    T8codeMesh(filepath::String, NDIMS; kwargs...)
+
+Main mesh constructor for the `T8codeMesh` that imports an unstructured, conforming
+mesh from either a Gmsh mesh file (`.msh`) or Abaqus mesh file  (`.inp`) which is determined
+by the file extension.
+
+# Arguments
+- `filepath::String`: path to a Gmsh or Abaqus mesh file.
+- `NDIMS`: Mesh file dimension: `2` or `3`.
+
+# Optional Keyword Arguments
+- `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
+             the imported mesh to the physical domain. Use `nothing` for the identity map.
+- `polydeg::Integer`: Polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+                      The default of `1` creates an uncurved geometry. Use a higher value if the mapping
+                      will curve the imported uncurved mesh.
+- `RealT::Type`: The type that should be used for coordinates.
+- `initial_refinement_level::Integer`: Refine the mesh uniformly to this level before the simulation starts.
+"""
+function T8codeMesh(filepath::String, NDIMS; kwargs...)
+    # Prevent `t8code` from crashing Julia if the file doesn't exist.
+    @assert isfile(filepath)
+
+    # Dispatch on the file extension, compared case-insensitively (`.MSH` == `.msh`).
+    file_extension = lowercase(last(splitext(filepath)))
+
+    if file_extension == ".msh"
+        # Gmsh mesh file.
+        return T8codeMesh(GmshFile{NDIMS}(filepath); kwargs...)
+    elseif file_extension == ".inp"
+        # Abaqus mesh file.
+        return T8codeMesh(AbaqusFile{NDIMS}(filepath); kwargs...)
+    end
+
+    # Throw a proper exception type rather than a bare `String`.
+    throw(ArgumentError("Unknown file extension: " * file_extension))
+end
+
+"""
+    T8codeMesh(meshfile::GmshFile{NDIMS}; kwargs...)
 
 Main mesh constructor for the `T8codeMesh` that imports an unstructured, conforming
 mesh from a Gmsh mesh file (`.msh`).
 
 # Arguments
-- `meshfile::String`: path to a Gmsh mesh file.
-- `ndims`: Mesh file dimension: `2` or `3`.
-- `mapping`: a function of `NDIMS` variables to describe the mapping that transforms
+- `meshfile::GmshFile{NDIMS}`: Gmsh mesh file object of dimension NDIMS and give `path` to the file.
+
+# Optional Keyword Arguments
+- `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
              the imported mesh to the physical domain. Use `nothing` for the identity map.
-- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
+- `polydeg::Integer`: Polynomial degree used to store the geometry of the mesh.
                       The mapping will be approximated by an interpolation polynomial
                       of the specified degree for each tree.
                       The default of `1` creates an uncurved geometry. Use a higher value if the mapping
                       will curve the imported uncurved mesh.
-- `RealT::Type`: the type that should be used for coordinates.
-- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the simulation starts.
+- `RealT::Type`: The type that should be used for coordinates.
+- `initial_refinement_level::Integer`: Refine the mesh uniformly to this level before the simulation starts.
 """
-function T8codeMesh(meshfile::String, ndims; kwargs...)
+function T8codeMesh(meshfile::GmshFile{NDIMS}; kwargs...) where NDIMS
     # Prevent `t8code` from crashing Julia if the file doesn't exist.
-    @assert isfile(meshfile)
+    @assert isfile(meshfile.path)
 
-    meshfile_prefix, meshfile_suffix = splitext(meshfile)
+    meshfile_prefix, meshfile_suffix = splitext(meshfile.path)
 
-    cmesh = t8_cmesh_from_msh_file(meshfile_prefix, 0, mpi_comm(), ndims, 0, 0)
+    cmesh = t8_cmesh_from_msh_file(meshfile_prefix, 0, mpi_comm(), NDIMS, 0, 0)
 
     return T8codeMesh(cmesh; kwargs...)
 end
 
+"""
+    T8codeMesh(meshfile::AbaqusFile{NDIMS};
+               mapping=nothing, polydeg=1, RealT=Float64,
+               initial_refinement_level=0,
+               boundary_symbols = nothing)
+
+Main mesh constructor for the `T8codeMesh` that imports an unstructured, conforming
+mesh from an Abaqus mesh file (`.inp`).
+
+To create a curved unstructured mesh `T8codeMesh` two strategies are available:
+
+- `HOHQMesh Abaqus`: High-order, curved boundary information created by
+                     [`HOHQMesh.jl`](https://github.com/trixi-framework/HOHQMesh.jl) is
+                     available in the `meshfile`. The mesh polynomial degree `polydeg`
+                     of the boundaries is provided from the `meshfile`. The computation of
+                     the mapped tree coordinates is done with transfinite interpolation
+                     with linear blending similar to [`UnstructuredMesh2D`](@ref). Boundary name
+                     information is also parsed from the `meshfile` such that different boundary
+                     conditions can be set at each named boundary on a given tree.
+
+- `Standard Abaqus`: By default, with `mapping=nothing` and `polydeg=1`, this creates a
+                     straight-sided mesh from the information parsed from the `meshfile`. If a mapping
+                     function is specified then it computes the mapped tree coordinates via polynomial
+                     interpolants with degree `polydeg`. The mesh created by this function will only
+                     have one boundary `:all` if `boundary_symbols` is not specified.
+                     If `boundary_symbols` is specified the mesh file will be parsed for nodesets defining
+                     the boundary nodes from which boundary edges (2D) and faces (3D) will be assigned.
+
+Note that the `polydeg` keyword argument is only used by the `Standard Abaqus` option.
+The `HOHQMesh Abaqus` routine obtains the mesh `polydeg` directly from the `meshfile`;
+a user-defined `mapping` is applied to the resulting tree node coordinates afterwards.
+
+The particular strategy is selected according to the header present in the `meshfile` where
+the constructor checks whether or not the `meshfile` was created with
+[HOHQMesh.jl](https://github.com/trixi-framework/HOHQMesh.jl).
+If the Abaqus file header is not present in the `meshfile` then the `T8codeMesh` is created
+by `Standard Abaqus`.
+
+The default keyword argument `initial_refinement_level=0` corresponds to a forest
+where the number of trees is the same as the number of elements in the original `meshfile`.
+Increasing the `initial_refinement_level` allows one to uniformly refine the base mesh given
+in the `meshfile` to create a forest with more trees before the simulation begins.
+For example, if a two-dimensional base mesh contains 25 elements then setting
+`initial_refinement_level=1` creates an initial forest of `2^2 * 25 = 100` trees.
+
+# Arguments
+- `meshfile::AbaqusFile{NDIMS}`: Abaqus mesh file object of dimension `NDIMS` holding the `path` to the file.
+
+# Optional Keyword Arguments
+- `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
+             the imported mesh to the physical domain. Use `nothing` for the identity map.
+- `polydeg::Integer`: Polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+                      The default of `1` creates an uncurved geometry. Use a higher value if the mapping
+                      will curve the imported uncurved mesh.
+- `RealT::Type`: The type that should be used for coordinates.
+- `initial_refinement_level::Integer`: Refine the mesh uniformly to this level before the simulation starts.
+- `boundary_symbols::Vector{Symbol}`: A vector of symbols that correspond to the boundary names in the `meshfile`.
+                                      If `nothing` is passed then all boundaries are named `:all`.
+"""
+function T8codeMesh(meshfile::AbaqusFile{NDIMS};
+                    mapping = nothing, polydeg = 1, RealT = Float64,
+                    initial_refinement_level = 0,
+                    boundary_symbols = nothing) where NDIMS
+    # Prevent `t8code` from crashing Julia if the file doesn't exist.
+    @assert isfile(meshfile.path)
+
+    # Read in the Header of the meshfile to determine which constructor is appropriate.
+    header = open(meshfile.path, "r") do io
+        readline(io) # *Header of the Abaqus file; discarded
+        readline(io) # Read in the actual header information
+    end
+
+    # Check if the meshfile was generated using HOHQMesh.
+    if header == " File created by HOHQMesh"
+        # Mesh curvature and boundary naming is handled with additional information available in meshfile
+        connectivity, tree_node_coordinates, nodes, boundary_names = p4est_connectivity_from_hohqmesh_abaqus(meshfile.path,
+                                                                                              initial_refinement_level,
+                                                                                              NDIMS,
+                                                                                              RealT)
+        # Apply user defined mapping.
+        map_node_coordinates!(tree_node_coordinates, mapping)
+    else
+        # Mesh curvature is handled directly by applying the mapping keyword argument.
+        connectivity, tree_node_coordinates, nodes, boundary_names = p4est_connectivity_from_standard_abaqus(meshfile.path,
+                                                                                              mapping,
+                                                                                              polydeg,
+                                                                                              initial_refinement_level,
+                                                                                              NDIMS,
+                                                                                              RealT,
+                                                                                              boundary_symbols)
+    end
+
+    if connectivity isa Ptr{p4est_connectivity}
+        cmesh = t8_cmesh_new_from_p4est(connectivity, mpi_comm(), 0)
+    elseif connectivity isa Ptr{p8est_connectivity}
+        cmesh = t8_cmesh_new_from_p8est(connectivity, mpi_comm(), 0)
+    else
+        error("`connectivity` is not of type `Ptr{p*est_connectivity}`.")
+    end
+
+    do_face_ghost = mpi_isparallel()
+    scheme = t8_scheme_new_default_cxx()
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
+                                   mpi_comm())
+
+    return T8codeMesh{NDIMS}(forest, tree_node_coordinates, nodes,
+                             boundary_names, "")
+end
+
 struct adapt_callback_passthrough
     adapt_callback::Function
     user_data::Any
diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 4232cf04094..300eaef66c8 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -202,7 +202,7 @@ mkdir(outdir)
                                 3.3228975127030935e-13,
                                 9.592326932761353e-13,
                             ],
-                            tspan=(0.0, 0.1))
+                            tspan=(0.0, 0.1), atol=5.0e-13,)
         # Ensure that we do not have excessive memory allocations 
         # (e.g., from type instabilities)
         let

From 0fed965b941a3ec5877d438cd7545ae8eeeb14b1 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Apr 2024 13:05:31 +0200
Subject: [PATCH 03/89] Updated examples.

---
 .../elixir_advection_amr_unstructured_flag.jl         | 11 +++--------
 .../elixir_advection_unstructured_flag.jl             | 11 +++--------
 examples/t8code_2d_dgsem/elixir_euler_free_stream.jl  |  7 +------
 ...er_source_terms_nonconforming_unstructured_flag.jl |  7 +------
 examples/t8code_2d_dgsem/elixir_mhd_rotor.jl          |  7 +------
 .../elixir_advection_amr_unstructured_curved.jl       |  7 +------
 .../elixir_advection_unstructured_curved.jl           |  7 +------
 examples/t8code_3d_dgsem/elixir_euler_ec.jl           |  7 +------
 examples/t8code_3d_dgsem/elixir_euler_free_stream.jl  |  7 +------
 .../elixir_euler_free_stream_extruded.jl              |  7 +------
 ..._source_terms_nonconforming_unstructured_curved.jl |  7 +------
 11 files changed, 15 insertions(+), 70 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index 0923e328487..6f43989c56b 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -33,14 +33,9 @@ mapping_flag = Trixi.transfinite_mapping(faces)
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
                            joinpath(@__DIR__, "square_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(2))
-
-mesh = T8codeMesh(conn, polydeg = 3,
-                  mapping = mapping_flag,
-                  initial_refinement_level = 1)
+mesh = T8codeMesh(mesh_file, 2;
+                          mapping = mapping_flag, polydeg = 3,
+                          initial_refinement_level = 1)
 
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                     boundary_conditions = boundary_conditions)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
index ba8f1b59b80..025493a1708 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
@@ -30,14 +30,9 @@ mapping_flag = Trixi.transfinite_mapping(faces)
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
                            joinpath(@__DIR__, "square_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(2))
-
-mesh = T8codeMesh(conn, polydeg = 3,
-                  mapping = mapping_flag,
-                  initial_refinement_level = 2)
+mesh = T8codeMesh(mesh_file, 2;
+                          mapping = mapping_flag, polydeg = 3,
+                          initial_refinement_level = 2)
 
 # A semidiscretization collects data structures and functions for the spatial discretization.
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
diff --git a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
index 5e6c4193c50..d9d2c65d988 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
@@ -32,12 +32,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/a075f8ec39a67fa9fad8f6f84342cbca/raw/a7206a02ed3a5d3cadacd8d9694ac154f9151db7/square_unstructured_1.inp",
                            joinpath(@__DIR__, "square_unstructured_1.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(2))
-
-mesh = T8codeMesh(conn, polydeg = 3,
+mesh = T8codeMesh(mesh_file, 2; polydeg = 3,
                   mapping = mapping,
                   initial_refinement_level = 1)
 
diff --git a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
index e496eb76729..48684071d4b 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
@@ -32,12 +32,7 @@ mapping_flag = Trixi.transfinite_mapping(faces)
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
                            joinpath(@__DIR__, "square_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(2))
-
-mesh = T8codeMesh(conn, polydeg = 3,
+mesh = T8codeMesh(mesh_file, 2; polydeg = 3,
                   mapping = mapping_flag,
                   initial_refinement_level = 1)
 
diff --git a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
index ff2e40ae607..592d5b15a85 100644
--- a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
+++ b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
@@ -70,12 +70,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
                            joinpath(@__DIR__, "square_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(2))
-
-mesh = T8codeMesh(conn, polydeg = 4,
+mesh = T8codeMesh(mesh_file, 2; polydeg = 4,
                   mapping = mapping_twist,
                   initial_refinement_level = 1)
 
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
index e7c0f4b7318..1f9aa3449b0 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
@@ -50,12 +50,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/b8df0033798e4926dec515fc045e8c2c/raw/b9254cde1d1fb64b6acc8416bc5ccdd77a240227/cube_unstructured_2.inp",
                            joinpath(@__DIR__, "cube_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connectivity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(3))
-
-mesh = T8codeMesh(conn, polydeg = 2,
+mesh = T8codeMesh(mesh_file, 3; polydeg = 2,
                   mapping = mapping,
                   initial_refinement_level = 1)
 
diff --git a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
index ee27ee117fe..fe6aa48e7d9 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
@@ -47,12 +47,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/d45c8ac1e248618885fa7cc31a50ab40/raw/37fba24890ab37cfa49c39eae98b44faf4502882/cube_unstructured_1.inp",
                            joinpath(@__DIR__, "cube_unstructured_1.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connectivity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(3))
-
-mesh = T8codeMesh(conn, polydeg = 3,
+mesh = T8codeMesh(mesh_file, 3; polydeg = 3,
                   mapping = mapping,
                   initial_refinement_level = 2)
 
diff --git a/examples/t8code_3d_dgsem/elixir_euler_ec.jl b/examples/t8code_3d_dgsem/elixir_euler_ec.jl
index b720bfcd375..e1e4d850a86 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_ec.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_ec.jl
@@ -47,12 +47,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/b8df0033798e4926dec515fc045e8c2c/raw/b9254cde1d1fb64b6acc8416bc5ccdd77a240227/cube_unstructured_2.inp",
                            joinpath(@__DIR__, "cube_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connectivity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(3))
-
-mesh = T8codeMesh(conn, polydeg = 5,
+mesh = T8codeMesh(mesh_file, 3; polydeg = 5,
                   mapping = mapping,
                   initial_refinement_level = 0)
 
diff --git a/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
index b70a6091adf..882e3aebebe 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
@@ -48,12 +48,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/d45c8ac1e248618885fa7cc31a50ab40/raw/37fba24890ab37cfa49c39eae98b44faf4502882/cube_unstructured_1.inp",
                            joinpath(@__DIR__, "cube_unstructured_1.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connectivity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(3))
-
-mesh = T8codeMesh(conn, polydeg = 2,
+mesh = T8codeMesh(mesh_file, 3; polydeg = 2,
                   mapping = mapping,
                   initial_refinement_level = 0)
 
diff --git a/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl b/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
index 6ae38d20b5a..777cccf7ad7 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
@@ -37,12 +37,7 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/b8df0033798e4926dec515fc045e8c2c/raw/b9254cde1d1fb64b6acc8416bc5ccdd77a240227/cube_unstructured_2.inp",
                            joinpath(@__DIR__, "cube_unstructured_2.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(3))
-
-mesh = T8codeMesh(conn, polydeg = 3,
+mesh = T8codeMesh(mesh_file, 3; polydeg = 3,
                   mapping = mapping,
                   initial_refinement_level = 0)
 
diff --git a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
index 6856be36ea1..a06e7927dd0 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
@@ -50,13 +50,8 @@ end
 mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/d45c8ac1e248618885fa7cc31a50ab40/raw/37fba24890ab37cfa49c39eae98b44faf4502882/cube_unstructured_1.inp",
                            joinpath(@__DIR__, "cube_unstructured_1.inp"))
 
-# INP mesh files are only support by p4est. Hence, we
-# create a p4est connecvity object first from which
-# we can create a t8code mesh.
-conn = Trixi.read_inp_p4est(mesh_file, Val(3))
-
 # Mesh polydeg of 2 (half the solver polydeg) to ensure FSP (see above).
-mesh = T8codeMesh(conn, polydeg = 2,
+mesh = T8codeMesh(mesh_file, 3; polydeg = 2,
                   mapping = mapping,
                   initial_refinement_level = 0)
 

From c50b434e8d9408c641acba3f63d39d4d701f9651 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Apr 2024 13:18:30 +0200
Subject: [PATCH 04/89] Fixed typos.

---
 src/meshes/t8code_mesh.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 72f7cf337b1..69ddb9a0bf7 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -106,7 +106,7 @@ end
 Create a 'T8codeMesh'.
 
 # Arguments
-- 'forest': Pointer to a commited forest.
+- 'forest': Pointer to a committed forest.
 - 'boundary_names': List of boundary names.
 - 'polydeg::Integer': Polynomial degree used to store the geometry of the mesh.
                       The mapping will be approximated by an interpolation polynomial
@@ -325,7 +325,7 @@ function T8codeMesh(trees_per_dimension; polydeg = 1,
         end
     end
 
-    # Note, `p*est_connectivity_new_brick` convers a domain of `[0,nx] x [0,ny] x ....`.
+    # Note, `p*est_connectivity_new_brick` converts a domain of `[0,nx] x [0,ny] x ....`.
     # Hence, transform mesh coordinates to reference space [-1,1]^NDIMS before applying user defined mapping.
     mapping_(xyz...) = mapping((x * 2.0/tpd - 1.0 for (x,tpd) in zip(xyz, trees_per_dimension))...)
 

From 5e186563293bdf3dbb9f883b81400c9a00a40f58 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Apr 2024 13:20:44 +0200
Subject: [PATCH 05/89] Applied formatter.

---
 .../elixir_advection_amr_unstructured_flag.jl |  4 +-
 .../elixir_advection_unstructured_flag.jl     |  4 +-
 src/auxiliary/auxiliary.jl                    |  5 +-
 src/meshes/p4est_mesh.jl                      | 25 ++++----
 src/meshes/t8code_mesh.jl                     | 62 ++++++++++---------
 5 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index 6f43989c56b..9138586cccf 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -34,8 +34,8 @@ mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/63ff2e
                            joinpath(@__DIR__, "square_unstructured_2.inp"))
 
 mesh = T8codeMesh(mesh_file, 2;
-                          mapping = mapping_flag, polydeg = 3,
-                          initial_refinement_level = 1)
+                  mapping = mapping_flag, polydeg = 3,
+                  initial_refinement_level = 1)
 
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                     boundary_conditions = boundary_conditions)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
index 025493a1708..e512f328234 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
@@ -31,8 +31,8 @@ mesh_file = Trixi.download("https://gist.githubusercontent.com/efaulhaber/63ff2e
                            joinpath(@__DIR__, "square_unstructured_2.inp"))
 
 mesh = T8codeMesh(mesh_file, 2;
-                          mapping = mapping_flag, polydeg = 3,
-                          initial_refinement_level = 2)
+                  mapping = mapping_flag, polydeg = 3,
+                  initial_refinement_level = 2)
 
 # A semidiscretization collects data structures and functions for the spatial discretization.
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index 34e7ccbbfdb..f3c54249816 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -374,11 +374,10 @@ end
 abstract type MeshFile{NDIMS} end
 
 struct GmshFile{NDIMS} <: MeshFile{NDIMS}
-  path :: String
+    path::String
 end
 
 struct AbaqusFile{NDIMS} <: MeshFile{NDIMS}
-  path :: String
+    path::String
 end
-
 end # @muladd
diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl
index 61713867e0f..6bb98196231 100644
--- a/src/meshes/p4est_mesh.jl
+++ b/src/meshes/p4est_mesh.jl
@@ -392,9 +392,9 @@ end
 function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
                                          n_dimensions, RealT)
     connectivity, tree_node_coordinates, nodes, boundary_names = p4est_connectivity_from_hohqmesh_abaqus(meshfile,
-                                                                                          initial_refinement_level,
-                                                                                          n_dimensions,
-                                                                                          RealT)
+                                                                                                         initial_refinement_level,
+                                                                                                         n_dimensions,
+                                                                                                         RealT)
 
     p4est = new_p4est(connectivity, initial_refinement_level)
 
@@ -407,12 +407,12 @@ function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg,
                                          initial_refinement_level, n_dimensions, RealT,
                                          boundary_symbols)
     connectivity, tree_node_coordinates, nodes, boundary_names = p4est_connectivity_from_standard_abaqus(meshfile,
-                                                                                  mapping,
-                                                                                  polydeg,
-                                                                                  initial_refinement_level,
-                                                                                  n_dimensions,
-                                                                                  RealT,
-                                                                                  boundary_symbols)
+                                                                                                         mapping,
+                                                                                                         polydeg,
+                                                                                                         initial_refinement_level,
+                                                                                                         n_dimensions,
+                                                                                                         RealT,
+                                                                                                         boundary_symbols)
 
     p4est = new_p4est(connectivity, initial_refinement_level)
 
@@ -424,7 +424,7 @@ end
 # the boundary names on each tree are provided by the `meshfile` created by
 # [`HOHQMesh.jl`](https://github.com/trixi-framework/HOHQMesh.jl).
 function p4est_connectivity_from_hohqmesh_abaqus(meshfile, initial_refinement_level,
-                                         n_dimensions, RealT)
+                                                 n_dimensions, RealT)
     # Create the mesh connectivity using `p4est`
     connectivity = read_inp_p4est(meshfile, Val(n_dimensions))
     connectivity_pw = PointerWrapper(connectivity)
@@ -480,8 +480,9 @@ end
 # the `mapping` passed to this function using polynomial interpolants of degree `polydeg`. All boundary
 # names are given the name `:all`.
 function p4est_connectivity_from_standard_abaqus(meshfile, mapping, polydeg,
-                                         initial_refinement_level, n_dimensions, RealT,
-                                         boundary_symbols)
+                                                 initial_refinement_level, n_dimensions,
+                                                 RealT,
+                                                 boundary_symbols)
     # Create the mesh connectivity using `p4est`
     connectivity = read_inp_p4est(meshfile, Val(n_dimensions))
     connectivity_pw = PointerWrapper(connectivity)
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 69ddb9a0bf7..be76fb3f56b 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -26,7 +26,7 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
     nmpiinterfaces :: Int
     nmpimortars    :: Int
 
-    function T8codeMesh{NDIMS}(forest :: Ptr{t8_forest}, tree_node_coordinates, nodes,
+    function T8codeMesh{NDIMS}(forest::Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
                                current_filename) where {NDIMS}
         is_parallel = mpi_isparallel() ? True() : False()
@@ -114,7 +114,8 @@ Create a 'T8codeMesh'.
 - `mapping`: a function of `NDIMS` variables to describe the mapping that transforms
              the imported mesh to the physical domain. Use `nothing` for the identity map.
 """
-function T8codeMesh{NDIMS, RealT}(forest :: Ptr{t8_forest}, boundary_names; polydeg = 1, mapping = nothing) where {NDIMS, RealT}
+function T8codeMesh{NDIMS, RealT}(forest::Ptr{t8_forest}, boundary_names; polydeg = 1,
+                                  mapping = nothing) where {NDIMS, RealT}
     # In t8code reference space is [0,1].
     basis = LobattoLegendreBasis(RealT, polydeg)
     nodes = 0.5 .* (basis.nodes .+ 1.0)
@@ -135,7 +136,7 @@ function T8codeMesh{NDIMS, RealT}(forest :: Ptr{t8_forest}, boundary_names; poly
         # Testing for negative element volumes.
         verts = zeros(3, num_corners)
         for itree in 1:num_trees
-            veptr = t8_cmesh_get_tree_vertices(cmesh, itree-1)
+            veptr = t8_cmesh_get_tree_vertices(cmesh, itree - 1)
 
             # Note, `verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))`
             # sometimes does not work since `veptr` is not necessarily properly
@@ -164,7 +165,8 @@ function T8codeMesh{NDIMS, RealT}(forest :: Ptr{t8_forest}, boundary_names; poly
                 coords_ref[1] = nodes[i]
                 coords_ref[2] = nodes[j]
                 coords_ref[3] = 0.0
-                t8_geometry_evaluate(cmesh, itree-1, coords_ref, 1, @view(tree_node_coordinates[:, i, j, itree]))
+                t8_geometry_evaluate(cmesh, itree - 1, coords_ref, 1,
+                                     @view(tree_node_coordinates[:, i, j, itree]))
             end
         end
 
@@ -174,7 +176,7 @@ function T8codeMesh{NDIMS, RealT}(forest :: Ptr{t8_forest}, boundary_names; poly
         # Testing for negative element volumes.
         verts = zeros(3, num_corners)
         for itree in 1:num_trees
-            veptr = t8_cmesh_get_tree_vertices(cmesh, itree-1)
+            veptr = t8_cmesh_get_tree_vertices(cmesh, itree - 1)
 
             # Note, `verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))`
             # sometimes does not work since `veptr` is not necessarily properly
@@ -204,7 +206,8 @@ function T8codeMesh{NDIMS, RealT}(forest :: Ptr{t8_forest}, boundary_names; poly
                 coords_ref[1] = nodes[i]
                 coords_ref[2] = nodes[j]
                 coords_ref[3] = nodes[k]
-                t8_geometry_evaluate(cmesh, itree-1, coords_ref, 1, @view(tree_node_coordinates[:, i, j, k, itree]))
+                t8_geometry_evaluate(cmesh, itree - 1, coords_ref, 1,
+                                     @view(tree_node_coordinates[:, i, j, k, itree]))
             end
         end
     end
@@ -327,9 +330,11 @@ function T8codeMesh(trees_per_dimension; polydeg = 1,
 
     # Note, `p*est_connectivity_new_brick` converts a domain of `[0,nx] x [0,ny] x ....`.
     # Hence, transform mesh coordinates to reference space [-1,1]^NDIMS before applying user defined mapping.
-    mapping_(xyz...) = mapping((x * 2.0/tpd - 1.0 for (x,tpd) in zip(xyz, trees_per_dimension))...)
+    mapping_(xyz...) = mapping((x * 2.0 / tpd - 1.0 for (x, tpd) in zip(xyz,
+                                                                        trees_per_dimension))...)
 
-    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg, mapping = mapping_)
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg,
+                                    mapping = mapping_)
 end
 
 """
@@ -370,7 +375,8 @@ function T8codeMesh(cmesh::Ptr{t8_cmesh};
     # There's no simple and generic way to distinguish boundaries, yet. Name all of them :all.
     boundary_names = fill(:all, 2 * NDIMS, t8_cmesh_get_num_trees(cmesh))
 
-    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg, mapping = mapping)
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg,
+                                    mapping = mapping)
 end
 
 """
@@ -452,11 +458,11 @@ function T8codeMesh(filepath::String, NDIMS; kwargs...)
     file_extension = lowercase(meshfile_suffix)
 
     if file_extension == ".msh"
-      return T8codeMesh(GmshFile{NDIMS}(filepath); kwargs...)
+        return T8codeMesh(GmshFile{NDIMS}(filepath); kwargs...)
     end
 
     if file_extension == ".inp"
-      return T8codeMesh(AbaqusFile{NDIMS}(filepath); kwargs...)
+        return T8codeMesh(AbaqusFile{NDIMS}(filepath); kwargs...)
     end
 
     throw("Unknown file extension: " * file_extension)
@@ -482,7 +488,7 @@ mesh from a Gmsh mesh file (`.msh`).
 - `RealT::Type`: The type that should be used for coordinates.
 - `initial_refinement_level::Integer`: Refine the mesh uniformly to this level before the simulation starts.
 """
-function T8codeMesh(meshfile::GmshFile{NDIMS}; kwargs...) where NDIMS
+function T8codeMesh(meshfile::GmshFile{NDIMS}; kwargs...) where {NDIMS}
     # Prevent `t8code` from crashing Julia if the file doesn't exist.
     @assert isfile(meshfile.path)
 
@@ -555,9 +561,9 @@ For example, if a two-dimensional base mesh contains 25 elements then setting
                                       If `nothing` is passed then all boundaries are named `:all`.                                                
 """
 function T8codeMesh(meshfile::AbaqusFile{NDIMS};
-                          mapping = nothing, polydeg = 1, RealT = Float64,
-                          initial_refinement_level = 0,
-                          boundary_symbols = nothing) where NDIMS
+                    mapping = nothing, polydeg = 1, RealT = Float64,
+                    initial_refinement_level = 0,
+                    boundary_symbols = nothing) where {NDIMS}
     # Prevent `t8code` from crashing Julia if the file doesn't exist.
     @assert isfile(meshfile.path)
 
@@ -571,30 +577,30 @@ function T8codeMesh(meshfile::AbaqusFile{NDIMS};
     if header == " File created by HOHQMesh"
         # Mesh curvature and boundary naming is handled with additional information available in meshfile
         connectivity, tree_node_coordinates, nodes, boundary_names = p4est_connectivity_from_hohqmesh_abaqus(meshfile.path,
-                                                                                              initial_refinement_level,
-                                                                                              NDIMS,
-                                                                                              RealT)
+                                                                                                             initial_refinement_level,
+                                                                                                             NDIMS,
+                                                                                                             RealT)
         # Apply user defined mapping.
         map_node_coordinates!(tree_node_coordinates, mapping)
     else
         # Mesh curvature is handled directly by applying the mapping keyword argument.
         connectivity, tree_node_coordinates, nodes, boundary_names = p4est_connectivity_from_standard_abaqus(meshfile.path,
-                                                                                              mapping,
-                                                                                              polydeg,
-                                                                                              initial_refinement_level,
-                                                                                              NDIMS,
-                                                                                              RealT,
-                                                                                              boundary_symbols)
+                                                                                                             mapping,
+                                                                                                             polydeg,
+                                                                                                             initial_refinement_level,
+                                                                                                             NDIMS,
+                                                                                                             RealT,
+                                                                                                             boundary_symbols)
     end
 
     if typeof(connectivity) <: Ptr{p4est_connectivity}
-      cmesh = t8_cmesh_new_from_p4est(connectivity, mpi_comm(), 0)
+        cmesh = t8_cmesh_new_from_p4est(connectivity, mpi_comm(), 0)
     elseif typeof(connectivity) <: Ptr{p8est_connectivity}
-      cmesh = t8_cmesh_new_from_p8est(connectivity, mpi_comm(), 0)
+        cmesh = t8_cmesh_new_from_p8est(connectivity, mpi_comm(), 0)
     else
-      throw("`connectivity` is not of type `Ptr{p*est_connectivity}`.")
+        throw("`connectivity` is not of type `Ptr{p*est_connectivity}`.")
     end
-    
+
     do_face_ghost = mpi_isparallel()
     scheme = t8_scheme_new_default_cxx()
     forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,

From 3bb6292e01f50ad8bb2a19dc9a807bb8f5015501 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Wed, 13 Mar 2024 17:11:51 +0100
Subject: [PATCH 06/89] cubed sphere test case, copied from p4est

---
 .../elixir_advection_cubed_sphere.jl          | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl

diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
new file mode 100644
index 00000000000..9f9876d2753
--- /dev/null
+++ b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
@@ -0,0 +1,61 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7, 0.5)
+equations = LinearScalarAdvectionEquation3D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+initial_condition = initial_condition_convergence_test
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:inside => boundary_condition,
+                           :outside => boundary_condition)
+
+mesh = Trixi.T8codeMeshCubedSphere(5, 3, 0.5, 0.5;
+                                   polydeg = 3, initial_refinement_level = 0)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan)
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.2)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+                        stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()

From abbf702d5e426670fc63cea5c7229f59d1b1bbe5 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Wed, 27 Mar 2024 10:28:29 +0100
Subject: [PATCH 07/89] add baroclinic instability (copy of p4est)

---
 .../elixir_euler_baroclinic_instability.jl    | 299 ++++++++++++++++++
 1 file changed, 299 insertions(+)
 create mode 100644 examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl

diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
new file mode 100644
index 00000000000..128f1e38a5b
--- /dev/null
+++ b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
@@ -0,0 +1,299 @@
+# An idealized baroclinic instability test case
+# For optimal results consider increasing the resolution to 16x16x8 trees per cube face.
+#
+# Note that this elixir can take several hours to run.
+# Using 24 threads of an AMD Ryzen Threadripper 3990X (more threads don't speed it up further)
+# and `check-bounds=no`, this elixir takes about one hour to run.
+# With 16x16x8 trees per cube face on the same machine, it takes about 28 hours.
+#
+# References:
+# - Paul A. Ullrich, Thomas Melvin, Christiane Jablonowski, Andrew Staniforth (2013)
+#   A proposed baroclinic wave test case for deep- and shallow-atmosphere dynamical cores
+#   https://doi.org/10.1002/qj.2241
+
+using OrdinaryDiffEq
+using Trixi
+using LinearAlgebra
+
+###############################################################################
+# Setup for the baroclinic instability test
+gamma = 1.4
+equations = CompressibleEulerEquations3D(gamma)
+
+# Initial condition for an idealized baroclinic instability test
+# https://doi.org/10.1002/qj.2241, Section 3.2 and Appendix A
+function initial_condition_baroclinic_instability(x, t,
+                                                  equations::CompressibleEulerEquations3D)
+    lon, lat, r = cartesian_to_sphere(x)
+    radius_earth = 6.371229e6
+    # Make sure that r is not smaller than radius_earth
+    z = max(r - radius_earth, 0.0)
+
+    # Unperturbed basic state
+    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
+
+    # Stream function type perturbation
+    u_perturbation, v_perturbation = perturbation_stream_function(lon, lat, z)
+
+    u += u_perturbation
+    v = v_perturbation
+
+    # Convert spherical velocity to Cartesian
+    v1 = -sin(lon) * u - sin(lat) * cos(lon) * v
+    v2 = cos(lon) * u - sin(lat) * sin(lon) * v
+    v3 = cos(lat) * v
+
+    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+# Steady state for RHS correction below
+function steady_state_baroclinic_instability(x, t, equations::CompressibleEulerEquations3D)
+    lon, lat, r = cartesian_to_sphere(x)
+    radius_earth = 6.371229e6
+    # Make sure that r is not smaller than radius_earth
+    z = max(r - radius_earth, 0.0)
+
+    # Unperturbed basic state
+    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
+
+    # Convert spherical velocity to Cartesian
+    v1 = -sin(lon) * u
+    v2 = cos(lon) * u
+    v3 = 0.0
+
+    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+function cartesian_to_sphere(x)
+    r = norm(x)
+    lambda = atan(x[2], x[1])
+    if lambda < 0
+        lambda += 2 * pi
+    end
+    phi = asin(x[3] / r)
+
+    return lambda, phi, r
+end
+
+# Unperturbed balanced steady-state.
+# Returns primitive variables with only the velocity in longitudinal direction (rho, u, p).
+# The other velocity components are zero.
+function basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
+    # Parameters from Table 1 in the paper
+    # Corresponding names in the paper are commented
+    radius_earth = 6.371229e6  # a
+    half_width_parameter = 2           # b
+    gravitational_acceleration = 9.80616     # g
+    k = 3           # k
+    surface_pressure = 1e5         # p₀
+    gas_constant = 287         # R
+    surface_equatorial_temperature = 310.0       # T₀ᴱ
+    surface_polar_temperature = 240.0       # T₀ᴾ
+    lapse_rate = 0.005       # Γ
+    angular_velocity = 7.29212e-5  # Ω
+
+    # Distance to the center of the Earth
+    r = z + radius_earth
+
+    # In the paper: T₀
+    temperature0 = 0.5 * (surface_equatorial_temperature + surface_polar_temperature)
+    # In the paper: A, B, C, H
+    const_a = 1 / lapse_rate
+    const_b = (temperature0 - surface_polar_temperature) /
+              (temperature0 * surface_polar_temperature)
+    const_c = 0.5 * (k + 2) * (surface_equatorial_temperature - surface_polar_temperature) /
+              (surface_equatorial_temperature * surface_polar_temperature)
+    const_h = gas_constant * temperature0 / gravitational_acceleration
+
+    # In the paper: (r - a) / bH
+    scaled_z = z / (half_width_parameter * const_h)
+
+    # Temporary variables
+    temp1 = exp(lapse_rate / temperature0 * z)
+    temp2 = exp(-scaled_z^2)
+
+    # In the paper: ̃τ₁, ̃τ₂
+    tau1 = const_a * lapse_rate / temperature0 * temp1 +
+           const_b * (1 - 2 * scaled_z^2) * temp2
+    tau2 = const_c * (1 - 2 * scaled_z^2) * temp2
+
+    # In the paper: ∫τ₁(r') dr', ∫τ₂(r') dr'
+    inttau1 = const_a * (temp1 - 1) + const_b * z * temp2
+    inttau2 = const_c * z * temp2
+
+    # Temporary variables
+    temp3 = r / radius_earth * cos(lat)
+    temp4 = temp3^k - k / (k + 2) * temp3^(k + 2)
+
+    # In the paper: T
+    temperature = 1 / ((r / radius_earth)^2 * (tau1 - tau2 * temp4))
+
+    # In the paper: U, u (zonal wind, first component of spherical velocity)
+    big_u = gravitational_acceleration / radius_earth * k * temperature * inttau2 *
+            (temp3^(k - 1) - temp3^(k + 1))
+    temp5 = radius_earth * cos(lat)
+    u = -angular_velocity * temp5 + sqrt(angular_velocity^2 * temp5^2 + temp5 * big_u)
+
+    # Hydrostatic pressure
+    p = surface_pressure *
+        exp(-gravitational_acceleration / gas_constant * (inttau1 - inttau2 * temp4))
+
+    # Density (via ideal gas law)
+    rho = p / (gas_constant * temperature)
+
+    return rho, u, p
+end
+
+# Perturbation as in Equations 25 and 26 of the paper (analytical derivative)
+function perturbation_stream_function(lon, lat, z)
+    # Parameters from Table 1 in the paper
+    # Corresponding names in the paper are commented
+    perturbation_radius = 1 / 6      # d₀ / a
+    perturbed_wind_amplitude = 1.0      # Vₚ
+    perturbation_lon = pi / 9     # Longitude of perturbation location
+    perturbation_lat = 2 * pi / 9 # Latitude of perturbation location
+    pertz = 15000    # Perturbation height cap
+
+    # Great circle distance (d in the paper) divided by a (radius of the Earth)
+    # because we never actually need d without dividing by a
+    great_circle_distance_by_a = acos(sin(perturbation_lat) * sin(lat) +
+                                      cos(perturbation_lat) * cos(lat) *
+                                      cos(lon - perturbation_lon))
+
+    # In the first case, the vertical taper function is by definition zero.
+    # In the second case, the stream function is by definition zero.
+    if z > pertz || great_circle_distance_by_a > perturbation_radius
+        return 0.0, 0.0
+    end
+
+    # Vertical tapering of stream function
+    perttaper = 1.0 - 3 * z^2 / pertz^2 + 2 * z^3 / pertz^3
+
+    # sin/cos(pi * d / (2 * d_0)) in the paper
+    sin_, cos_ = sincos(0.5 * pi * great_circle_distance_by_a / perturbation_radius)
+
+    # Common factor for both u and v
+    factor = 16 / (3 * sqrt(3)) * perturbed_wind_amplitude * perttaper * cos_^3 * sin_
+
+    u_perturbation = -factor * (-sin(perturbation_lat) * cos(lat) +
+                      cos(perturbation_lat) * sin(lat) * cos(lon - perturbation_lon)) /
+                     sin(great_circle_distance_by_a)
+
+    v_perturbation = factor * cos(perturbation_lat) * sin(lon - perturbation_lon) /
+                     sin(great_circle_distance_by_a)
+
+    return u_perturbation, v_perturbation
+end
+
+@inline function source_terms_baroclinic_instability(u, x, t,
+                                                     equations::CompressibleEulerEquations3D)
+    radius_earth = 6.371229e6  # a
+    gravitational_acceleration = 9.80616     # g
+    angular_velocity = 7.29212e-5  # Ω
+
+    r = norm(x)
+    # Make sure that r is not smaller than radius_earth
+    z = max(r - radius_earth, 0.0)
+    r = z + radius_earth
+
+    du1 = zero(eltype(u))
+
+    # Gravity term
+    temp = -gravitational_acceleration * radius_earth^2 / r^3
+    du2 = temp * u[1] * x[1]
+    du3 = temp * u[1] * x[2]
+    du4 = temp * u[1] * x[3]
+    du5 = temp * (u[2] * x[1] + u[3] * x[2] + u[4] * x[3])
+
+    # Coriolis term, -2Ω × ρv = -2 * angular_velocity * (0, 0, 1) × u[2:4]
+    du2 -= -2 * angular_velocity * u[3]
+    du3 -= 2 * angular_velocity * u[2]
+
+    return SVector(du1, du2, du3, du4, du5)
+end
+
+###############################################################################
+# Start of the actual elixir, semidiscretization of the problem
+
+initial_condition = initial_condition_baroclinic_instability
+
+boundary_conditions = Dict(:inside => boundary_condition_slip_wall,
+                           :outside => boundary_condition_slip_wall)
+
+# This is a good estimate for the speed of sound in this example.
+# Other values between 300 and 400 should work as well.
+surface_flux = FluxLMARS(340)
+volume_flux = flux_kennedy_gruber
+solver = DGSEM(polydeg = 5, surface_flux = surface_flux,
+               volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+
+# For optimal results, use (16, 8) here
+trees_per_cube_face = (8, 4)
+mesh = Trixi.T8codeMeshCubedSphere(trees_per_cube_face..., 6.371229e6, 30000.0,
+                                   polydeg = 5, initial_refinement_level = 0)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms = source_terms_baroclinic_instability,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 10 * 24 * 60 * 60.0) # time in seconds for 10 days
+
+# Save RHS of the steady state and subtract it in every RHS evaluation.
+# This trick preserves the steady state exactly (to machine rounding errors, of course).
+# Otherwise, this elixir produces entirely unusable results for a resolution of 8x8x4 cells
+# per cube face with a polydeg of 3.
+# With this trick, even the polydeg 3 simulation produces usable (although badly resolved) results,
+# and most of the grid imprinting in higher polydeg simulation is eliminated.
+#
+# See https://github.com/trixi-framework/Trixi.jl/issues/980 for more information.
+u_steady_state = compute_coefficients(steady_state_baroclinic_instability, tspan[1], semi)
+# Use a `let` block for performance (otherwise du_steady_state will be a global variable)
+let du_steady_state = similar(u_steady_state)
+    # Save RHS of the steady state
+    Trixi.rhs!(du_steady_state, u_steady_state, semi, tspan[1])
+
+    global function corrected_rhs!(du, u, semi, t)
+        # Normal RHS evaluation
+        Trixi.rhs!(du, u, semi, t)
+        # Correct by subtracting the steady-state RHS
+        Trixi.@trixi_timeit Trixi.timer() "rhs correction" begin
+            # Use Trixi.@threaded for threaded performance
+            Trixi.@threaded for i in eachindex(du)
+                du[i] -= du_steady_state[i]
+            end
+        end
+    end
+end
+u0 = compute_coefficients(tspan[1], semi)
+ode = ODEProblem(corrected_rhs!, u0, tspan, semi)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 5000
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+#save_solution = SaveSolutionCallback(interval = 5000,
+#                                     save_initial_solution = true,
+#                                     save_final_solution = true,
+#                                     solution_variables = cons2prim)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback)
+#                        , save_solution)
+
+###############################################################################
+# run the simulation
+
+# Use a Runge-Kutta method with automatic (error based) time step size control
+# Enable threading of the RK method for better performance on multiple threads
+sol = solve(ode, RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()); abstol = 1.0e-6,
+            reltol = 1.0e-6,
+            ode_default_options()..., callback = callbacks);
+
+summary_callback() # print the timer summary

From 5fa48e83ae95e9197b0d4faddc327bda95c91a4c Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Tue, 9 Apr 2024 13:13:08 +0200
Subject: [PATCH 08/89] add cubed sphere constructor

---
 src/meshes/t8code_mesh.jl | 45 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index be76fb3f56b..007da861f5b 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -610,6 +610,51 @@ function T8codeMesh(meshfile::AbaqusFile{NDIMS};
                              boundary_names, "")
 end
 
+"""
+T8codeMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness;
+                      polydeg, RealT=Float64, initial_refinement_level=0)
+
+Construct a cubed spherical shell of given inner radius and thickness as `T8codeMesh` with
+`6 * trees_per_face_dimension^2 * layers` trees. The mesh will have two boundaries,
+`:inside` and `:outside`.
+
+# Arguments
+- `trees_per_face_dimension::Integer`: the number of trees in the first two local
+                                       dimensions of each face.
+- `layers::Integer`: the number of trees in the third local dimension of each face, i.e.,
+                     the number of layers of the shell.
+- `inner_radius::Float64`: Radius of the inner side of the shell.
+- `thickness::Float64`: Thickness of the shell. The outer radius will be
+                        `inner_radius + thickness`.
+- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+- `RealT::Type`: the type that should be used for coordinates.
+- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the
+                                       simulation starts.
+"""
+function T8codeMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness;
+                               polydeg, RealT = Float64, initial_refinement_level = 0)
+    NDIMS = 3
+    cmesh = t8_cmesh_new_cubed_spherical_shell(inner_radius, thickness,
+                                               trees_per_face_dimension, layers, mpi_comm())
+    do_face_ghost = mpi_isparallel()
+    scheme = t8_scheme_new_default_cxx()
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
+                                   mpi_comm())
+
+    num_trees = t8_cmesh_get_num_trees(cmesh)
+    # TODO: Init?!
+    boundary_names = fill(Symbol("---"), 2 * NDIMS, num_trees)
+    for itree in 1:num_trees
+        # TODO: z-direction == radial direction in each tree?
+        boundary_names[5, itree] = :inside
+        boundary_names[6, itree] = :outside
+    end
+
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg)
+end
+
 struct adapt_callback_passthrough
     adapt_callback::Function
     user_data::Any

From cd27998913e96218e37844b1622b06d521b3c705 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 19 Apr 2024 09:56:15 +0200
Subject: [PATCH 09/89] Fix indentation.

---
 src/solvers/dgsem_t8code/containers_2d.jl | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/solvers/dgsem_t8code/containers_2d.jl b/src/solvers/dgsem_t8code/containers_2d.jl
index ce525bfdf65..1fdee28fc72 100644
--- a/src/solvers/dgsem_t8code/containers_2d.jl
+++ b/src/solvers/dgsem_t8code/containers_2d.jl
@@ -41,22 +41,14 @@ function calc_node_coordinates!(node_coordinates,
             t8_element_vertex_reference_coords(eclass_scheme, element, 0,
                                                pointer(element_coords))
 
-            nodes_out_x = 2 *
-                          (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[1]) .-
-                          1
-            nodes_out_y = 2 *
-                          (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[2]) .-
-                          1
+            nodes_out_x = 2 * (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[1]) .- 1
+            nodes_out_y = 2 * (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[2]) .- 1
 
-            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x,
-                                             baryweights_in)
-            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y,
-                                             baryweights_in)
+            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, baryweights_in)
+            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, baryweights_in)
 
             multiply_dimensionwise!(view(node_coordinates, :, :, :, current_index += 1),
-                                    matrix1, matrix2,
-                                    view(mesh.tree_node_coordinates, :, :, :,
-                                         global_itree + 1),
+                                    matrix1, matrix2, view(mesh.tree_node_coordinates, :, :, :, global_itree + 1),
                                     tmp1)
         end
     end

From f6653b0fc95adfab7a08c12ceab048bcf82f5b01 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 19 Apr 2024 09:58:15 +0200
Subject: [PATCH 10/89] Fix indentation.

---
 src/solvers/dgsem_t8code/containers_3d.jl | 32 +++++++----------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/solvers/dgsem_t8code/containers_3d.jl b/src/solvers/dgsem_t8code/containers_3d.jl
index 4d56bc734aa..fddb2d01478 100644
--- a/src/solvers/dgsem_t8code/containers_3d.jl
+++ b/src/solvers/dgsem_t8code/containers_3d.jl
@@ -43,29 +43,17 @@ function calc_node_coordinates!(node_coordinates,
             t8_element_vertex_reference_coords(eclass_scheme, element, 0,
                                                pointer(element_coords))
 
-            nodes_out_x = (2 *
-                           (element_length * 0.5 * (nodes .+ 1) .+ element_coords[1]) .-
-                           1)
-            nodes_out_y = (2 *
-                           (element_length * 0.5 * (nodes .+ 1) .+ element_coords[2]) .-
-                           1)
-            nodes_out_z = (2 *
-                           (element_length * 0.5 * (nodes .+ 1) .+ element_coords[3]) .-
-                           1)
-
-            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x,
-                                             baryweights_in)
-            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y,
-                                             baryweights_in)
-            polynomial_interpolation_matrix!(matrix3, mesh.nodes, nodes_out_z,
-                                             baryweights_in)
-
-            multiply_dimensionwise!(view(node_coordinates, :, :, :, :,
-                                         current_index += 1),
+            nodes_out_x = (2 * (element_length * 0.5 * (nodes .+ 1) .+ element_coords[1]) .- 1)
+            nodes_out_y = (2 * (element_length * 0.5 * (nodes .+ 1) .+ element_coords[2]) .- 1)
+            nodes_out_z = (2 * (element_length * 0.5 * (nodes .+ 1) .+ element_coords[3]) .- 1)
+
+            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, baryweights_in)
+            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, baryweights_in)
+            polynomial_interpolation_matrix!(matrix3, mesh.nodes, nodes_out_z, baryweights_in)
+
+            multiply_dimensionwise!(view(node_coordinates, :, :, :, :, current_index += 1),
                                     matrix1, matrix2, matrix3,
-                                    view(mesh.tree_node_coordinates, :, :, :, :,
-                                         global_itree + 1),
-                                    tmp1)
+                                    view(mesh.tree_node_coordinates, :, :, :, :, global_itree + 1), tmp1)
         end
     end
 

From aa4d0d4fecb7eccf3d64a43d0ca413d83136f660 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 19 Apr 2024 10:36:17 +0200
Subject: [PATCH 11/89] Switching off formatter in two files.

---
 src/solvers/dgsem_t8code/containers_2d.jl | 1 +
 src/solvers/dgsem_t8code/containers_3d.jl | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/solvers/dgsem_t8code/containers_2d.jl b/src/solvers/dgsem_t8code/containers_2d.jl
index 1fdee28fc72..104e5590a58 100644
--- a/src/solvers/dgsem_t8code/containers_2d.jl
+++ b/src/solvers/dgsem_t8code/containers_2d.jl
@@ -4,6 +4,7 @@
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
 #! format: noindent
+#! format: off
 
 # Interpolate tree_node_coordinates to each quadrant at the specified nodes.
 function calc_node_coordinates!(node_coordinates,
diff --git a/src/solvers/dgsem_t8code/containers_3d.jl b/src/solvers/dgsem_t8code/containers_3d.jl
index fddb2d01478..e1e58fafd85 100644
--- a/src/solvers/dgsem_t8code/containers_3d.jl
+++ b/src/solvers/dgsem_t8code/containers_3d.jl
@@ -4,6 +4,7 @@
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
 #! format: noindent
+#! format: off
 
 # Interpolate tree_node_coordinates to each quadrant at the specified nodes
 function calc_node_coordinates!(node_coordinates,

From 40cbb8662403f28962134f542c7e0bd399364360 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 19 Apr 2024 10:43:39 +0200
Subject: [PATCH 12/89] Upgrading T8code.jl.

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 5e1a60b7723..450c7a0087b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -92,7 +92,7 @@ StaticArrays = "1.5"
 StrideArrays = "0.1.26"
 StructArrays = "0.6.11"
 SummationByPartsOperators = "0.5.41"
-T8code = "0.4.3, 0.5"
+T8code = "0.6.0"
 TimerOutputs = "0.5.7"
 Triangulate = "2.2"
 TriplotBase = "0.1"

From df77f82fb8a29a89656ab9109f717079b0c7205a Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 19 Apr 2024 11:34:22 +0200
Subject: [PATCH 13/89] Fixed examples.

---
 examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl | 8 ++++----
 .../elixir_euler_baroclinic_instability.jl                | 3 +--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
index 9f9876d2753..9163cadada8 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
@@ -38,15 +38,15 @@ summary_callback = SummaryCallback()
 # The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
 analysis_callback = AnalysisCallback(semi, interval = 100)
 
-# The SaveSolutionCallback allows to save the solution to a file in regular intervals
-save_solution = SaveSolutionCallback(interval = 100,
-                                     solution_variables = cons2prim)
+# # The SaveSolutionCallback allows to save the solution to a file in regular intervals
+# save_solution = SaveSolutionCallback(interval = 100,
+#                                      solution_variables = cons2prim)
 
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+callbacks = CallbackSet(summary_callback, analysis_callback, # save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
index 128f1e38a5b..4f5b187885e 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
@@ -227,8 +227,7 @@ volume_flux = flux_kennedy_gruber
 solver = DGSEM(polydeg = 5, surface_flux = surface_flux,
                volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
-# For optimal results, use (16, 8) here
-trees_per_cube_face = (8, 4)
+trees_per_cube_face = (4, 4)
 mesh = Trixi.T8codeMeshCubedSphere(trees_per_cube_face..., 6.371229e6, 30000.0,
                                    polydeg = 5, initial_refinement_level = 0)
 

From b80f890049b2a5e81b7d82a2fe58f5f2f4fc167c Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Mon, 22 Apr 2024 11:21:26 +0200
Subject: [PATCH 14/89] stress different meaning of first argument

It refers to the level of refinement in lat/lon direction, not the number of
trees as in the p4est version.
---
 src/meshes/t8code_mesh.jl | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 007da861f5b..5242ccd8ae3 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -619,8 +619,9 @@ Construct a cubed spherical shell of given inner radius and thickness as `T8code
 `:inside` and `:outside`.
 
 # Arguments
-- `trees_per_face_dimension::Integer`: the number of trees in the first two local
-                                       dimensions of each face.
+- `lat_lon_levels_per_face_dimension::Integer`: number of trees per patch in longitudinal
+                                                and latitudinal direction given as level of
+                                                refinement.
 - `layers::Integer`: the number of trees in the third local dimension of each face, i.e.,
                      the number of layers of the shell.
 - `inner_radius::Float64`: Radius of the inner side of the shell.
@@ -633,11 +634,13 @@ Construct a cubed spherical shell of given inner radius and thickness as `T8code
 - `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the
                                        simulation starts.
 """
-function T8codeMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness;
+function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_radius,
+                               thickness;
                                polydeg, RealT = Float64, initial_refinement_level = 0)
     NDIMS = 3
     cmesh = t8_cmesh_new_cubed_spherical_shell(inner_radius, thickness,
-                                               trees_per_face_dimension, layers, mpi_comm())
+                                               lat_lon_levels_per_face_dimension,
+                                               layers, mpi_comm())
     do_face_ghost = mpi_isparallel()
     scheme = t8_scheme_new_default_cxx()
     forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,

From c46799beb3a48bdb48af4243526d898245929cc7 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Mon, 22 Apr 2024 11:22:24 +0200
Subject: [PATCH 15/89] use lat lon levels

---
 examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl  | 3 ++-
 .../t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
index 9163cadada8..3c0817e969d 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
@@ -17,7 +17,8 @@ boundary_condition = BoundaryConditionDirichlet(initial_condition)
 boundary_conditions = Dict(:inside => boundary_condition,
                            :outside => boundary_condition)
 
-mesh = Trixi.T8codeMeshCubedSphere(5, 3, 0.5, 0.5;
+# Note that the first argument refers to the level of refinement, unlike for p4est
+mesh = Trixi.T8codeMeshCubedSphere(2, 3, 0.5, 0.5;
                                    polydeg = 3, initial_refinement_level = 0)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
index 4f5b187885e..21260288996 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
@@ -227,8 +227,11 @@ volume_flux = flux_kennedy_gruber
 solver = DGSEM(polydeg = 5, surface_flux = surface_flux,
                volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
-trees_per_cube_face = (4, 4)
-mesh = Trixi.T8codeMeshCubedSphere(trees_per_cube_face..., 6.371229e6, 30000.0,
+# For optimal results, use 4 lat lon levels and 8 layers here
+# Note that the first argument refers to the level of refinement, unlike in for p4est
+lat_lon_levels = 3
+layers = 4
+mesh = Trixi.T8codeMeshCubedSphere(lat_lon_levels, layers, 6.371229e6, 30000.0,
                                    polydeg = 5, initial_refinement_level = 0)
 
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,

From e3c2492cb181e48398f78f29ff0920b2dbed0c2c Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Mon, 22 Apr 2024 11:22:55 +0200
Subject: [PATCH 16/89] add t8code cubed sphere tests

---
 test/test_t8code_3d.jl | 49 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 300eaef66c8..72d142fcdb7 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -99,6 +99,22 @@ mkdir(outdir)
         end
     end
 
+    # This test differs from the one in `test_p4est_3d.jl` in the latitudinal and
+    # longitudinal dimensions.
+    @trixi_testset "elixir_advection_cubed_sphere.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"),
+                            l2=[0.002006918015656413],
+                            linf=[0.027655117058380085])
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
@@ -271,6 +287,39 @@ mkdir(outdir)
             @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
         end
     end
+
+    # This test is identical to the one in `test_p4est_3d.jl` besides minor
+    # deviations in the expected error norms.
+    @trixi_testset "elixir_euler_baroclinic_instability.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                     "elixir_euler_baroclinic_instability.jl"),
+                            l2=[
+                                6.725093801700048e-7,
+                                0.00021710076010951073,
+                                0.0004386796338203878,
+                                0.00020836270267103122,
+                                0.07601887903440395,
+                            ],
+                            linf=[
+                                1.9107530539574924e-5,
+                                0.02980358831035801,
+                                0.048476331898047564,
+                                0.02200137344113612,
+                                4.848310144356219,
+                            ],
+                            tspan=(0.0, 1e2),
+                            # Decrease tolerance of adaptive time stepping to get similar results across different systems
+                            abstol=1.0e-9, reltol=1.0e-9,
+                            coverage_override=(lat_lon_levels = 0, layers = 1, polydeg = 3)) # Prevent long compile time in CI
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
 end
 
 # Clean up afterwards: delete Trixi.jl output directory

From 7af3f31ab97e8b4fb00bdc90ea2f4713890b8dc8 Mon Sep 17 00:00:00 2001
From: Benedict <135045760+benegee@users.noreply.github.com>
Date: Thu, 25 Apr 2024 17:31:55 +0200
Subject: [PATCH 17/89] Remove TODO comments

---
 src/meshes/t8code_mesh.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 5242ccd8ae3..ecbbf0f7975 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -647,10 +647,8 @@ function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_
                                    mpi_comm())
 
     num_trees = t8_cmesh_get_num_trees(cmesh)
-    # TODO: Init?!
     boundary_names = fill(Symbol("---"), 2 * NDIMS, num_trees)
     for itree in 1:num_trees
-        # TODO: z-direction == radial direction in each tree?
         boundary_names[5, itree] = :inside
         boundary_names[6, itree] = :outside
     end

From 6d835e33e0325b2d22629b6e79c1464d3fc0790d Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 10:50:26 +0200
Subject: [PATCH 18/89] Relaxing T8code.jl version requirement.

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index a12522372de..0db43a69548 100644
--- a/Project.toml
+++ b/Project.toml
@@ -92,7 +92,7 @@ StaticArrays = "1.5"
 StrideArrays = "0.1.26"
 StructArrays = "0.6.11"
 SummationByPartsOperators = "0.5.41"
-T8code = "0.6.0"
+T8code = "0.4.3, 0.5, 0.6"
 TimerOutputs = "0.5.7"
 Triangulate = "2.2"
 TriplotBase = "0.1"

From 54a5ec3d3853cc0e5371516221df2639820b6d3d Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 11:27:09 +0200
Subject: [PATCH 19/89] Restricted t8code version requirement.

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 0db43a69548..d0ee452b2a6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -92,7 +92,7 @@ StaticArrays = "1.5"
 StrideArrays = "0.1.26"
 StructArrays = "0.6.11"
 SummationByPartsOperators = "0.5.41"
-T8code = "0.4.3, 0.5, 0.6"
+T8code = "0.5"
 TimerOutputs = "0.5.7"
 Triangulate = "2.2"
 TriplotBase = "0.1"

From 1daec4b7154183c85f272a729cdf8598007e536d Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 11:28:55 +0200
Subject: [PATCH 20/89] Removed cubed spherical shell related code.

---
 .../elixir_advection_cubed_sphere.jl          |  62 ----
 .../elixir_euler_baroclinic_instability.jl    | 301 ------------------
 src/meshes/t8code_mesh.jl                     |  46 ---
 3 files changed, 409 deletions(-)
 delete mode 100644 examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
 delete mode 100644 examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl

diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
deleted file mode 100644
index 3c0817e969d..00000000000
--- a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-
-using OrdinaryDiffEq
-using Trixi
-
-###############################################################################
-# semidiscretization of the linear advection equation
-
-advection_velocity = (0.2, -0.7, 0.5)
-equations = LinearScalarAdvectionEquation3D(advection_velocity)
-
-# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
-
-initial_condition = initial_condition_convergence_test
-
-boundary_condition = BoundaryConditionDirichlet(initial_condition)
-boundary_conditions = Dict(:inside => boundary_condition,
-                           :outside => boundary_condition)
-
-# Note that the first argument refers to the level of refinement, unlike in for p4est
-mesh = Trixi.T8codeMeshCubedSphere(2, 3, 0.5, 0.5;
-                                   polydeg = 3, initial_refinement_level = 0)
-
-# A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
-                                    boundary_conditions = boundary_conditions)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-# Create ODE problem with time span from 0.0 to 1.0
-tspan = (0.0, 1.0)
-ode = semidiscretize(semi, tspan)
-
-# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
-# and resets the timers
-summary_callback = SummaryCallback()
-
-# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
-analysis_callback = AnalysisCallback(semi, interval = 100)
-
-# # The SaveSolutionCallback allows to save the solution to a file in regular intervals
-# save_solution = SaveSolutionCallback(interval = 100,
-#                                      solution_variables = cons2prim)
-
-# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
-stepsize_callback = StepsizeCallback(cfl = 1.2)
-
-# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, # save_solution,
-                        stepsize_callback)
-
-###############################################################################
-# run the simulation
-
-# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
-sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
-            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
-            save_everystep = false, callback = callbacks);
-
-# Print the timer summary
-summary_callback()
diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
deleted file mode 100644
index 21260288996..00000000000
--- a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
+++ /dev/null
@@ -1,301 +0,0 @@
-# An idealized baroclinic instability test case
-# For optimal results consider increasing the resolution to 16x16x8 trees per cube face.
-#
-# Note that this elixir can take several hours to run.
-# Using 24 threads of an AMD Ryzen Threadripper 3990X (more threads don't speed it up further)
-# and `check-bounds=no`, this elixirs takes about one hour to run.
-# With 16x16x8 trees per cube face on the same machine, it takes about 28 hours.
-#
-# References:
-# - Paul A. Ullrich, Thomas Melvin, Christiane Jablonowski, Andrew Staniforth (2013)
-#   A proposed baroclinic wave test case for deep- and shallow-atmosphere dynamical cores
-#   https://doi.org/10.1002/qj.2241
-
-using OrdinaryDiffEq
-using Trixi
-using LinearAlgebra
-
-###############################################################################
-# Setup for the baroclinic instability test
-gamma = 1.4
-equations = CompressibleEulerEquations3D(gamma)
-
-# Initial condition for an idealized baroclinic instability test
-# https://doi.org/10.1002/qj.2241, Section 3.2 and Appendix A
-function initial_condition_baroclinic_instability(x, t,
-                                                  equations::CompressibleEulerEquations3D)
-    lon, lat, r = cartesian_to_sphere(x)
-    radius_earth = 6.371229e6
-    # Make sure that the r is not smaller than radius_earth
-    z = max(r - radius_earth, 0.0)
-
-    # Unperturbed basic state
-    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
-
-    # Stream function type perturbation
-    u_perturbation, v_perturbation = perturbation_stream_function(lon, lat, z)
-
-    u += u_perturbation
-    v = v_perturbation
-
-    # Convert spherical velocity to Cartesian
-    v1 = -sin(lon) * u - sin(lat) * cos(lon) * v
-    v2 = cos(lon) * u - sin(lat) * sin(lon) * v
-    v3 = cos(lat) * v
-
-    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
-end
-
-# Steady state for RHS correction below
-function steady_state_baroclinic_instability(x, t, equations::CompressibleEulerEquations3D)
-    lon, lat, r = cartesian_to_sphere(x)
-    radius_earth = 6.371229e6
-    # Make sure that the r is not smaller than radius_earth
-    z = max(r - radius_earth, 0.0)
-
-    # Unperturbed basic state
-    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
-
-    # Convert spherical velocity to Cartesian
-    v1 = -sin(lon) * u
-    v2 = cos(lon) * u
-    v3 = 0.0
-
-    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
-end
-
-function cartesian_to_sphere(x)
-    r = norm(x)
-    lambda = atan(x[2], x[1])
-    if lambda < 0
-        lambda += 2 * pi
-    end
-    phi = asin(x[3] / r)
-
-    return lambda, phi, r
-end
-
-# Unperturbed balanced steady-state.
-# Returns primitive variables with only the velocity in longitudinal direction (rho, u, p).
-# The other velocity components are zero.
-function basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
-    # Parameters from Table 1 in the paper
-    # Corresponding names in the paper are commented
-    radius_earth = 6.371229e6  # a
-    half_width_parameter = 2           # b
-    gravitational_acceleration = 9.80616     # g
-    k = 3           # k
-    surface_pressure = 1e5         # p₀
-    gas_constant = 287         # R
-    surface_equatorial_temperature = 310.0       # T₀ᴱ
-    surface_polar_temperature = 240.0       # T₀ᴾ
-    lapse_rate = 0.005       # Γ
-    angular_velocity = 7.29212e-5  # Ω
-
-    # Distance to the center of the Earth
-    r = z + radius_earth
-
-    # In the paper: T₀
-    temperature0 = 0.5 * (surface_equatorial_temperature + surface_polar_temperature)
-    # In the paper: A, B, C, H
-    const_a = 1 / lapse_rate
-    const_b = (temperature0 - surface_polar_temperature) /
-              (temperature0 * surface_polar_temperature)
-    const_c = 0.5 * (k + 2) * (surface_equatorial_temperature - surface_polar_temperature) /
-              (surface_equatorial_temperature * surface_polar_temperature)
-    const_h = gas_constant * temperature0 / gravitational_acceleration
-
-    # In the paper: (r - a) / bH
-    scaled_z = z / (half_width_parameter * const_h)
-
-    # Temporary variables
-    temp1 = exp(lapse_rate / temperature0 * z)
-    temp2 = exp(-scaled_z^2)
-
-    # In the paper: ̃τ₁, ̃τ₂
-    tau1 = const_a * lapse_rate / temperature0 * temp1 +
-           const_b * (1 - 2 * scaled_z^2) * temp2
-    tau2 = const_c * (1 - 2 * scaled_z^2) * temp2
-
-    # In the paper: ∫τ₁(r') dr', ∫τ₂(r') dr'
-    inttau1 = const_a * (temp1 - 1) + const_b * z * temp2
-    inttau2 = const_c * z * temp2
-
-    # Temporary variables
-    temp3 = r / radius_earth * cos(lat)
-    temp4 = temp3^k - k / (k + 2) * temp3^(k + 2)
-
-    # In the paper: T
-    temperature = 1 / ((r / radius_earth)^2 * (tau1 - tau2 * temp4))
-
-    # In the paper: U, u (zonal wind, first component of spherical velocity)
-    big_u = gravitational_acceleration / radius_earth * k * temperature * inttau2 *
-            (temp3^(k - 1) - temp3^(k + 1))
-    temp5 = radius_earth * cos(lat)
-    u = -angular_velocity * temp5 + sqrt(angular_velocity^2 * temp5^2 + temp5 * big_u)
-
-    # Hydrostatic pressure
-    p = surface_pressure *
-        exp(-gravitational_acceleration / gas_constant * (inttau1 - inttau2 * temp4))
-
-    # Density (via ideal gas law)
-    rho = p / (gas_constant * temperature)
-
-    return rho, u, p
-end
-
-# Perturbation as in Equations 25 and 26 of the paper (analytical derivative)
-function perturbation_stream_function(lon, lat, z)
-    # Parameters from Table 1 in the paper
-    # Corresponding names in the paper are commented
-    perturbation_radius = 1 / 6      # d₀ / a
-    perturbed_wind_amplitude = 1.0      # Vₚ
-    perturbation_lon = pi / 9     # Longitude of perturbation location
-    perturbation_lat = 2 * pi / 9 # Latitude of perturbation location
-    pertz = 15000    # Perturbation height cap
-
-    # Great circle distance (d in the paper) divided by a (radius of the Earth)
-    # because we never actually need d without dividing by a
-    great_circle_distance_by_a = acos(sin(perturbation_lat) * sin(lat) +
-                                      cos(perturbation_lat) * cos(lat) *
-                                      cos(lon - perturbation_lon))
-
-    # In the first case, the vertical taper function is per definition zero.
-    # In the second case, the stream function is per definition zero.
-    if z > pertz || great_circle_distance_by_a > perturbation_radius
-        return 0.0, 0.0
-    end
-
-    # Vertical tapering of stream function
-    perttaper = 1.0 - 3 * z^2 / pertz^2 + 2 * z^3 / pertz^3
-
-    # sin/cos(pi * d / (2 * d_0)) in the paper
-    sin_, cos_ = sincos(0.5 * pi * great_circle_distance_by_a / perturbation_radius)
-
-    # Common factor for both u and v
-    factor = 16 / (3 * sqrt(3)) * perturbed_wind_amplitude * perttaper * cos_^3 * sin_
-
-    u_perturbation = -factor * (-sin(perturbation_lat) * cos(lat) +
-                      cos(perturbation_lat) * sin(lat) * cos(lon - perturbation_lon)) /
-                     sin(great_circle_distance_by_a)
-
-    v_perturbation = factor * cos(perturbation_lat) * sin(lon - perturbation_lon) /
-                     sin(great_circle_distance_by_a)
-
-    return u_perturbation, v_perturbation
-end
-
-@inline function source_terms_baroclinic_instability(u, x, t,
-                                                     equations::CompressibleEulerEquations3D)
-    radius_earth = 6.371229e6  # a
-    gravitational_acceleration = 9.80616     # g
-    angular_velocity = 7.29212e-5  # Ω
-
-    r = norm(x)
-    # Make sure that r is not smaller than radius_earth
-    z = max(r - radius_earth, 0.0)
-    r = z + radius_earth
-
-    du1 = zero(eltype(u))
-
-    # Gravity term
-    temp = -gravitational_acceleration * radius_earth^2 / r^3
-    du2 = temp * u[1] * x[1]
-    du3 = temp * u[1] * x[2]
-    du4 = temp * u[1] * x[3]
-    du5 = temp * (u[2] * x[1] + u[3] * x[2] + u[4] * x[3])
-
-    # Coriolis term, -2Ω × ρv = -2 * angular_velocity * (0, 0, 1) × u[2:4]
-    du2 -= -2 * angular_velocity * u[3]
-    du3 -= 2 * angular_velocity * u[2]
-
-    return SVector(du1, du2, du3, du4, du5)
-end
-
-###############################################################################
-# Start of the actual elixir, semidiscretization of the problem
-
-initial_condition = initial_condition_baroclinic_instability
-
-boundary_conditions = Dict(:inside => boundary_condition_slip_wall,
-                           :outside => boundary_condition_slip_wall)
-
-# This is a good estimate for the speed of sound in this example.
-# Other values between 300 and 400 should work as well.
-surface_flux = FluxLMARS(340)
-volume_flux = flux_kennedy_gruber
-solver = DGSEM(polydeg = 5, surface_flux = surface_flux,
-               volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
-
-# For optimal results, use 4 lat lon levels and 8 layers here
-# Note that the first argument refers to the level of refinement, unlike in for p4est
-lat_lon_levels = 3
-layers = 4
-mesh = Trixi.T8codeMeshCubedSphere(lat_lon_levels, layers, 6.371229e6, 30000.0,
-                                   polydeg = 5, initial_refinement_level = 0)
-
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
-                                    source_terms = source_terms_baroclinic_instability,
-                                    boundary_conditions = boundary_conditions)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-tspan = (0.0, 10 * 24 * 60 * 60.0) # time in seconds for 10 days
-
-# Save RHS of the steady state and subtract it in every RHS evaluation.
-# This trick preserves the steady state exactly (to machine rounding errors, of course).
-# Otherwise, this elixir produces entirely unusable results for a resolution of 8x8x4 cells
-# per cube face with a polydeg of 3.
-# With this trick, even the polydeg 3 simulation produces usable (although badly resolved) results,
-# and most of the grid imprinting in higher polydeg simulation is eliminated.
-#
-# See https://github.com/trixi-framework/Trixi.jl/issues/980 for more information.
-u_steady_state = compute_coefficients(steady_state_baroclinic_instability, tspan[1], semi)
-# Use a `let` block for performance (otherwise du_steady_state will be a global variable)
-let du_steady_state = similar(u_steady_state)
-    # Save RHS of the steady state
-    Trixi.rhs!(du_steady_state, u_steady_state, semi, tspan[1])
-
-    global function corrected_rhs!(du, u, semi, t)
-        # Normal RHS evaluation
-        Trixi.rhs!(du, u, semi, t)
-        # Correct by subtracting the steady-state RHS
-        Trixi.@trixi_timeit Trixi.timer() "rhs correction" begin
-            # Use Trixi.@threaded for threaded performance
-            Trixi.@threaded for i in eachindex(du)
-                du[i] -= du_steady_state[i]
-            end
-        end
-    end
-end
-u0 = compute_coefficients(tspan[1], semi)
-ode = ODEProblem(corrected_rhs!, u0, tspan, semi)
-
-summary_callback = SummaryCallback()
-
-analysis_interval = 5000
-analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
-
-alive_callback = AliveCallback(analysis_interval = analysis_interval)
-
-#save_solution = SaveSolutionCallback(interval = 5000,
-#                                     save_initial_solution = true,
-#                                     save_final_solution = true,
-#                                     solution_variables = cons2prim)
-
-callbacks = CallbackSet(summary_callback,
-                        analysis_callback,
-                        alive_callback)
-#                        , save_solution)
-
-###############################################################################
-# run the simulation
-
-# Use a Runge-Kutta method with automatic (error based) time step size control
-# Enable threading of the RK method for better performance on multiple threads
-sol = solve(ode, RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()); abstol = 1.0e-6,
-            reltol = 1.0e-6,
-            ode_default_options()..., callback = callbacks);
-
-summary_callback() # print the timer summary
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index ecbbf0f7975..be76fb3f56b 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -610,52 +610,6 @@ function T8codeMesh(meshfile::AbaqusFile{NDIMS};
                              boundary_names, "")
 end
 
-"""
-T8codeMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness;
-                      polydeg, RealT=Float64, initial_refinement_level=0)
-
-Construct a cubed spherical shell of given inner radius and thickness as `T8codeMesh` with
-`6 * trees_per_face_dimension^2 * layers` trees. The mesh will have two boundaries,
-`:inside` and `:outside`.
-
-# Arguments
-- `lat_lon_levels_per_face_dimension::Integer`: number of trees per patch in longitudinal
-                                                and latitudinal direction given as level of
-                                                refinement.
-- `layers::Integer`: the number of trees in the third local dimension of each face, i.e.,
-                     the number of layers of the shell.
-- `inner_radius::Float64`: Radius of the inner side of the shell.
-- `thickness::Float64`: Thickness of the shell. The outer radius will be
-                        `inner_radius + thickness`.
-- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
-                      The mapping will be approximated by an interpolation polynomial
-                      of the specified degree for each tree.
-- `RealT::Type`: the type that should be used for coordinates.
-- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the
-                                       simulation starts.
-"""
-function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_radius,
-                               thickness;
-                               polydeg, RealT = Float64, initial_refinement_level = 0)
-    NDIMS = 3
-    cmesh = t8_cmesh_new_cubed_spherical_shell(inner_radius, thickness,
-                                               lat_lon_levels_per_face_dimension,
-                                               layers, mpi_comm())
-    do_face_ghost = mpi_isparallel()
-    scheme = t8_scheme_new_default_cxx()
-    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
-                                   mpi_comm())
-
-    num_trees = t8_cmesh_get_num_trees(cmesh)
-    boundary_names = fill(Symbol("---"), 2 * NDIMS, num_trees)
-    for itree in 1:num_trees
-        boundary_names[5, itree] = :inside
-        boundary_names[6, itree] = :outside
-    end
-
-    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg)
-end
-
 struct adapt_callback_passthrough
     adapt_callback::Function
     user_data::Any

From 120af75ad8b49e284b1a32f2ee5b505686f4a70f Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 11:33:18 +0200
Subject: [PATCH 21/89] Removed cubed spherical shell tests.

---
 test/test_t8code_3d.jl | 49 ------------------------------------------
 1 file changed, 49 deletions(-)

diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 72d142fcdb7..300eaef66c8 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -99,22 +99,6 @@ mkdir(outdir)
         end
     end
 
-    # This test differs from the one in `test_p4est_3d.jl` in the latitudinal and
-    # longitudinal dimensions.
-    @trixi_testset "elixir_advection_cubed_sphere.jl" begin
-        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"),
-                            l2=[0.002006918015656413],
-                            linf=[0.027655117058380085])
-        # Ensure that we do not have excessive memory allocations
-        # (e.g., from type instabilities)
-        let
-            t = sol.t[end]
-            u_ode = sol.u[end]
-            du_ode = similar(u_ode)
-            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
-        end
-    end
-
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
@@ -287,39 +271,6 @@ mkdir(outdir)
             @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
         end
     end
-
-    # This test is identical to the one in `test_p4est_3d.jl` besides minor
-    # deviations in the expected error norms.
-    @trixi_testset "elixir_euler_baroclinic_instability.jl" begin
-        @test_trixi_include(joinpath(EXAMPLES_DIR,
-                                     "elixir_euler_baroclinic_instability.jl"),
-                            l2=[
-                                6.725093801700048e-7,
-                                0.00021710076010951073,
-                                0.0004386796338203878,
-                                0.00020836270267103122,
-                                0.07601887903440395,
-                            ],
-                            linf=[
-                                1.9107530539574924e-5,
-                                0.02980358831035801,
-                                0.048476331898047564,
-                                0.02200137344113612,
-                                4.848310144356219,
-                            ],
-                            tspan=(0.0, 1e2),
-                            # Decrease tolerance of adaptive time stepping to get similar results across different systems
-                            abstol=1.0e-9, reltol=1.0e-9,
-                            coverage_override=(lat_lon_levels = 0, layers = 1, polydeg = 3)) # Prevent long compile time in CI
-        # Ensure that we do not have excessive memory allocations
-        # (e.g., from type instabilities)
-        let
-            t = sol.t[end]
-            u_ode = sol.u[end]
-            du_ode = similar(u_ode)
-            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
-        end
-    end
 end
 
 # Clean up afterwards: delete Trixi.jl output directory

From e93ebeb32fed06973b4068ba96a21b174c06f24a Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 11:48:40 +0200
Subject: [PATCH 22/89] Including cubed spherical shell setup.

---
 Project.toml                                  |   2 +-
 .../elixir_advection_cubed_sphere.jl          |  62 ----
 .../elixir_euler_baroclinic_instability.jl    | 301 ------------------
 src/meshes/t8code_mesh.jl                     |  46 ---
 test/test_t8code_3d.jl                        |  49 ---
 5 files changed, 1 insertion(+), 459 deletions(-)
 delete mode 100644 examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
 delete mode 100644 examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl

diff --git a/Project.toml b/Project.toml
index 0db43a69548..4d801f6518e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -92,7 +92,7 @@ StaticArrays = "1.5"
 StrideArrays = "0.1.26"
 StructArrays = "0.6.11"
 SummationByPartsOperators = "0.5.41"
-T8code = "0.4.3, 0.5, 0.6"
+T8code = "0.6"
 TimerOutputs = "0.5.7"
 Triangulate = "2.2"
 TriplotBase = "0.1"
diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
deleted file mode 100644
index 3c0817e969d..00000000000
--- a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-
-using OrdinaryDiffEq
-using Trixi
-
-###############################################################################
-# semidiscretization of the linear advection equation
-
-advection_velocity = (0.2, -0.7, 0.5)
-equations = LinearScalarAdvectionEquation3D(advection_velocity)
-
-# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
-
-initial_condition = initial_condition_convergence_test
-
-boundary_condition = BoundaryConditionDirichlet(initial_condition)
-boundary_conditions = Dict(:inside => boundary_condition,
-                           :outside => boundary_condition)
-
-# Note that the first argument refers to the level of refinement, unlike in for p4est
-mesh = Trixi.T8codeMeshCubedSphere(2, 3, 0.5, 0.5;
-                                   polydeg = 3, initial_refinement_level = 0)
-
-# A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
-                                    boundary_conditions = boundary_conditions)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-# Create ODE problem with time span from 0.0 to 1.0
-tspan = (0.0, 1.0)
-ode = semidiscretize(semi, tspan)
-
-# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
-# and resets the timers
-summary_callback = SummaryCallback()
-
-# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
-analysis_callback = AnalysisCallback(semi, interval = 100)
-
-# # The SaveSolutionCallback allows to save the solution to a file in regular intervals
-# save_solution = SaveSolutionCallback(interval = 100,
-#                                      solution_variables = cons2prim)
-
-# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
-stepsize_callback = StepsizeCallback(cfl = 1.2)
-
-# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, # save_solution,
-                        stepsize_callback)
-
-###############################################################################
-# run the simulation
-
-# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
-sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
-            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
-            save_everystep = false, callback = callbacks);
-
-# Print the timer summary
-summary_callback()
diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
deleted file mode 100644
index 21260288996..00000000000
--- a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
+++ /dev/null
@@ -1,301 +0,0 @@
-# An idealized baroclinic instability test case
-# For optimal results consider increasing the resolution to 16x16x8 trees per cube face.
-#
-# Note that this elixir can take several hours to run.
-# Using 24 threads of an AMD Ryzen Threadripper 3990X (more threads don't speed it up further)
-# and `check-bounds=no`, this elixirs takes about one hour to run.
-# With 16x16x8 trees per cube face on the same machine, it takes about 28 hours.
-#
-# References:
-# - Paul A. Ullrich, Thomas Melvin, Christiane Jablonowski, Andrew Staniforth (2013)
-#   A proposed baroclinic wave test case for deep- and shallow-atmosphere dynamical cores
-#   https://doi.org/10.1002/qj.2241
-
-using OrdinaryDiffEq
-using Trixi
-using LinearAlgebra
-
-###############################################################################
-# Setup for the baroclinic instability test
-gamma = 1.4
-equations = CompressibleEulerEquations3D(gamma)
-
-# Initial condition for an idealized baroclinic instability test
-# https://doi.org/10.1002/qj.2241, Section 3.2 and Appendix A
-function initial_condition_baroclinic_instability(x, t,
-                                                  equations::CompressibleEulerEquations3D)
-    lon, lat, r = cartesian_to_sphere(x)
-    radius_earth = 6.371229e6
-    # Make sure that the r is not smaller than radius_earth
-    z = max(r - radius_earth, 0.0)
-
-    # Unperturbed basic state
-    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
-
-    # Stream function type perturbation
-    u_perturbation, v_perturbation = perturbation_stream_function(lon, lat, z)
-
-    u += u_perturbation
-    v = v_perturbation
-
-    # Convert spherical velocity to Cartesian
-    v1 = -sin(lon) * u - sin(lat) * cos(lon) * v
-    v2 = cos(lon) * u - sin(lat) * sin(lon) * v
-    v3 = cos(lat) * v
-
-    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
-end
-
-# Steady state for RHS correction below
-function steady_state_baroclinic_instability(x, t, equations::CompressibleEulerEquations3D)
-    lon, lat, r = cartesian_to_sphere(x)
-    radius_earth = 6.371229e6
-    # Make sure that the r is not smaller than radius_earth
-    z = max(r - radius_earth, 0.0)
-
-    # Unperturbed basic state
-    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
-
-    # Convert spherical velocity to Cartesian
-    v1 = -sin(lon) * u
-    v2 = cos(lon) * u
-    v3 = 0.0
-
-    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
-end
-
-function cartesian_to_sphere(x)
-    r = norm(x)
-    lambda = atan(x[2], x[1])
-    if lambda < 0
-        lambda += 2 * pi
-    end
-    phi = asin(x[3] / r)
-
-    return lambda, phi, r
-end
-
-# Unperturbed balanced steady-state.
-# Returns primitive variables with only the velocity in longitudinal direction (rho, u, p).
-# The other velocity components are zero.
-function basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
-    # Parameters from Table 1 in the paper
-    # Corresponding names in the paper are commented
-    radius_earth = 6.371229e6  # a
-    half_width_parameter = 2           # b
-    gravitational_acceleration = 9.80616     # g
-    k = 3           # k
-    surface_pressure = 1e5         # p₀
-    gas_constant = 287         # R
-    surface_equatorial_temperature = 310.0       # T₀ᴱ
-    surface_polar_temperature = 240.0       # T₀ᴾ
-    lapse_rate = 0.005       # Γ
-    angular_velocity = 7.29212e-5  # Ω
-
-    # Distance to the center of the Earth
-    r = z + radius_earth
-
-    # In the paper: T₀
-    temperature0 = 0.5 * (surface_equatorial_temperature + surface_polar_temperature)
-    # In the paper: A, B, C, H
-    const_a = 1 / lapse_rate
-    const_b = (temperature0 - surface_polar_temperature) /
-              (temperature0 * surface_polar_temperature)
-    const_c = 0.5 * (k + 2) * (surface_equatorial_temperature - surface_polar_temperature) /
-              (surface_equatorial_temperature * surface_polar_temperature)
-    const_h = gas_constant * temperature0 / gravitational_acceleration
-
-    # In the paper: (r - a) / bH
-    scaled_z = z / (half_width_parameter * const_h)
-
-    # Temporary variables
-    temp1 = exp(lapse_rate / temperature0 * z)
-    temp2 = exp(-scaled_z^2)
-
-    # In the paper: ̃τ₁, ̃τ₂
-    tau1 = const_a * lapse_rate / temperature0 * temp1 +
-           const_b * (1 - 2 * scaled_z^2) * temp2
-    tau2 = const_c * (1 - 2 * scaled_z^2) * temp2
-
-    # In the paper: ∫τ₁(r') dr', ∫τ₂(r') dr'
-    inttau1 = const_a * (temp1 - 1) + const_b * z * temp2
-    inttau2 = const_c * z * temp2
-
-    # Temporary variables
-    temp3 = r / radius_earth * cos(lat)
-    temp4 = temp3^k - k / (k + 2) * temp3^(k + 2)
-
-    # In the paper: T
-    temperature = 1 / ((r / radius_earth)^2 * (tau1 - tau2 * temp4))
-
-    # In the paper: U, u (zonal wind, first component of spherical velocity)
-    big_u = gravitational_acceleration / radius_earth * k * temperature * inttau2 *
-            (temp3^(k - 1) - temp3^(k + 1))
-    temp5 = radius_earth * cos(lat)
-    u = -angular_velocity * temp5 + sqrt(angular_velocity^2 * temp5^2 + temp5 * big_u)
-
-    # Hydrostatic pressure
-    p = surface_pressure *
-        exp(-gravitational_acceleration / gas_constant * (inttau1 - inttau2 * temp4))
-
-    # Density (via ideal gas law)
-    rho = p / (gas_constant * temperature)
-
-    return rho, u, p
-end
-
-# Perturbation as in Equations 25 and 26 of the paper (analytical derivative)
-function perturbation_stream_function(lon, lat, z)
-    # Parameters from Table 1 in the paper
-    # Corresponding names in the paper are commented
-    perturbation_radius = 1 / 6      # d₀ / a
-    perturbed_wind_amplitude = 1.0      # Vₚ
-    perturbation_lon = pi / 9     # Longitude of perturbation location
-    perturbation_lat = 2 * pi / 9 # Latitude of perturbation location
-    pertz = 15000    # Perturbation height cap
-
-    # Great circle distance (d in the paper) divided by a (radius of the Earth)
-    # because we never actually need d without dividing by a
-    great_circle_distance_by_a = acos(sin(perturbation_lat) * sin(lat) +
-                                      cos(perturbation_lat) * cos(lat) *
-                                      cos(lon - perturbation_lon))
-
-    # In the first case, the vertical taper function is per definition zero.
-    # In the second case, the stream function is per definition zero.
-    if z > pertz || great_circle_distance_by_a > perturbation_radius
-        return 0.0, 0.0
-    end
-
-    # Vertical tapering of stream function
-    perttaper = 1.0 - 3 * z^2 / pertz^2 + 2 * z^3 / pertz^3
-
-    # sin/cos(pi * d / (2 * d_0)) in the paper
-    sin_, cos_ = sincos(0.5 * pi * great_circle_distance_by_a / perturbation_radius)
-
-    # Common factor for both u and v
-    factor = 16 / (3 * sqrt(3)) * perturbed_wind_amplitude * perttaper * cos_^3 * sin_
-
-    u_perturbation = -factor * (-sin(perturbation_lat) * cos(lat) +
-                      cos(perturbation_lat) * sin(lat) * cos(lon - perturbation_lon)) /
-                     sin(great_circle_distance_by_a)
-
-    v_perturbation = factor * cos(perturbation_lat) * sin(lon - perturbation_lon) /
-                     sin(great_circle_distance_by_a)
-
-    return u_perturbation, v_perturbation
-end
-
-@inline function source_terms_baroclinic_instability(u, x, t,
-                                                     equations::CompressibleEulerEquations3D)
-    radius_earth = 6.371229e6  # a
-    gravitational_acceleration = 9.80616     # g
-    angular_velocity = 7.29212e-5  # Ω
-
-    r = norm(x)
-    # Make sure that r is not smaller than radius_earth
-    z = max(r - radius_earth, 0.0)
-    r = z + radius_earth
-
-    du1 = zero(eltype(u))
-
-    # Gravity term
-    temp = -gravitational_acceleration * radius_earth^2 / r^3
-    du2 = temp * u[1] * x[1]
-    du3 = temp * u[1] * x[2]
-    du4 = temp * u[1] * x[3]
-    du5 = temp * (u[2] * x[1] + u[3] * x[2] + u[4] * x[3])
-
-    # Coriolis term, -2Ω × ρv = -2 * angular_velocity * (0, 0, 1) × u[2:4]
-    du2 -= -2 * angular_velocity * u[3]
-    du3 -= 2 * angular_velocity * u[2]
-
-    return SVector(du1, du2, du3, du4, du5)
-end
-
-###############################################################################
-# Start of the actual elixir, semidiscretization of the problem
-
-initial_condition = initial_condition_baroclinic_instability
-
-boundary_conditions = Dict(:inside => boundary_condition_slip_wall,
-                           :outside => boundary_condition_slip_wall)
-
-# This is a good estimate for the speed of sound in this example.
-# Other values between 300 and 400 should work as well.
-surface_flux = FluxLMARS(340)
-volume_flux = flux_kennedy_gruber
-solver = DGSEM(polydeg = 5, surface_flux = surface_flux,
-               volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
-
-# For optimal results, use 4 lat lon levels and 8 layers here
-# Note that the first argument refers to the level of refinement, unlike in for p4est
-lat_lon_levels = 3
-layers = 4
-mesh = Trixi.T8codeMeshCubedSphere(lat_lon_levels, layers, 6.371229e6, 30000.0,
-                                   polydeg = 5, initial_refinement_level = 0)
-
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
-                                    source_terms = source_terms_baroclinic_instability,
-                                    boundary_conditions = boundary_conditions)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-tspan = (0.0, 10 * 24 * 60 * 60.0) # time in seconds for 10 days
-
-# Save RHS of the steady state and subtract it in every RHS evaluation.
-# This trick preserves the steady state exactly (to machine rounding errors, of course).
-# Otherwise, this elixir produces entirely unusable results for a resolution of 8x8x4 cells
-# per cube face with a polydeg of 3.
-# With this trick, even the polydeg 3 simulation produces usable (although badly resolved) results,
-# and most of the grid imprinting in higher polydeg simulation is eliminated.
-#
-# See https://github.com/trixi-framework/Trixi.jl/issues/980 for more information.
-u_steady_state = compute_coefficients(steady_state_baroclinic_instability, tspan[1], semi)
-# Use a `let` block for performance (otherwise du_steady_state will be a global variable)
-let du_steady_state = similar(u_steady_state)
-    # Save RHS of the steady state
-    Trixi.rhs!(du_steady_state, u_steady_state, semi, tspan[1])
-
-    global function corrected_rhs!(du, u, semi, t)
-        # Normal RHS evaluation
-        Trixi.rhs!(du, u, semi, t)
-        # Correct by subtracting the steady-state RHS
-        Trixi.@trixi_timeit Trixi.timer() "rhs correction" begin
-            # Use Trixi.@threaded for threaded performance
-            Trixi.@threaded for i in eachindex(du)
-                du[i] -= du_steady_state[i]
-            end
-        end
-    end
-end
-u0 = compute_coefficients(tspan[1], semi)
-ode = ODEProblem(corrected_rhs!, u0, tspan, semi)
-
-summary_callback = SummaryCallback()
-
-analysis_interval = 5000
-analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
-
-alive_callback = AliveCallback(analysis_interval = analysis_interval)
-
-#save_solution = SaveSolutionCallback(interval = 5000,
-#                                     save_initial_solution = true,
-#                                     save_final_solution = true,
-#                                     solution_variables = cons2prim)
-
-callbacks = CallbackSet(summary_callback,
-                        analysis_callback,
-                        alive_callback)
-#                        , save_solution)
-
-###############################################################################
-# run the simulation
-
-# Use a Runge-Kutta method with automatic (error based) time step size control
-# Enable threading of the RK method for better performance on multiple threads
-sol = solve(ode, RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()); abstol = 1.0e-6,
-            reltol = 1.0e-6,
-            ode_default_options()..., callback = callbacks);
-
-summary_callback() # print the timer summary
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index ecbbf0f7975..be76fb3f56b 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -610,52 +610,6 @@ function T8codeMesh(meshfile::AbaqusFile{NDIMS};
                              boundary_names, "")
 end
 
-"""
-T8codeMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness;
-                      polydeg, RealT=Float64, initial_refinement_level=0)
-
-Construct a cubed spherical shell of given inner radius and thickness as `T8codeMesh` with
-`6 * trees_per_face_dimension^2 * layers` trees. The mesh will have two boundaries,
-`:inside` and `:outside`.
-
-# Arguments
-- `lat_lon_levels_per_face_dimension::Integer`: number of trees per patch in longitudinal
-                                                and latitudinal direction given as level of
-                                                refinement.
-- `layers::Integer`: the number of trees in the third local dimension of each face, i.e.,
-                     the number of layers of the shell.
-- `inner_radius::Float64`: Radius of the inner side of the shell.
-- `thickness::Float64`: Thickness of the shell. The outer radius will be
-                        `inner_radius + thickness`.
-- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
-                      The mapping will be approximated by an interpolation polynomial
-                      of the specified degree for each tree.
-- `RealT::Type`: the type that should be used for coordinates.
-- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the
-                                       simulation starts.
-"""
-function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_radius,
-                               thickness;
-                               polydeg, RealT = Float64, initial_refinement_level = 0)
-    NDIMS = 3
-    cmesh = t8_cmesh_new_cubed_spherical_shell(inner_radius, thickness,
-                                               lat_lon_levels_per_face_dimension,
-                                               layers, mpi_comm())
-    do_face_ghost = mpi_isparallel()
-    scheme = t8_scheme_new_default_cxx()
-    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
-                                   mpi_comm())
-
-    num_trees = t8_cmesh_get_num_trees(cmesh)
-    boundary_names = fill(Symbol("---"), 2 * NDIMS, num_trees)
-    for itree in 1:num_trees
-        boundary_names[5, itree] = :inside
-        boundary_names[6, itree] = :outside
-    end
-
-    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg)
-end
-
 struct adapt_callback_passthrough
     adapt_callback::Function
     user_data::Any
diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 72d142fcdb7..300eaef66c8 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -99,22 +99,6 @@ mkdir(outdir)
         end
     end
 
-    # This test differs from the one in `test_p4est_3d.jl` in the latitudinal and
-    # longitudinal dimensions.
-    @trixi_testset "elixir_advection_cubed_sphere.jl" begin
-        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"),
-                            l2=[0.002006918015656413],
-                            linf=[0.027655117058380085])
-        # Ensure that we do not have excessive memory allocations
-        # (e.g., from type instabilities)
-        let
-            t = sol.t[end]
-            u_ode = sol.u[end]
-            du_ode = similar(u_ode)
-            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
-        end
-    end
-
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
@@ -287,39 +271,6 @@ mkdir(outdir)
             @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
         end
     end
-
-    # This test is identical to the one in `test_p4est_3d.jl` besides minor
-    # deviations in the expected error norms.
-    @trixi_testset "elixir_euler_baroclinic_instability.jl" begin
-        @test_trixi_include(joinpath(EXAMPLES_DIR,
-                                     "elixir_euler_baroclinic_instability.jl"),
-                            l2=[
-                                6.725093801700048e-7,
-                                0.00021710076010951073,
-                                0.0004386796338203878,
-                                0.00020836270267103122,
-                                0.07601887903440395,
-                            ],
-                            linf=[
-                                1.9107530539574924e-5,
-                                0.02980358831035801,
-                                0.048476331898047564,
-                                0.02200137344113612,
-                                4.848310144356219,
-                            ],
-                            tspan=(0.0, 1e2),
-                            # Decrease tolerance of adaptive time stepping to get similar results across different systems
-                            abstol=1.0e-9, reltol=1.0e-9,
-                            coverage_override=(lat_lon_levels = 0, layers = 1, polydeg = 3)) # Prevent long compile time in CI
-        # Ensure that we do not have excessive memory allocations
-        # (e.g., from type instabilities)
-        let
-            t = sol.t[end]
-            u_ode = sol.u[end]
-            du_ode = similar(u_ode)
-            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
-        end
-    end
 end
 
 # Clean up afterwards: delete Trixi.jl output directory

From ae141d5d059e5633ae788373e01be8abe81a92f9 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 11:51:46 +0200
Subject: [PATCH 23/89] Modified GH ci such that it uses the latest T8code.jl
 package.

---
 .github/workflows/ci.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 27f7af7c528..45bacaf2361 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,6 +116,12 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""
+      - name: Configure new T8code.jl version environment
+        shell: julia --project="." --color=yes {0}
+        run: |
+          using Pkg
+          Pkg.add(PackageSpec(url="https://github.com/DLR-AMR/T8code.jl", rev="bump-to-t8code-2.0.0"))
+          Pkg.instantiate()
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:

From 1d0d3f5a7a76960bb2f088cdafddb01ffa5d9583 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 12:01:05 +0200
Subject: [PATCH 24/89] Included cubed spherical shell setups and tests.

---
 .../elixir_advection_cubed_sphere.jl          |  61 ++++
 .../elixir_euler_baroclinic_instability.jl    | 301 ++++++++++++++++++
 src/meshes/t8code_mesh.jl                     |  46 +++
 test/test_t8code_3d.jl                        |  49 +++
 4 files changed, 457 insertions(+)
 create mode 100644 examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
 create mode 100644 examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl

diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
new file mode 100644
index 00000000000..556d8a02893
--- /dev/null
+++ b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl
@@ -0,0 +1,61 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7, 0.5)
+equations = LinearScalarAdvectionEquation3D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+initial_condition = initial_condition_convergence_test
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:inside => boundary_condition,
+                           :outside => boundary_condition)
+
+# Note that the first argument refers to the level of refinement, unlike for p4est
+mesh = Trixi.T8codeMeshCubedSphere(2, 3, 0.5, 0.5;
+                                   polydeg = 3, initial_refinement_level = 0)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan)
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# # The SaveSolutionCallback allows to save the solution to a file in regular intervals
+# save_solution = SaveSolutionCallback(interval = 100,
+#                                      solution_variables = cons2prim)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.2)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, # save_solution,
+                        stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
new file mode 100644
index 00000000000..21260288996
--- /dev/null
+++ b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl
@@ -0,0 +1,301 @@
+# An idealized baroclinic instability test case
+# For optimal results consider increasing the resolution to 16x16x8 trees per cube face.
+#
+# Note that this elixir can take several hours to run.
+# Using 24 threads of an AMD Ryzen Threadripper 3990X (more threads don't speed it up further)
+# and `check-bounds=no`, this elixir takes about one hour to run.
+# With 16x16x8 trees per cube face on the same machine, it takes about 28 hours.
+#
+# References:
+# - Paul A. Ullrich, Thomas Melvin, Christiane Jablonowski, Andrew Staniforth (2013)
+#   A proposed baroclinic wave test case for deep- and shallow-atmosphere dynamical cores
+#   https://doi.org/10.1002/qj.2241
+
+using OrdinaryDiffEq
+using Trixi
+using LinearAlgebra
+
+###############################################################################
+# Setup for the baroclinic instability test
+gamma = 1.4
+equations = CompressibleEulerEquations3D(gamma)
+
+# Initial condition for an idealized baroclinic instability test
+# https://doi.org/10.1002/qj.2241, Section 3.2 and Appendix A
+function initial_condition_baroclinic_instability(x, t,
+                                                  equations::CompressibleEulerEquations3D)
+    lon, lat, r = cartesian_to_sphere(x)
+    radius_earth = 6.371229e6
+    # Make sure that r is not smaller than radius_earth
+    z = max(r - radius_earth, 0.0)
+
+    # Unperturbed basic state
+    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
+
+    # Stream function type perturbation
+    u_perturbation, v_perturbation = perturbation_stream_function(lon, lat, z)
+
+    u += u_perturbation
+    v = v_perturbation
+
+    # Convert spherical velocity to Cartesian
+    v1 = -sin(lon) * u - sin(lat) * cos(lon) * v
+    v2 = cos(lon) * u - sin(lat) * sin(lon) * v
+    v3 = cos(lat) * v
+
+    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+# Steady state for RHS correction below
+function steady_state_baroclinic_instability(x, t, equations::CompressibleEulerEquations3D)
+    lon, lat, r = cartesian_to_sphere(x)
+    radius_earth = 6.371229e6
+    # Make sure that r is not smaller than radius_earth
+    z = max(r - radius_earth, 0.0)
+
+    # Unperturbed basic state
+    rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
+
+    # Convert spherical velocity to Cartesian
+    v1 = -sin(lon) * u
+    v2 = cos(lon) * u
+    v3 = 0.0
+
+    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+function cartesian_to_sphere(x)
+    r = norm(x)
+    lambda = atan(x[2], x[1])
+    if lambda < 0
+        lambda += 2 * pi
+    end
+    phi = asin(x[3] / r)
+
+    return lambda, phi, r
+end
+
+# Unperturbed balanced steady-state.
+# Returns primitive variables with only the velocity in longitudinal direction (rho, u, p).
+# The other velocity components are zero.
+function basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z)
+    # Parameters from Table 1 in the paper
+    # Corresponding names in the paper are commented
+    radius_earth = 6.371229e6  # a
+    half_width_parameter = 2           # b
+    gravitational_acceleration = 9.80616     # g
+    k = 3           # k
+    surface_pressure = 1e5         # p₀
+    gas_constant = 287         # R
+    surface_equatorial_temperature = 310.0       # T₀ᴱ
+    surface_polar_temperature = 240.0       # T₀ᴾ
+    lapse_rate = 0.005       # Γ
+    angular_velocity = 7.29212e-5  # Ω
+
+    # Distance to the center of the Earth
+    r = z + radius_earth
+
+    # In the paper: T₀
+    temperature0 = 0.5 * (surface_equatorial_temperature + surface_polar_temperature)
+    # In the paper: A, B, C, H
+    const_a = 1 / lapse_rate
+    const_b = (temperature0 - surface_polar_temperature) /
+              (temperature0 * surface_polar_temperature)
+    const_c = 0.5 * (k + 2) * (surface_equatorial_temperature - surface_polar_temperature) /
+              (surface_equatorial_temperature * surface_polar_temperature)
+    const_h = gas_constant * temperature0 / gravitational_acceleration
+
+    # In the paper: (r - a) / bH
+    scaled_z = z / (half_width_parameter * const_h)
+
+    # Temporary variables
+    temp1 = exp(lapse_rate / temperature0 * z)
+    temp2 = exp(-scaled_z^2)
+
+    # In the paper: ̃τ₁, ̃τ₂
+    tau1 = const_a * lapse_rate / temperature0 * temp1 +
+           const_b * (1 - 2 * scaled_z^2) * temp2
+    tau2 = const_c * (1 - 2 * scaled_z^2) * temp2
+
+    # In the paper: ∫τ₁(r') dr', ∫τ₂(r') dr'
+    inttau1 = const_a * (temp1 - 1) + const_b * z * temp2
+    inttau2 = const_c * z * temp2
+
+    # Temporary variables
+    temp3 = r / radius_earth * cos(lat)
+    temp4 = temp3^k - k / (k + 2) * temp3^(k + 2)
+
+    # In the paper: T
+    temperature = 1 / ((r / radius_earth)^2 * (tau1 - tau2 * temp4))
+
+    # In the paper: U, u (zonal wind, first component of spherical velocity)
+    big_u = gravitational_acceleration / radius_earth * k * temperature * inttau2 *
+            (temp3^(k - 1) - temp3^(k + 1))
+    temp5 = radius_earth * cos(lat)
+    u = -angular_velocity * temp5 + sqrt(angular_velocity^2 * temp5^2 + temp5 * big_u)
+
+    # Hydrostatic pressure
+    p = surface_pressure *
+        exp(-gravitational_acceleration / gas_constant * (inttau1 - inttau2 * temp4))
+
+    # Density (via ideal gas law)
+    rho = p / (gas_constant * temperature)
+
+    return rho, u, p
+end
+
+# Perturbation as in Equations 25 and 26 of the paper (analytical derivative)
+function perturbation_stream_function(lon, lat, z)
+    # Parameters from Table 1 in the paper
+    # Corresponding names in the paper are commented
+    perturbation_radius = 1 / 6      # d₀ / a
+    perturbed_wind_amplitude = 1.0      # Vₚ
+    perturbation_lon = pi / 9     # Longitude of perturbation location
+    perturbation_lat = 2 * pi / 9 # Latitude of perturbation location
+    pertz = 15000    # Perturbation height cap
+
+    # Great circle distance (d in the paper) divided by a (radius of the Earth)
+    # because we never actually need d without dividing by a
+    great_circle_distance_by_a = acos(sin(perturbation_lat) * sin(lat) +
+                                      cos(perturbation_lat) * cos(lat) *
+                                      cos(lon - perturbation_lon))
+
+    # In the first case, the vertical taper function is per definition zero.
+    # In the second case, the stream function is per definition zero.
+    if z > pertz || great_circle_distance_by_a > perturbation_radius
+        return 0.0, 0.0
+    end
+
+    # Vertical tapering of stream function
+    perttaper = 1.0 - 3 * z^2 / pertz^2 + 2 * z^3 / pertz^3
+
+    # sin/cos(pi * d / (2 * d_0)) in the paper
+    sin_, cos_ = sincos(0.5 * pi * great_circle_distance_by_a / perturbation_radius)
+
+    # Common factor for both u and v
+    factor = 16 / (3 * sqrt(3)) * perturbed_wind_amplitude * perttaper * cos_^3 * sin_
+
+    u_perturbation = -factor * (-sin(perturbation_lat) * cos(lat) +
+                      cos(perturbation_lat) * sin(lat) * cos(lon - perturbation_lon)) /
+                     sin(great_circle_distance_by_a)
+
+    v_perturbation = factor * cos(perturbation_lat) * sin(lon - perturbation_lon) /
+                     sin(great_circle_distance_by_a)
+
+    return u_perturbation, v_perturbation
+end
+
+@inline function source_terms_baroclinic_instability(u, x, t,
+                                                     equations::CompressibleEulerEquations3D)
+    radius_earth = 6.371229e6  # a
+    gravitational_acceleration = 9.80616     # g
+    angular_velocity = 7.29212e-5  # Ω
+
+    r = norm(x)
+    # Make sure that r is not smaller than radius_earth
+    z = max(r - radius_earth, 0.0)
+    r = z + radius_earth
+
+    du1 = zero(eltype(u))
+
+    # Gravity term
+    temp = -gravitational_acceleration * radius_earth^2 / r^3
+    du2 = temp * u[1] * x[1]
+    du3 = temp * u[1] * x[2]
+    du4 = temp * u[1] * x[3]
+    du5 = temp * (u[2] * x[1] + u[3] * x[2] + u[4] * x[3])
+
+    # Coriolis term, -2Ω × ρv = -2 * angular_velocity * (0, 0, 1) × u[2:4]
+    du2 -= -2 * angular_velocity * u[3]
+    du3 -= 2 * angular_velocity * u[2]
+
+    return SVector(du1, du2, du3, du4, du5)
+end
+
+###############################################################################
+# Start of the actual elixir, semidiscretization of the problem
+
+initial_condition = initial_condition_baroclinic_instability
+
+boundary_conditions = Dict(:inside => boundary_condition_slip_wall,
+                           :outside => boundary_condition_slip_wall)
+
+# This is a good estimate for the speed of sound in this example.
+# Other values between 300 and 400 should work as well.
+surface_flux = FluxLMARS(340)
+volume_flux = flux_kennedy_gruber
+solver = DGSEM(polydeg = 5, surface_flux = surface_flux,
+               volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+
+# For optimal results, use 4 lat lon levels and 8 layers here
+# Note that the first argument refers to the level of refinement, unlike for p4est
+lat_lon_levels = 3
+layers = 4
+mesh = Trixi.T8codeMeshCubedSphere(lat_lon_levels, layers, 6.371229e6, 30000.0,
+                                   polydeg = 5, initial_refinement_level = 0)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms = source_terms_baroclinic_instability,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 10 * 24 * 60 * 60.0) # time in seconds for 10 days
+
+# Save RHS of the steady state and subtract it in every RHS evaluation.
+# This trick preserves the steady state exactly (to machine rounding errors, of course).
+# Otherwise, this elixir produces entirely unusable results for a resolution of 8x8x4 cells
+# per cube face with a polydeg of 3.
+# With this trick, even the polydeg 3 simulation produces usable (although badly resolved) results,
+# and most of the grid imprinting in higher polydeg simulation is eliminated.
+#
+# See https://github.com/trixi-framework/Trixi.jl/issues/980 for more information.
+u_steady_state = compute_coefficients(steady_state_baroclinic_instability, tspan[1], semi)
+# Use a `let` block for performance (otherwise du_steady_state will be a global variable)
+let du_steady_state = similar(u_steady_state)
+    # Save RHS of the steady state
+    Trixi.rhs!(du_steady_state, u_steady_state, semi, tspan[1])
+
+    global function corrected_rhs!(du, u, semi, t)
+        # Normal RHS evaluation
+        Trixi.rhs!(du, u, semi, t)
+        # Correct by subtracting the steady-state RHS
+        Trixi.@trixi_timeit Trixi.timer() "rhs correction" begin
+            # Use Trixi.@threaded for threaded performance
+            Trixi.@threaded for i in eachindex(du)
+                du[i] -= du_steady_state[i]
+            end
+        end
+    end
+end
+u0 = compute_coefficients(tspan[1], semi)
+ode = ODEProblem(corrected_rhs!, u0, tspan, semi)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 5000
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+#save_solution = SaveSolutionCallback(interval = 5000,
+#                                     save_initial_solution = true,
+#                                     save_final_solution = true,
+#                                     solution_variables = cons2prim)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback)
+#                        , save_solution)
+
+###############################################################################
+# run the simulation
+
+# Use a Runge-Kutta method with automatic (error based) time step size control
+# Enable threading of the RK method for better performance on multiple threads
+sol = solve(ode, RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()); abstol = 1.0e-6,
+            reltol = 1.0e-6,
+            ode_default_options()..., callback = callbacks);
+
+summary_callback() # print the timer summary
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index be76fb3f56b..34e0e119f9d 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -615,6 +615,52 @@ struct adapt_callback_passthrough
     user_data::Any
 end
 
+"""
+    T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_radius,
+                          thickness; polydeg, RealT=Float64, initial_refinement_level=0)
+
+Construct a cubed spherical shell of given inner radius and thickness as `T8codeMesh` with
+`6 * 4^lat_lon_levels_per_face_dimension * layers` trees. The mesh will have two
+boundaries, `:inside` and `:outside`.
+
+# Arguments
+- `lat_lon_levels_per_face_dimension::Integer`: number of trees per patch in longitudinal
+                                                and latitudinal direction given as level of
+                                                refinement.
+- `layers::Integer`: the number of trees in the third local dimension of each face, i.e.,
+                     the number of layers of the shell.
+- `inner_radius::Float64`: Radius of the inner side of the shell.
+- `thickness::Float64`: Thickness of the shell. The outer radius will be
+                        `inner_radius + thickness`.
+- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+- `RealT::Type`: the type that should be used for coordinates.
+- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the
+                                       simulation starts.
+"""
+function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_radius,
+                               thickness;
+                               polydeg, RealT = Float64, initial_refinement_level = 0)
+    NDIMS = 3
+    cmesh = t8_cmesh_new_cubed_spherical_shell(inner_radius, thickness,
+                                               lat_lon_levels_per_face_dimension,
+                                               layers, mpi_comm())
+    do_face_ghost = mpi_isparallel()
+    scheme = t8_scheme_new_default_cxx()
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
+                                   mpi_comm())
+
+    num_trees = t8_cmesh_get_num_trees(cmesh)
+    boundary_names = fill(Symbol("---"), 2 * NDIMS, num_trees)
+    for itree in 1:num_trees
+        boundary_names[5, itree] = :inside
+        boundary_names[6, itree] = :outside
+    end
+
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg)
+end
+
 # Callback function prototype to decide for refining and coarsening.
 # If `is_family` equals 1, the first `num_elements` in elements
 # form a family and we decide whether this family should be coarsened
diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 300eaef66c8..72d142fcdb7 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -99,6 +99,22 @@ mkdir(outdir)
         end
     end
 
+    # This test differs from the one in `test_p4est_3d.jl` in the latitudinal and
+    # longitudinal dimensions.
+    @trixi_testset "elixir_advection_cubed_sphere.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"),
+                            l2=[0.002006918015656413],
+                            linf=[0.027655117058380085])
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
@@ -271,6 +287,39 @@ mkdir(outdir)
             @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
         end
     end
+
+    # This test is identical to the one in `test_p4est_3d.jl` besides minor
+    # deviations in the expected error norms.
+    @trixi_testset "elixir_euler_baroclinic_instability.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                     "elixir_euler_baroclinic_instability.jl"),
+                            l2=[
+                                6.725093801700048e-7,
+                                0.00021710076010951073,
+                                0.0004386796338203878,
+                                0.00020836270267103122,
+                                0.07601887903440395,
+                            ],
+                            linf=[
+                                1.9107530539574924e-5,
+                                0.02980358831035801,
+                                0.048476331898047564,
+                                0.02200137344113612,
+                                4.848310144356219,
+                            ],
+                            tspan=(0.0, 1e2),
+                            # Decrease tolerance of adaptive time stepping to get similar results across different systems
+                            abstol=1.0e-9, reltol=1.0e-9,
+                            coverage_override=(lat_lon_levels = 0, layers = 1, polydeg = 3)) # Prevent long compile time in CI
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
 end
 
 # Clean up afterwards: delete Trixi.jl output directory

From 36e9ba9bd944ed96111ad3d4823d600d843f63b5 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 12:08:29 +0200
Subject: [PATCH 25/89] Switching order of CI steps.

---
 .github/workflows/ci.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 45bacaf2361..f23896d270d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -113,15 +113,15 @@ jobs:
           arch: ${{ matrix.arch }}
       - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
       - uses: julia-actions/cache@v1
-      - uses: julia-actions/julia-buildpkg@v1
-        env:
-          PYTHON: ""
       - name: Configure new T8code.jl version environment
         shell: julia --project="." --color=yes {0}
         run: |
           using Pkg
           Pkg.add(PackageSpec(url="https://github.com/DLR-AMR/T8code.jl", rev="bump-to-t8code-2.0.0"))
           Pkg.instantiate()
+      - uses: julia-actions/julia-buildpkg@v1
+        env:
+          PYTHON: ""
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:

From 171a43c8bcf373f9f7be6d8d77c12c8c35811a89 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 12:18:28 +0200
Subject: [PATCH 26/89] Changed line in CI.

---
 .github/workflows/ci.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f23896d270d..e652e7c3f58 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -111,14 +111,10 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
+      - run: julia -e 'using Pkg; Pkg.Registry.add(url="https://github.com/DLR-AMR/T8code.jl", rev="bump-to-t8code-2.0.0"))'
+          shell: bash
       - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
       - uses: julia-actions/cache@v1
-      - name: Configure new T8code.jl version environment
-        shell: julia --project="." --color=yes {0}
-        run: |
-          using Pkg
-          Pkg.add(PackageSpec(url="https://github.com/DLR-AMR/T8code.jl", rev="bump-to-t8code-2.0.0"))
-          Pkg.instantiate()
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""

From 4317d5ee7e3068a935bce2dfed75f4eef47e67af Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 26 Apr 2024 14:50:11 +0200
Subject: [PATCH 27/89] Increasing code coverage.

---
 test/test_t8code_2d.jl | 17 +++++++++++++++++
 test/test_t8code_3d.jl | 11 +++++++++++
 2 files changed, 28 insertions(+)

diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl
index d536a6dd73a..d99775f743d 100644
--- a/test/test_t8code_2d.jl
+++ b/test/test_t8code_2d.jl
@@ -30,6 +30,12 @@ mkdir(outdir)
     end
 end
 
+@trixi_testset "test load mesh from path" begin
+    @test_throws "Unknown file extension: .unknown_ext" begin
+        mesh = T8codeMesh(touch("dummy.unknown_ext"), 2)
+    end
+end
+
 @trixi_testset "test check_for_negative_volumes" begin
     @test_warn "Discovered negative volumes" begin
         # Unstructured mesh with six cells which have left-handed node ordering.
@@ -42,6 +48,17 @@ end
     end
 end
 
+@trixi_testset "test t8code mesh from p4est connectivity" begin
+    @test begin
+        # Here we use the connectivity constructor from `P4est.jl` since the
+        # method dispatch works only on `Ptr{p4est_connectivity}` which
+        # actually is `Ptr{P4est.LibP4est.p4est_connectivity}`.
+        conn = Trixi.P4est.LibP4est.p4est_connectivity_new_brick(2, 3, 1, 1)
+        mesh = T8codeMesh(conn)
+        all(size(mesh.tree_node_coordinates) .== (2, 2, 2, 6))
+    end
+end
+
 @trixi_testset "elixir_advection_basic.jl" begin
     # This test is identical to the one in `test_p4est_2d.jl`.
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"),
diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 300eaef66c8..81d2a7cdd85 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -13,6 +13,17 @@ isdir(outdir) && rm(outdir, recursive = true)
 mkdir(outdir)
 
 @testset "T8codeMesh3D" begin
+    @trixi_testset "test t8code mesh from p8est connectivity" begin
+        @test begin
+            # Here we use the connectivity constructor from `P4est.jl` since the
+            # method dispatch works only on `Ptr{p8est_connectivity}` which
+            # actually is `Ptr{P4est.LibP4est.p8est_connectivity}`.
+            conn = Trixi.P4est.LibP4est.p8est_connectivity_new_brick(2, 3, 4, 1, 1, 1)
+            mesh = T8codeMesh(conn)
+            all(size(mesh.tree_node_coordinates) .== (3, 2, 2, 2, 24))
+        end
+    end
+
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_advection_basic.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"),

From 4b5694dc96e0dc2d1702bc4333d42ad61123866b Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 30 Apr 2024 17:28:59 +0200
Subject: [PATCH 28/89] Increasing code coverage.

---
 .../elixir_advection_nonconforming_flag.jl      |  3 ++-
 src/meshes/t8code_mesh.jl                       | 17 ++++++++++-------
 test/test_t8code_2d.jl                          | 10 ++++++++++
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
index a39f3a7e195..4161f85e380 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
@@ -25,7 +25,8 @@ mapping = Trixi.transfinite_mapping(faces)
 trees_per_dimension = (3, 2)
 mesh = T8codeMesh(trees_per_dimension, polydeg = 3,
                   mapping = mapping,
-                  initial_refinement_level = 1)
+                  initial_refinement_level = 1,
+                  periodicity = (true, true))
 
 # Note: This is actually a `p4est_quadrant_t` which is much bigger than the
 # following struct. But we only need the first three fields for our purpose.
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index be76fb3f56b..01033b0d6a1 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -593,13 +593,8 @@ function T8codeMesh(meshfile::AbaqusFile{NDIMS};
                                                                                                              boundary_symbols)
     end
 
-    if typeof(connectivity) <: Ptr{p4est_connectivity}
-        cmesh = t8_cmesh_new_from_p4est(connectivity, mpi_comm(), 0)
-    elseif typeof(connectivity) <: Ptr{p8est_connectivity}
-        cmesh = t8_cmesh_new_from_p8est(connectivity, mpi_comm(), 0)
-    else
-        throw("`connectivity` is not of type `Ptr{p*est_connectivity}`.")
-    end
+    cmesh = t8_cmesh_new_from_connectivity(connectivity, mpi_comm())
+    p4est_connectivity_destroy(connectivity)
 
     do_face_ghost = mpi_isparallel()
     scheme = t8_scheme_new_default_cxx()
@@ -610,6 +605,14 @@ function T8codeMesh(meshfile::AbaqusFile{NDIMS};
                              boundary_names, "")
 end
 
+function t8_cmesh_new_from_connectivity(connectivity::Ptr{p4est_connectivity}, comm)
+    return t8_cmesh_new_from_p4est(connectivity, comm, 0)
+end
+
+function t8_cmesh_new_from_connectivity(connectivity::Ptr{p8est_connectivity}, comm)
+    return t8_cmesh_new_from_p8est(connectivity, comm, 0)
+end
+
 struct adapt_callback_passthrough
     adapt_callback::Function
     user_data::Any
diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl
index d99775f743d..7f318b9fd2d 100644
--- a/test/test_t8code_2d.jl
+++ b/test/test_t8code_2d.jl
@@ -59,6 +59,16 @@ end
     end
 end
 
+@trixi_testset "test t8code mesh from ABAQUS HOHQMesh file" begin
+    @test begin
+        # Unstructured ABAQUS mesh file created with HOHQMesh..
+        file_path = Trixi.download("https://gist.githubusercontent.com/jmark/9e0da4306e266617eeb19bc56b0e7feb/raw/e6856e1deb648a807f6bb6d6dcacff9e55d94e2a/round_2d_tank.inp",
+          joinpath(EXAMPLES_DIR, "round_2d_tank.inp"))
+        mesh = T8codeMesh(AbaqusFile{2}(file_path))
+        all(size(mesh.tree_node_coordinates) .== (2, 4, 4, 340))
+    end
+end
+
 @trixi_testset "elixir_advection_basic.jl" begin
     # This test is identical to the one in `test_p4est_2d.jl`.
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"),

From 48082e6cfa3ddb26dc5da761bebde9f62d8fc77e Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 30 Apr 2024 17:34:57 +0200
Subject: [PATCH 29/89] Further increasing code coverage.

---
 .../t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl     | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
index 4161f85e380..48f78dd6da3 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
@@ -18,13 +18,12 @@ f3(s) = SVector(s, -1.0 + sin(0.5 * pi * s))
 f4(s) = SVector(s, 1.0 + sin(0.5 * pi * s))
 
 faces = (f1, f2, f3, f4)
-mapping = Trixi.transfinite_mapping(faces)
 
 # Create T8codeMesh with 3 x 2 trees and 6 x 4 elements,
 # approximate the geometry with a smaller polydeg for testing.
 trees_per_dimension = (3, 2)
 mesh = T8codeMesh(trees_per_dimension, polydeg = 3,
-                  mapping = mapping,
+                  faces = faces,
                   initial_refinement_level = 1,
                   periodicity = (true, true))
 

From 66ee1eb35d0b0fa33b5e721f847fd26a6267870e Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 30 Apr 2024 17:44:41 +0200
Subject: [PATCH 30/89] Re-introduced tests.

---
 test/test_t8code_3d.jl | 49 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 81d2a7cdd85..44cc604fac6 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -110,6 +110,22 @@ mkdir(outdir)
         end
     end
 
+    # This test differs from the one in `test_p4est_3d.jl` in the latitudinal and
+    # longitudinal dimensions.
+    @trixi_testset "elixir_advection_cubed_sphere.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"),
+                            l2=[0.002006918015656413],
+                            linf=[0.027655117058380085])
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
@@ -282,6 +298,39 @@ mkdir(outdir)
             @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
         end
     end
+
+    # This test is identical to the one in `test_p4est_3d.jl` besides minor
+    # deviations in the expected error norms.
+    @trixi_testset "elixir_euler_baroclinic_instability.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                     "elixir_euler_baroclinic_instability.jl"),
+                            l2=[
+                                6.725093801700048e-7,
+                                0.00021710076010951073,
+                                0.0004386796338203878,
+                                0.00020836270267103122,
+                                0.07601887903440395,
+                            ],
+                            linf=[
+                                1.9107530539574924e-5,
+                                0.02980358831035801,
+                                0.048476331898047564,
+                                0.02200137344113612,
+                                4.848310144356219,
+                            ],
+                            tspan=(0.0, 1e2),
+                            # Decrease tolerance of adaptive time stepping to get similar results across different systems
+                            abstol=1.0e-9, reltol=1.0e-9,
+                            coverage_override=(lat_lon_levels = 0, layers = 1, polydeg = 3)) # Prevent long compile time in CI
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
 end
 
 # Clean up afterwards: delete Trixi.jl output directory

From b9b79a2f05807ef00f37d5df778ba3bde875c375 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 30 Apr 2024 17:45:37 +0200
Subject: [PATCH 31/89] Removed workaround in github workflow.

---
 .github/workflows/ci.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c373399172e..b4b3cfa1487 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -111,8 +111,6 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - run: julia -e 'using Pkg; Pkg.Registry.add(url="https://github.com/DLR-AMR/T8code.jl", rev="bump-to-t8code-2.0.0"))'
-          shell: bash
       - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
       - uses: julia-actions/cache@v1
       - uses: julia-actions/julia-buildpkg@v1

From e3be166fc4cf7c8741008bb06cab7250bdc862c5 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 30 Apr 2024 17:48:24 +0200
Subject: [PATCH 32/89] Applied formatter.

---
 test/test_t8code_2d.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl
index 7f318b9fd2d..01917004caa 100644
--- a/test/test_t8code_2d.jl
+++ b/test/test_t8code_2d.jl
@@ -63,7 +63,7 @@ end
     @test begin
         # Unstructured ABAQUS mesh file created with HOHQMesh..
         file_path = Trixi.download("https://gist.githubusercontent.com/jmark/9e0da4306e266617eeb19bc56b0e7feb/raw/e6856e1deb648a807f6bb6d6dcacff9e55d94e2a/round_2d_tank.inp",
-          joinpath(EXAMPLES_DIR, "round_2d_tank.inp"))
+                                   joinpath(EXAMPLES_DIR, "round_2d_tank.inp"))
         mesh = T8codeMesh(AbaqusFile{2}(file_path))
         all(size(mesh.tree_node_coordinates) .== (2, 4, 4, 340))
     end

From 95ec84f5629d4ee1cb55ab3707224c26f0ec92e2 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Mon, 13 May 2024 14:44:58 +0200
Subject: [PATCH 33/89] adapt to merged t8code PR

---
 src/Trixi.jl                              |  2 --
 src/auxiliary/auxiliary.jl                |  9 -------
 src/solvers/dgsem_t8code/containers_2d.jl | 19 +++++++++-----
 src/solvers/dgsem_t8code/containers_3d.jl | 32 +++++++++++++++--------
 4 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/src/Trixi.jl b/src/Trixi.jl
index df39fb6dcfa..f3977f1f058 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -290,8 +290,6 @@ export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!,
        adapt_to_mesh_level,
        iplot, iplot!
 
-export GmshFile, AbaqusFile
-
 function __init__()
     init_mpi()
 
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index f3c54249816..44bc16ee38d 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -371,13 +371,4 @@ function download(src_url, file_path)
     return file_path
 end
 
-abstract type MeshFile{NDIMS} end
-
-struct GmshFile{NDIMS} <: MeshFile{NDIMS}
-    path::String
-end
-
-struct AbaqusFile{NDIMS} <: MeshFile{NDIMS}
-    path::String
-end
 end # @muladd
diff --git a/src/solvers/dgsem_t8code/containers_2d.jl b/src/solvers/dgsem_t8code/containers_2d.jl
index 104e5590a58..ce525bfdf65 100644
--- a/src/solvers/dgsem_t8code/containers_2d.jl
+++ b/src/solvers/dgsem_t8code/containers_2d.jl
@@ -4,7 +4,6 @@
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
 #! format: noindent
-#! format: off
 
 # Interpolate tree_node_coordinates to each quadrant at the specified nodes.
 function calc_node_coordinates!(node_coordinates,
@@ -42,14 +41,22 @@ function calc_node_coordinates!(node_coordinates,
             t8_element_vertex_reference_coords(eclass_scheme, element, 0,
                                                pointer(element_coords))
 
-            nodes_out_x = 2 * (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[1]) .- 1
-            nodes_out_y = 2 * (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[2]) .- 1
+            nodes_out_x = 2 *
+                          (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[1]) .-
+                          1
+            nodes_out_y = 2 *
+                          (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[2]) .-
+                          1
 
-            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, baryweights_in)
-            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, baryweights_in)
+            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x,
+                                             baryweights_in)
+            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y,
+                                             baryweights_in)
 
             multiply_dimensionwise!(view(node_coordinates, :, :, :, current_index += 1),
-                                    matrix1, matrix2, view(mesh.tree_node_coordinates, :, :, :, global_itree + 1),
+                                    matrix1, matrix2,
+                                    view(mesh.tree_node_coordinates, :, :, :,
+                                         global_itree + 1),
                                     tmp1)
         end
     end
diff --git a/src/solvers/dgsem_t8code/containers_3d.jl b/src/solvers/dgsem_t8code/containers_3d.jl
index e1e58fafd85..261945f82b1 100644
--- a/src/solvers/dgsem_t8code/containers_3d.jl
+++ b/src/solvers/dgsem_t8code/containers_3d.jl
@@ -4,7 +4,6 @@
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
 #! format: noindent
-#! format: off
 
 # Interpolate tree_node_coordinates to each quadrant at the specified nodes
 function calc_node_coordinates!(node_coordinates,
@@ -44,17 +43,28 @@ function calc_node_coordinates!(node_coordinates,
             t8_element_vertex_reference_coords(eclass_scheme, element, 0,
                                                pointer(element_coords))
 
-            nodes_out_x = (2 * (element_length * 0.5 * (nodes .+ 1) .+ element_coords[1]) .- 1)
-            nodes_out_y = (2 * (element_length * 0.5 * (nodes .+ 1) .+ element_coords[2]) .- 1)
-            nodes_out_z = (2 * (element_length * 0.5 * (nodes .+ 1) .+ element_coords[3]) .- 1)
-
-            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, baryweights_in)
-            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, baryweights_in)
-            polynomial_interpolation_matrix!(matrix3, mesh.nodes, nodes_out_z, baryweights_in)
-
-            multiply_dimensionwise!(view(node_coordinates, :, :, :, :, current_index += 1),
+            nodes_out_x = (2 *
+                           (element_length * 0.5 * (nodes .+ 1) .+ element_coords[1]) .-
+                           1)
+            nodes_out_y = (2 *
+                           (element_length * 0.5 * (nodes .+ 1) .+ element_coords[2]) .-
+                           1)
+            nodes_out_z = (2 *
+                           (element_length * 0.5 * (nodes .+ 1) .+ element_coords[3]) .-
+                           1)
+
+            polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x,
+                                             baryweights_in)
+            polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y,
+                                             baryweights_in)
+            polynomial_interpolation_matrix!(matrix3, mesh.nodes, nodes_out_z,
+                                             baryweights_in)
+
+            multiply_dimensionwise!(view(node_coordinates, :, :, :, :,
+                                         current_index += 1),
                                     matrix1, matrix2, matrix3,
-                                    view(mesh.tree_node_coordinates, :, :, :, :, global_itree + 1), tmp1)
+                                    view(mesh.tree_node_coordinates, :, :, :, :,
+                                         global_itree + 1), tmp1)
         end
     end
 

From 2e92ebcdeaf43dd6f69ee70e44d92a420e8b135d Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Tue, 14 May 2024 16:36:32 +0200
Subject: [PATCH 34/89] missed calling constructor

---
 src/auxiliary/auxiliary.jl | 1 -
 src/meshes/t8code_mesh.jl  | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index 44bc16ee38d..972a748c56b 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -370,5 +370,4 @@ function download(src_url, file_path)
 
     return file_path
 end
-
 end # @muladd
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 8b9ff3ee5bd..5ae3569a76d 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -668,6 +668,8 @@ function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_
         boundary_names[5, itree] = :inside
         boundary_names[6, itree] = :outside
     end
+
+    return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg)
 end
 
 struct adapt_callback_passthrough

From 81204099762a86b21280be828c9af48116f2d904 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Thu, 13 Jun 2024 18:36:12 +0200
Subject: [PATCH 35/89] Backup.

---
 src/auxiliary/t8code.jl   |   6 +-
 src/meshes/mesh_io.jl     |  70 ++++++++-
 src/meshes/t8code_mesh.jl | 299 +++++++++++++++++++++++++++++++++-----
 3 files changed, 336 insertions(+), 39 deletions(-)

diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 7c1399fc803..c6bf45c128e 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -17,7 +17,8 @@ function init_t8code()
         # Initialize the sc library, has to happen before we initialize t8code.
         let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
             T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
-                                 T8code.Libt8.SC_LP_ERROR)
+                                 T8code.Libt8.SC_LP_DEBUG)
+                                 # T8code.Libt8.SC_LP_ERROR)
         end
 
         if T8code.Libt8.p4est_is_initialized() == 0
@@ -26,7 +27,8 @@ function init_t8code()
         end
 
         # Initialize t8code with log level ERROR to prevent a lot of output in AMR simulations.
-        t8_init(T8code.Libt8.SC_LP_ERROR)
+        # t8_init(T8code.Libt8.SC_LP_ERROR)
+        t8_init(T8code.Libt8.SC_LP_DEBUG)
 
         if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE")
             # Normally, `sc_finalize` should always be called during shutdown of an
diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index b74a3b4d642..be14b155777 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -225,11 +225,50 @@ function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_paralle
     return filename
 end
 
-# TODO: Implement this function as soon as there is support for this in `t8code`.
-function save_mesh_file(mesh::T8codeMesh, output_directory, timestep, mpi_parallel)
-    error("Mesh file output not supported yet for `T8codeMesh`.")
+## # TODO: Implement this function as soon as there is support for this in `t8code`.
+## function save_mesh_file(mesh::T8codeMesh, output_directory, timestep, mpi_parallel)
+##     error("Mesh file output not supported yet for `T8codeMesh`.")
+## 
+##     return joinpath(output_directory, "dummy_mesh.h5")
+## end
+
+function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
+                        mpi_parallel::False)
+
+    elemIDs, neighIDs, faces, duals, orientations = get_cmesh_info(mesh)
+
+    levels = trixi_t8_get_local_element_levels(mesh.forest)
+
+    # Create output directory (if it does not exist).
+    mkpath(output_directory)
+
+    # Determine file name based on existence of meaningful time step.
+    if timestep > 0
+        filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep))
+    else
+        filename = joinpath(output_directory, "mesh.h5")
+    end
+
+    # Open file (clobber existing content).
+    h5open(filename, "w") do file
+        # Add context information as attributes.
+        attributes(file)["mesh_type"] = get_name(mesh)
+        attributes(file)["ndims"] = ndims(mesh)
+        attributes(file)["ntrees"] = t8_forest_get_num_local_trees(mesh.forest)
+        attributes(file)["nelements"] = ncells(mesh)
 
-    return joinpath(output_directory, "dummy_mesh.h5")
+        file["tree_node_coordinates"] = mesh.tree_node_coordinates
+        file["nodes"] = Vector(mesh.nodes)
+        file["boundary_names"] = mesh.boundary_names .|> String
+        file["elemIDs"] = elemIDs
+        file["neighIDs"] = neighIDs
+        file["faces"] = faces
+        file["duals"] = duals
+        file["orientations"] = orientations
+        file["levels"] = levels
+    end
+
+    return filename
 end
 
 """
@@ -322,7 +361,29 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT)
 
         mesh = P4estMesh{ndims}(p4est, tree_node_coordinates,
                                 nodes, boundary_names, mesh_file, false, true)
+
+    elseif mesh_type == "T8codeMesh"
+        ndims, ntrees, nelements, tree_node_coordinates,
+        nodes, boundary_names_, elemIDs, neighIDs, faces, duals, orientations, levels = h5open(mesh_file, "r") do file
+            return read(attributes(file)["ndims"]),
+                   read(attributes(file)["ntrees"]),
+                   read(attributes(file)["nelements"]),
+                   read(file["tree_node_coordinates"]),
+                   read(file["nodes"]),
+                   read(file["boundary_names"]),
+                   read(file["elemIDs"]),
+                   read(file["neighIDs"]),
+                   read(file["faces"]),
+                   read(file["duals"]),
+                   read(file["orientations"]),
+                   read(file["levels"])
+        end
+
+        boundary_names = boundary_names_ .|> Symbol
+
+        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
     else
+
         error("Unknown mesh type!")
     end
 
@@ -477,4 +538,5 @@ function load_mesh!(mesh::ParallelTreeMesh, mesh_file::AbstractString)
 
     return mesh
 end
+
 end # @muladd
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 10a042be3ba..34eba17a535 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -108,6 +108,126 @@ Main mesh constructor for the `T8codeMesh` wrapping around a given t8code
 `forest` object. This constructor is typically called by other `T8codeMesh`
 constructors.
 
+# Arguments
+- `forest`: Pointer to a t8code forest.
+- `boundary_names`: List of boundary names.
+- `polydeg::Integer`: Polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+- `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
+             the imported mesh to the physical domain. Use `nothing` for the identity map.
+"""
+
+function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
+    # Construct the cmesh from `interfaces` and `orientations`.
+    Trixi.cmesh_ref = Ref(t8_cmesh_t())
+    t8_cmesh_init(Trixi.cmesh_ref)
+    cmesh = Trixi.cmesh_ref[]
+
+    Trixi.linear_geom = Trixi.t8_geometry_linear_new(2)
+    Trixi.linear_geom_ptr = pointer_from_objref(Ref(Trixi.linear_geom))
+
+    # Use linear geometry for now. There is no real Lagrange geometry
+    # implementation yet in t8code.
+    t8_cmesh_register_geometry(cmesh, Trixi.linear_geom_ptr)
+
+    N = length(nodes)
+    vertices = zeros(3 * 2^ndims) # quads/hexs only
+
+    eclass = ndims > 2 ? T8_ECLASS_HEX : T8_ECLASS_QUAD
+
+    for i = 1:ntrees
+        t8_cmesh_set_tree_class(cmesh, i-1, eclass)
+
+        if ndims == 2
+          vertices[1] = tree_node_coordinates[1,1,1,i]
+          vertices[2] = tree_node_coordinates[2,1,1,i]
+
+          vertices[4] = tree_node_coordinates[1,N,1,i]
+          vertices[5] = tree_node_coordinates[2,N,1,i]
+
+          vertices[7] = tree_node_coordinates[1,1,N,i]
+          vertices[8] = tree_node_coordinates[2,1,N,i]
+
+          vertices[10] = tree_node_coordinates[1,N,N,i]
+          vertices[11] = tree_node_coordinates[2,N,N,i]
+        else
+          vertices[1] = tree_node_coordinates[1,1,1,1,i]
+          vertices[2] = tree_node_coordinates[2,1,1,1,i]
+          vertices[3] = tree_node_coordinates[3,1,1,1,i]
+
+          vertices[4] = tree_node_coordinates[1,N,1,1,i]
+          vertices[5] = tree_node_coordinates[2,N,1,1,i]
+          vertices[6] = tree_node_coordinates[3,N,1,1,i]
+
+          vertices[7] = tree_node_coordinates[1,1,N,1,i]
+          vertices[8] = tree_node_coordinates[2,1,N,1,i]
+          vertices[9] = tree_node_coordinates[3,1,N,1,i]
+
+          vertices[10] = tree_node_coordinates[1,N,N,1,i]
+          vertices[11] = tree_node_coordinates[2,N,N,1,i]
+          vertices[12] = tree_node_coordinates[3,N,N,1,i]
+
+          vertices[13] = tree_node_coordinates[1,1,1,N,i]
+          vertices[14] = tree_node_coordinates[2,1,1,N,i]
+          vertices[15] = tree_node_coordinates[3,1,1,N,i]
+
+          vertices[16] = tree_node_coordinates[1,N,1,N,i]
+          vertices[17] = tree_node_coordinates[2,N,1,N,i]
+          vertices[18] = tree_node_coordinates[3,N,1,N,i]
+
+          vertices[19] = tree_node_coordinates[1,1,N,N,i]
+          vertices[20] = tree_node_coordinates[2,1,N,N,i]
+          vertices[21] = tree_node_coordinates[3,1,N,N,i]
+
+          vertices[22] = tree_node_coordinates[1,N,N,N,i]
+          vertices[23] = tree_node_coordinates[2,N,N,N,i]
+          vertices[24] = tree_node_coordinates[3,N,N,N,i]
+        end
+
+        t8_cmesh_set_tree_vertices(cmesh, i-1, vertices, 3)
+    end
+
+    for i = 1:length(elemIDs)
+        t8_cmesh_set_join(cmesh, elemIDs[i], neighIDs[i], faces[i], duals[i], orientations[i])
+    end 
+
+    t8_cmesh_commit(cmesh, mpi_comm())
+
+    do_face_ghost = mpi_isparallel()
+    scheme = t8_scheme_new_default_cxx()
+    initial_refinement_level = 0
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, mpi_comm())
+  
+    function adapt_callback(forest, ltreeid, eclass_scheme, lelementid, elements, is_family,
+                            user_data)
+
+        level = t8_element_level(eclass_scheme, elements[1])
+
+        if level < levels[lelementid + 1]
+          return 1
+        elseif level > levels[lelementid + 1]
+          return -1
+        end
+
+        return 0
+    end
+
+    for level = 0:maximum(levels)
+      forest = adapt(forest, adapt_callback; recursive = false, balance = false,
+                partition = false, ghost = false, user_data = C_NULL)
+    end
+
+    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")
+end
+
+"""
+    T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = 1, mapping = nothing)
+
+Main mesh constructor for the `T8codeMesh` wrapping around a given t8code
+`forest` object. This constructor is typically called by other `T8codeMesh`
+constructors.
+
 # Arguments
 - `forest`: Pointer to a t8code forest.
 - `boundary_names`: List of boundary names.
@@ -702,10 +822,10 @@ Adapt a `T8codeMesh` according to a user-defined `adapt_callback`.
     - `ghost = true`: Create a ghost layer for MPI data exchange.
     - `user_data = C_NULL`: Pointer to some arbitrary user-defined data.
 """
-function adapt!(mesh::T8codeMesh, adapt_callback; recursive = true, balance = true,
+function adapt(forest::Ptr{t8_forest}, adapt_callback; recursive = true, balance = true,
                 partition = true, ghost = true, user_data = C_NULL)
     # Check that forest is a committed, that is valid and usable, forest.
-    @assert t8_forest_is_committed(mesh.forest) != 0
+    @assert t8_forest_is_committed(forest) != 0
 
     # Init new forest.
     new_forest_ref = Ref{t8_forest_t}()
@@ -718,7 +838,7 @@ function adapt!(mesh::T8codeMesh, adapt_callback; recursive = true, balance = tr
         t8_forest_set_user_data(new_forest,
                                 pointer_from_objref(Ref(adapt_callback_passthrough(adapt_callback,
                                                                                    user_data))))
-        t8_forest_set_adapt(new_forest, mesh.forest,
+        t8_forest_set_adapt(new_forest, forest,
                             @t8_adapt_callback(adapt_callback_wrapper),
                             recursive)
         if balance
@@ -736,9 +856,12 @@ function adapt!(mesh::T8codeMesh, adapt_callback; recursive = true, balance = tr
         t8_forest_commit(new_forest)
     end
 
-    mesh.forest = new_forest
+    return new_forest
+end
 
-    return nothing
+function adapt!(mesh::T8codeMesh, adapt_callback; kwargs...)
+    # Call `t8_forest_ref(Ref(mesh.forest))` to keep it.
+    mesh.forest = adapt(mesh.forest, adapt_callback; kwargs...)
 end
 
 """
@@ -796,10 +919,14 @@ function get_global_first_element_ids(mesh::T8codeMesh)
 end
 
 function count_interfaces(mesh::T8codeMesh)
-    @assert t8_forest_is_committed(mesh.forest) != 0
+  return count_interfaces(mesh.forest,ndims(mesh))
+end
 
-    num_local_elements = t8_forest_get_local_num_elements(mesh.forest)
-    num_local_trees = t8_forest_get_num_local_trees(mesh.forest)
+function count_interfaces(forest::Ptr{t8_forest}, ndims)
+    @assert t8_forest_is_committed(forest) != 0
+
+    num_local_elements = t8_forest_get_local_num_elements(forest)
+    num_local_trees = t8_forest_get_num_local_trees(forest)
 
     current_index = t8_locidx_t(0)
 
@@ -812,30 +939,30 @@ function count_interfaces(mesh::T8codeMesh)
 
     visited_global_mortar_ids = Set{UInt64}([])
 
-    max_level = t8_forest_get_maxlevel(mesh.forest) #UInt64
-    max_tree_num_elements = UInt64(2^ndims(mesh))^max_level
+    max_level = t8_forest_get_maxlevel(forest) #UInt64
+    max_tree_num_elements = UInt64(2^ndims)^max_level
 
     if mpi_isparallel()
-        ghost_num_trees = t8_forest_ghost_num_trees(mesh.forest)
+        ghost_num_trees = t8_forest_ghost_num_trees(forest)
 
         ghost_tree_element_offsets = [num_local_elements +
-                                      t8_forest_ghost_get_tree_element_offset(mesh.forest,
+                                      t8_forest_ghost_get_tree_element_offset(forest,
                                                                               itree)
                                       for itree in 0:(ghost_num_trees - 1)]
-        ghost_global_treeids = [t8_forest_ghost_get_global_treeid(mesh.forest, itree)
+        ghost_global_treeids = [t8_forest_ghost_get_global_treeid(forest, itree)
                                 for itree in 0:(ghost_num_trees - 1)]
     end
 
     for itree in 0:(num_local_trees - 1)
-        tree_class = t8_forest_get_tree_class(mesh.forest, itree)
-        eclass_scheme = t8_forest_get_eclass_scheme(mesh.forest, tree_class)
+        tree_class = t8_forest_get_tree_class(forest, itree)
+        eclass_scheme = t8_forest_get_eclass_scheme(forest, tree_class)
 
-        num_elements_in_tree = t8_forest_get_tree_num_elements(mesh.forest, itree)
+        num_elements_in_tree = t8_forest_get_tree_num_elements(forest, itree)
 
-        global_itree = t8_forest_global_tree_id(mesh.forest, itree)
+        global_itree = t8_forest_global_tree_id(forest, itree)
 
         for ielement in 0:(num_elements_in_tree - 1)
-            element = t8_forest_get_element_in_tree(mesh.forest, itree, ielement)
+            element = t8_forest_get_element_in_tree(forest, itree, ielement)
 
             level = t8_element_level(eclass_scheme, element)
 
@@ -855,7 +982,7 @@ function count_interfaces(mesh::T8codeMesh)
 
                 forest_is_balanced = Cint(1)
 
-                t8_forest_leaf_face_neighbors(mesh.forest, itree, element,
+                t8_forest_leaf_face_neighbors(forest, itree, element,
                                               pneighbor_leaves_ref, iface, dual_faces_ref,
                                               num_neighbors_ref,
                                               pelement_indices_ref, pneigh_scheme_ref,
@@ -989,6 +1116,8 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
     visited_global_mortar_ids = Set{UInt64}([])
     global_mortar_id_to_local = Dict{UInt64, Int}([])
 
+    cmesh = t8_forest_get_cmesh(mesh.forest)
+
     # Loop over all local trees.
     for itree in 0:(num_local_trees - 1)
         tree_class = t8_forest_get_tree_class(mesh.forest, itree)
@@ -1012,20 +1141,6 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
 
             # Loop over all faces of the current local element.
             for iface in 0:(num_faces - 1)
-                # Compute the `orientation` of the touching faces.
-                if t8_element_is_root_boundary(eclass_scheme, element, iface) == 1
-                    cmesh = t8_forest_get_cmesh(mesh.forest)
-                    itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(mesh.forest, itree)
-                    iface_in_tree = t8_element_tree_face(eclass_scheme, element, iface)
-                    orientation_ref = Ref{Cint}()
-
-                    t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree, C_NULL,
-                                               orientation_ref)
-                    orientation = orientation_ref[]
-                else
-                    orientation = zero(Cint)
-                end
-
                 pelement_indices_ref = Ref{Ptr{t8_locidx_t}}()
                 pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}()
                 pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}()
@@ -1092,6 +1207,19 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                 else
                     neighbor_level = t8_element_level(neighbor_scheme, neighbor_leaves[1])
 
+                    # Compute the `orientation` of the touching faces.
+                    if t8_element_is_root_boundary(eclass_scheme, element, iface) == 1
+                        itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(mesh.forest, itree)
+                        iface_in_tree = t8_element_tree_face(eclass_scheme, element, iface)
+                        orientation_ref = Ref{Cint}()
+
+                        t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree, C_NULL,
+                                                   orientation_ref)
+                        orientation = orientation_ref[]
+                    else
+                        orientation = zero(Cint)
+                    end
+
                     # Local interface or mortar.
                     if all(neighbor_ielements .< num_local_elements)
 
@@ -1263,6 +1391,111 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
     return nothing
 end
 
+function get_cmesh_info(mesh::T8codeMesh)
+  @assert t8_forest_is_committed(mesh.forest) != 0
+  cmesh = t8_forest_get_cmesh(mesh.forest)
+  return get_cmesh_info(cmesh, ndims(mesh))
+end
+
+function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
+    # Avoid destroying cmesh when destroying the forest.
+    t8_cmesh_ref(cmesh)
+
+    # In order to retrieve the connectivity between cmesh elements, a uniform
+    # forest with one element per cmesh cell is initialized.
+    scheme = t8_scheme_new_default_cxx()
+    do_face_ghost = 0
+    initial_refinement_level = 0
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, mpi_comm())
+
+    counts = count_interfaces(forest, ndims)
+
+    elemIDs = zeros(Int, counts.interfaces)
+    neighIDs = zeros(Int, counts.interfaces)
+    orientations = zeros(Int32, counts.interfaces)
+    faces = zeros(Int8, counts.interfaces)
+    duals = zeros(Int8, counts.interfaces)
+
+    num_local_elements = t8_forest_get_local_num_elements(forest)
+    num_local_trees = t8_forest_get_num_local_trees(forest)
+
+    sfc_index = 0 # space-filling curve index
+    itf_index = 0 # interface index
+
+    # Loop over all local trees.
+    for itree in 0:(num_local_trees - 1)
+        tree_class = t8_forest_get_tree_class(forest, itree)
+        eclass_scheme = t8_forest_get_eclass_scheme(forest, tree_class)
+
+        num_elements_in_tree = t8_forest_get_tree_num_elements(forest, itree)
+
+        # Loop over all local elements of the current local tree.
+        for ielement in 0:(num_elements_in_tree - 1)
+            element = t8_forest_get_element_in_tree(forest, itree, ielement)
+            level = t8_element_level(eclass_scheme, element)
+            num_faces = t8_element_num_faces(eclass_scheme, element)
+
+            # Loop over all faces of the current local element.
+            for iface in 0:(num_faces - 1)
+                # Compute the `orientation` of the touching faces.
+                if t8_element_is_root_boundary(eclass_scheme, element, iface) == 1
+                    itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(forest, itree)
+                    iface_in_tree = t8_element_tree_face(eclass_scheme, element, iface)
+                    orientation_ref = Ref{Cint}()
+
+                    t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree, C_NULL,
+                                               orientation_ref)
+                    orientation = orientation_ref[]
+                else
+                    orientation = zero(Cint)
+                end
+
+                pelement_indices_ref = Ref{Ptr{t8_locidx_t}}()
+                pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}()
+                pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}()
+
+                dual_faces_ref = Ref{Ptr{Cint}}()
+                num_neighbors_ref = Ref{Cint}()
+
+                forest_is_balanced = Cint(1)
+
+                # Query neighbor information from t8code.
+                t8_forest_leaf_face_neighbors(forest, itree, element,
+                                              pneighbor_leaves_ref, iface, dual_faces_ref,
+                                              num_neighbors_ref,
+                                              pelement_indices_ref, pneigh_scheme_ref,
+                                              forest_is_balanced)
+
+                num_neighbors = num_neighbors_ref[]
+                dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors)
+                neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[], num_neighbors)
+                neighbor_leaves = unsafe_wrap(Array, pneighbor_leaves_ref[], num_neighbors)
+                neighbor_scheme = pneigh_scheme_ref[]
+
+                # The condition ensures we only visit the interface once.
+                if num_neighbors == 1 && sfc_index <= neighbor_ielements[1]
+                    itf_index += 1
+                    elemIDs[itf_index] = sfc_index
+                    neighIDs[itf_index] = neighbor_ielements[1]
+                    orientations[itf_index] = orientation
+                    faces[itf_index] = iface
+                    duals[itf_index] = dual_faces[1]
+                end
+
+                t8_free(dual_faces_ref[])
+                t8_free(pneighbor_leaves_ref[])
+                t8_free(pelement_indices_ref[])
+            end # for iface
+                    
+            sfc_index += 1
+        end # for ielement
+    end # for itree
+
+    t8_forest_unref(Ref(forest))
+
+    return elemIDs, neighIDs, faces, duals, orientations
+end
+
 #! format: off
 @deprecate T8codeMesh{2}(conn::Ptr{p4est_connectivity}; kwargs...) T8codeMesh(conn::Ptr{p4est_connectivity}; kwargs...)
 @deprecate T8codeMesh{3}(conn::Ptr{p8est_connectivity}; kwargs...) T8codeMesh(conn::Ptr{p8est_connectivity}; kwargs...)

From 3ad35154c4b0ce67be822bcb5af497223db79630 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 14 Jun 2024 19:07:49 +0200
Subject: [PATCH 36/89] Add save callback to elixir.

---
 .../t8code_2d_dgsem/elixir_advection_unstructured_flag.jl   | 6 +++++-
 src/meshes/t8code_mesh.jl                                   | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
index e512f328234..d966ef2273d 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
@@ -53,13 +53,17 @@ summary_callback = SummaryCallback()
 # prints the results.
 analysis_callback = AnalysisCallback(semi, interval = 100)
 
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each
 # time step.
 stepsize_callback = StepsizeCallback(cfl = 1.4)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to
 # the ODE solver.
-callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback)
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback)
 
 ###############################################################################
 # Run the simulation.
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 34eba17a535..f30916c104b 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -26,6 +26,8 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
     nmpiinterfaces :: Int
     nmpimortars    :: Int
 
+    unsaved_changes :: Bool
+
     function T8codeMesh{NDIMS}(forest::Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
                                current_filename) where {NDIMS}
@@ -38,6 +40,7 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
         mesh.boundary_names = boundary_names
         mesh.current_filename = current_filename
         mesh.tree_node_coordinates = tree_node_coordinates
+        mesh.unsaved_changes = true
 
         finalizer(mesh) do mesh
             # When finalizing `mesh.forest`, `mesh.scheme` and `mesh.cmesh` are

From 23714f1eaa303327770a59f17a616a100e9e7451 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 18 Jun 2024 16:09:23 +0200
Subject: [PATCH 37/89] Backup: WIP MPI-aware save/load of T8codeMesh files.

---
 src/auxiliary/t8code.jl   |  4 +-
 src/meshes/mesh_io.jl     | 81 +++++++++++++++++++++++++++++++--------
 src/meshes/t8code_mesh.jl | 34 ++++++++++------
 3 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index c6bf45c128e..9017d8fb851 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -52,7 +52,7 @@ function trixi_t8_get_local_element_levels(forest)
     # Check that forest is a committed, that is valid and usable, forest.
     @assert t8_forest_is_committed(forest) != 0
 
-    levels = Vector{Int}(undef, t8_forest_get_local_num_elements(forest))
+    levels = Vector{UInt8}(undef, t8_forest_get_local_num_elements(forest))
 
     # Get the number of trees that have elements of this process.
     num_local_trees = t8_forest_get_num_local_trees(forest)
@@ -69,7 +69,7 @@ function trixi_t8_get_local_element_levels(forest)
         for ielement in 0:(num_elements_in_tree - 1)
             element = t8_forest_get_element_in_tree(forest, itree, ielement)
             current_index += 1
-            levels[current_index] = t8_element_level(eclass_scheme, element)
+            levels[current_index] = UInt8(t8_element_level(eclass_scheme, element))
         end # for
     end # for
 
diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index be14b155777..c562853bb7f 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -225,22 +225,11 @@ function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_paralle
     return filename
 end
 
-## # TODO: Implement this function as soon as there is support for this in `t8code`.
-## function save_mesh_file(mesh::T8codeMesh, output_directory, timestep, mpi_parallel)
-##     error("Mesh file output not supported yet for `T8codeMesh`.")
-## 
-##     return joinpath(output_directory, "dummy_mesh.h5")
-## end
-
 function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
-                        mpi_parallel::False)
-
-    elemIDs, neighIDs, faces, duals, orientations = get_cmesh_info(mesh)
-
-    levels = trixi_t8_get_local_element_levels(mesh.forest)
+                        mpi_parallel)
 
     # Create output directory (if it does not exist).
-    mkpath(output_directory)
+    mpi_isroot() && mkpath(output_directory)
 
     # Determine file name based on existence of meaningful time step.
     if timestep > 0
@@ -249,12 +238,38 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         filename = joinpath(output_directory, "mesh.h5")
     end
 
+    levels = get_levels(mesh)
+    if mpi_parallel    
+      levels = MPI.Gather(levels, mpi_root(), mpi_comm())
+    end
+
+    num_global_trees = t8_forest_get_num_global_trees(mesh.forest)
+    num_elements_per_tree = zeros(t8_gloidx_t, num_global_trees)
+    num_local_trees = t8_forest_get_num_local_trees(mesh.forest)
+    for local_tree_id in 0:(num_local_trees - 1)
+        num_local_elements_in_tree = t8_forest_get_tree_num_elements(mesh.forest, local_tree_id)
+        global_tree_id = t8_forest_global_tree_id(mesh.forest, local_tree_id)
+        num_elements_per_tree[global_tree_id + 1] = num_local_elements_in_tree
+    end
+
+    if mpi_parallel    
+      num_elements_per_tree = MPI.Reduce!(num_elements_per_tree, +, mpi_comm())
+    end
+
+    # Since the mesh attributes are replicated on all ranks, only save from MPI
+    # root.
+    if !mpi_isroot()
+        return filename
+    end
+
+    elemIDs, neighIDs, faces, duals, orientations = get_cmesh_info(mesh)
+
     # Open file (clobber existing content).
     h5open(filename, "w") do file
         # Add context information as attributes.
         attributes(file)["mesh_type"] = get_name(mesh)
         attributes(file)["ndims"] = ndims(mesh)
-        attributes(file)["ntrees"] = t8_forest_get_num_local_trees(mesh.forest)
+        attributes(file)["ntrees"] = ntrees(mesh)
         attributes(file)["nelements"] = ncells(mesh)
 
         file["tree_node_coordinates"] = mesh.tree_node_coordinates
@@ -266,6 +281,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         file["duals"] = duals
         file["orientations"] = orientations
         file["levels"] = levels
+        file["num_elements_per_tree"] = num_elements_per_tree
     end
 
     return filename
@@ -364,7 +380,7 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT)
 
     elseif mesh_type == "T8codeMesh"
         ndims, ntrees, nelements, tree_node_coordinates,
-        nodes, boundary_names_, elemIDs, neighIDs, faces, duals, orientations, levels = h5open(mesh_file, "r") do file
+        nodes, boundary_names_, elemIDs, neighIDs, faces, duals, orientations, levels, num_elements_per_tree = h5open(mesh_file, "r") do file
             return read(attributes(file)["ndims"]),
                    read(attributes(file)["ntrees"]),
                    read(attributes(file)["nelements"]),
@@ -376,12 +392,13 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT)
                    read(file["faces"]),
                    read(file["duals"]),
                    read(file["orientations"]),
-                   read(file["levels"])
+                   read(file["levels"]),
+                   read(file["num_elements_per_tree"])
         end
 
         boundary_names = boundary_names_ .|> Symbol
 
-        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
+        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels, num_elements_per_tree)
     else
 
         error("Unknown mesh type!")
@@ -472,6 +489,36 @@ function load_mesh_parallel(mesh_file::AbstractString; n_cells_max, RealT)
 
         mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates,
                                  nodes, boundary_names, mesh_file, false, true)
+
+    elseif mesh_type == "T8codeMesh"
+
+        if mpi_isroot()
+            ndims, ntrees, nelements, tree_node_coordinates,
+            nodes, boundary_names_, elemIDs, neighIDs, faces, duals, orientations, levels = h5open(mesh_file, "r") do file
+                return read(attributes(file)["ndims"]),
+                       read(attributes(file)["ntrees"]),
+                       read(attributes(file)["nelements"]),
+                       read(file["tree_node_coordinates"]),
+                       read(file["nodes"]),
+                       read(file["boundary_names"]),
+                       read(file["elemIDs"]),
+                       read(file["neighIDs"]),
+                       read(file["faces"]),
+                       read(file["duals"]),
+                       read(file["orientations"]),
+                       read(file["levels"])
+            end
+
+            boundary_names = boundary_names_ .|> Symbol
+
+            data = (ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
+            MPI.bcast(data, mpi_root(), mpi_comm())
+        else
+            data = MPI.bcast(nothing, mpi_root(), mpi_comm())
+            ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels = data
+        end
+
+        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
     else
         error("Unknown mesh type!")
     end
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index f30916c104b..7a0a11597ba 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -120,8 +120,7 @@ constructors.
 - `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
              the imported mesh to the physical domain. Use `nothing` for the identity map.
 """
-
-function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
+function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels, num_elements_per_tree)
     # Construct the cmesh from `interfaces` and `orientations`.
     Trixi.cmesh_ref = Ref(t8_cmesh_t())
     t8_cmesh_init(Trixi.cmesh_ref)
@@ -201,25 +200,34 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boun
     scheme = t8_scheme_new_default_cxx()
     initial_refinement_level = 0
     forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, mpi_comm())
-  
-    function adapt_callback(forest, ltreeid, eclass_scheme, lelementid, elements, is_family,
+
+    virtual_element_index = 1
+
+    cum_sum_num_elements_per_tree = cumsum(num_elements_per_tree)
+
+    function adapt_callback(forest, local_tree_id, eclass_scheme, local_element_id, elements, is_family,
                             user_data)
 
+        global_tree_id = t8_forest_global_tree_id(forest, local_tree_id)
+
+        if virtual_element_index > cum_sum_num_elements_per_tree[global_tree_id + 1]
+          return 0
+        end
+
         level = t8_element_level(eclass_scheme, elements[1])
 
-        if level < levels[lelementid + 1]
+        if level < levels[virtual_element_index]
           return 1
-        elseif level > levels[lelementid + 1]
-          return -1
         end
 
+        virtual_element_index += 1
         return 0
     end
 
-    for level = 0:maximum(levels)
-      forest = adapt(forest, adapt_callback; recursive = false, balance = false,
-                partition = false, ghost = false, user_data = C_NULL)
-    end
+    forest = adapt(forest, adapt_callback; recursive = true, balance = false,
+              partition = false, ghost = false, user_data = C_NULL)
+
+    # forest = t8_forest_partition(forest)
 
     return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")
 end
@@ -1394,6 +1402,10 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
     return nothing
 end
 
+function get_levels(mesh::T8codeMesh)
+  return trixi_t8_get_local_element_levels(mesh.forest)
+end
+
 function get_cmesh_info(mesh::T8codeMesh)
   @assert t8_forest_is_committed(mesh.forest) != 0
   cmesh = t8_forest_get_cmesh(mesh.forest)

From 3f8ded29140d742c84cc70d05c50b48495e07442 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 19 Jun 2024 17:40:09 +0200
Subject: [PATCH 38/89] Refined code. Make it work in parallel.

---
 .../elixir_advection_unstructured_curved.jl   |   6 +-
 src/meshes/mesh_io.jl                         |  46 +++--
 src/meshes/t8code_mesh.jl                     | 166 +++++++-----------
 3 files changed, 98 insertions(+), 120 deletions(-)

diff --git a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
index fe6aa48e7d9..df1dbce2af1 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
@@ -69,6 +69,10 @@ summary_callback = SummaryCallback()
 analysis_interval = 100
 analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
@@ -76,7 +80,7 @@ stepsize_callback = StepsizeCallback(cfl = 1.2)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
 callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback,
-                        stepsize_callback)
+                        save_solution, stepsize_callback)
 
 ###############################################################################
 # run the simulation
diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index c562853bb7f..31c73ef6537 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -226,7 +226,7 @@ function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_paralle
 end
 
 function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
-                        mpi_parallel)
+                        mpi_parallel::Any)
 
     # Create output directory (if it does not exist).
     mpi_isroot() && mkpath(output_directory)
@@ -238,11 +238,13 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         filename = joinpath(output_directory, "mesh.h5")
     end
 
+    # Retrieve refinement levels of all elements.
     levels = get_levels(mesh)
-    if mpi_parallel    
+    if mpi_isparallel()
       levels = MPI.Gather(levels, mpi_root(), mpi_comm())
     end
 
+    # Retrieve number of elements per tree.
     num_global_trees = t8_forest_get_num_global_trees(mesh.forest)
     num_elements_per_tree = zeros(t8_gloidx_t, num_global_trees)
     num_local_trees = t8_forest_get_num_local_trees(mesh.forest)
@@ -252,7 +254,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         num_elements_per_tree[global_tree_id + 1] = num_local_elements_in_tree
     end
 
-    if mpi_parallel    
+    if mpi_isparallel()
       num_elements_per_tree = MPI.Reduce!(num_elements_per_tree, +, mpi_comm())
     end
 
@@ -262,7 +264,8 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         return filename
     end
 
-    elemIDs, neighIDs, faces, duals, orientations = get_cmesh_info(mesh)
+    # Retrieve face connectivity info of the coarse mesh.
+    treeIDs, neighIDs, faces, duals, orientations = get_cmesh_info(mesh)
 
     # Open file (clobber existing content).
     h5open(filename, "w") do file
@@ -275,7 +278,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         file["tree_node_coordinates"] = mesh.tree_node_coordinates
         file["nodes"] = Vector(mesh.nodes)
         file["boundary_names"] = mesh.boundary_names .|> String
-        file["elemIDs"] = elemIDs
+        file["treeIDs"] = treeIDs
         file["neighIDs"] = neighIDs
         file["faces"] = faces
         file["duals"] = duals
@@ -380,14 +383,15 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT)
 
     elseif mesh_type == "T8codeMesh"
         ndims, ntrees, nelements, tree_node_coordinates,
-        nodes, boundary_names_, elemIDs, neighIDs, faces, duals, orientations, levels, num_elements_per_tree = h5open(mesh_file, "r") do file
+        nodes, boundary_names_, treeIDs, neighIDs, faces, duals, orientations,
+        levels, num_elements_per_tree = h5open(mesh_file, "r") do file
             return read(attributes(file)["ndims"]),
                    read(attributes(file)["ntrees"]),
                    read(attributes(file)["nelements"]),
                    read(file["tree_node_coordinates"]),
                    read(file["nodes"]),
                    read(file["boundary_names"]),
-                   read(file["elemIDs"]),
+                   read(file["treeIDs"]),
                    read(file["neighIDs"]),
                    read(file["faces"]),
                    read(file["duals"]),
@@ -398,7 +402,9 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT)
 
         boundary_names = boundary_names_ .|> Symbol
 
-        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels, num_elements_per_tree)
+        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates,
+                          nodes, boundary_names, treeIDs, neighIDs, faces,
+                          duals, orientations, levels, num_elements_per_tree)
     else
 
         error("Unknown mesh type!")
@@ -489,36 +495,42 @@ function load_mesh_parallel(mesh_file::AbstractString; n_cells_max, RealT)
 
         mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates,
                                  nodes, boundary_names, mesh_file, false, true)
-
     elseif mesh_type == "T8codeMesh"
-
         if mpi_isroot()
-            ndims, ntrees, nelements, tree_node_coordinates,
-            nodes, boundary_names_, elemIDs, neighIDs, faces, duals, orientations, levels = h5open(mesh_file, "r") do file
+            ndims, ntrees, nelements, tree_node_coordinates, nodes,
+            boundary_names_, treeIDs, neighIDs, faces, duals, orientations, levels,
+            num_elements_per_tree = h5open(mesh_file, "r") do file
                 return read(attributes(file)["ndims"]),
                        read(attributes(file)["ntrees"]),
                        read(attributes(file)["nelements"]),
                        read(file["tree_node_coordinates"]),
                        read(file["nodes"]),
                        read(file["boundary_names"]),
-                       read(file["elemIDs"]),
+                       read(file["treeIDs"]),
                        read(file["neighIDs"]),
                        read(file["faces"]),
                        read(file["duals"]),
                        read(file["orientations"]),
-                       read(file["levels"])
+                       read(file["levels"]),
+                       read(file["num_elements_per_tree"])
             end
 
             boundary_names = boundary_names_ .|> Symbol
 
-            data = (ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
+            data = (ndims, ntrees, nelements, tree_node_coordinates, nodes,
+                    boundary_names, treeIDs, neighIDs, faces, duals,
+                    orientations, levels, num_elements_per_tree)
             MPI.bcast(data, mpi_root(), mpi_comm())
         else
             data = MPI.bcast(nothing, mpi_root(), mpi_comm())
-            ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels = data
+            ndims, ntrees, nelements, tree_node_coordinates, nodes,
+            boundary_names, treeIDs, neighIDs, faces, duals, orientations, levels,
+            num_elements_per_tree = data
         end
 
-        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels)
+        mesh = T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates,
+                          nodes, boundary_names, treeIDs, neighIDs, faces,
+                          duals, orientations, levels, num_elements_per_tree)
     else
         error("Unknown mesh type!")
     end
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 7a0a11597ba..9fe6836db54 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -120,24 +120,25 @@ constructors.
 - `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
              the imported mesh to the physical domain. Use `nothing` for the identity map.
 """
-function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boundary_names, elemIDs, neighIDs, faces, duals, orientations, levels, num_elements_per_tree)
-    # Construct the cmesh from `interfaces` and `orientations`.
+function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
+                    boundary_names, elemIDs, neighIDs, faces, duals,
+                    orientations, levels, num_elements_per_tree)
     Trixi.cmesh_ref = Ref(t8_cmesh_t())
     t8_cmesh_init(Trixi.cmesh_ref)
     cmesh = Trixi.cmesh_ref[]
 
-    Trixi.linear_geom = Trixi.t8_geometry_linear_new(2)
-    Trixi.linear_geom_ptr = pointer_from_objref(Ref(Trixi.linear_geom))
-
     # Use linear geometry for now. There is no real Lagrange geometry
     # implementation yet in t8code.
+    Trixi.linear_geom = Trixi.t8_geometry_linear_new(2)
+    Trixi.linear_geom_ptr = pointer_from_objref(Ref(Trixi.linear_geom))
     t8_cmesh_register_geometry(cmesh, Trixi.linear_geom_ptr)
 
-    N = length(nodes)
-    vertices = zeros(3 * 2^ndims) # quads/hexs only
-
+    # Determine element class.
     eclass = ndims > 2 ? T8_ECLASS_HEX : T8_ECLASS_QUAD
 
+    # Store element vertices inside the cmesh.
+    N = length(nodes)
+    vertices = zeros(3 * 2^ndims) # quads/hexs only
     for i = 1:ntrees
         t8_cmesh_set_tree_class(cmesh, i-1, eclass)
 
@@ -190,44 +191,58 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes, boun
         t8_cmesh_set_tree_vertices(cmesh, i-1, vertices, 3)
     end
 
+    # Connect the coarse mesh elements.
     for i = 1:length(elemIDs)
         t8_cmesh_set_join(cmesh, elemIDs[i], neighIDs[i], faces[i], duals[i], orientations[i])
     end 
 
     t8_cmesh_commit(cmesh, mpi_comm())
 
+    # Init a new forest with just one element per tree.
     do_face_ghost = mpi_isparallel()
     scheme = t8_scheme_new_default_cxx()
     initial_refinement_level = 0
     forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, mpi_comm())
 
-    virtual_element_index = 1
-
     cum_sum_num_elements_per_tree = cumsum(num_elements_per_tree)
 
+    # Compute the offset within the to-be-reconstructed forest depending on the
+    # MPI rank resp. first global tree id.
+    virtual_element_index = 1 # one-based index
+    if mpi_rank() > 0
+        last_global_tree_id_of_preceding_rank = t8_forest_global_tree_id(forest, 0) - 1
+        virtual_element_index += cum_sum_num_elements_per_tree[last_global_tree_id_of_preceding_rank + 1]
+    end
+
     function adapt_callback(forest, local_tree_id, eclass_scheme, local_element_id, elements, is_family,
                             user_data)
 
+        # Check if we are already in the next tree in terms of the `virtual_element_index`.
         global_tree_id = t8_forest_global_tree_id(forest, local_tree_id)
-
         if virtual_element_index > cum_sum_num_elements_per_tree[global_tree_id + 1]
           return 0
         end
 
+        # Test if we already reached the targeted level.
         level = t8_element_level(eclass_scheme, elements[1])
-
         if level < levels[virtual_element_index]
-          return 1
+          return 1 # Go one refinement level deeper.
         end
 
+        # Targeted level is reached.
         virtual_element_index += 1
         return 0
     end
 
+    # The adapt callback refines the forest according to the `levels` array.
+    # For each tree the callback recursively increases the refinement level
+    # till it matches with the associated section in `levels.
     forest = adapt(forest, adapt_callback; recursive = true, balance = false,
               partition = false, ghost = false, user_data = C_NULL)
 
-    # forest = t8_forest_partition(forest)
+    # if mpi_isparallel()
+    #   forest = t8_forest_partition(forest)
+    # end
 
     return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")
 end
@@ -1413,102 +1428,49 @@ function get_cmesh_info(mesh::T8codeMesh)
 end
 
 function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
-    # Avoid destroying cmesh when destroying the forest.
-    t8_cmesh_ref(cmesh)
+    num_trees = t8_cmesh_get_num_trees(cmesh)
+    num_faces = 2*ndims
 
-    # In order to retrieve the connectivity between cmesh elements, a uniform
-    # forest with one element per cmesh cell is initialized.
-    scheme = t8_scheme_new_default_cxx()
-    do_face_ghost = 0
-    initial_refinement_level = 0
-    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, mpi_comm())
+    num_interfaces = 0
 
-    counts = count_interfaces(forest, ndims)
+    dual_face_ref = Ref{Cint}()
+    orientation_ref = Ref{Cint}()
 
-    elemIDs = zeros(Int, counts.interfaces)
-    neighIDs = zeros(Int, counts.interfaces)
-    orientations = zeros(Int32, counts.interfaces)
-    faces = zeros(Int8, counts.interfaces)
-    duals = zeros(Int8, counts.interfaces)
+    # Count connected faces.
+    for itree in 0:(num_trees - 1)
+        for iface in 0:(num_faces - 1)
+            neigh_itree = t8_cmesh_get_face_neighbor(cmesh, itree, iface, dual_face_ref, C_NULL)
+            if itree < neigh_itree || itree == neigh_itree && iface < dual_face_ref[]
+                num_interfaces += 1
+            end
+        end
+    end
 
-    num_local_elements = t8_forest_get_local_num_elements(forest)
-    num_local_trees = t8_forest_get_num_local_trees(forest)
+    # Allocate arrays.
+    treeIDs = zeros(Int, num_interfaces)
+    neighIDs = zeros(Int, num_interfaces)
+    orientations = zeros(Int32, num_interfaces)
+    faces = zeros(Int8, num_interfaces)
+    duals = zeros(Int8, num_interfaces)
 
-    sfc_index = 0 # space-filling curve index
     itf_index = 0 # interface index
 
-    # Loop over all local trees.
-    for itree in 0:(num_local_trees - 1)
-        tree_class = t8_forest_get_tree_class(forest, itree)
-        eclass_scheme = t8_forest_get_eclass_scheme(forest, tree_class)
-
-        num_elements_in_tree = t8_forest_get_tree_num_elements(forest, itree)
-
-        # Loop over all local elements of the current local tree.
-        for ielement in 0:(num_elements_in_tree - 1)
-            element = t8_forest_get_element_in_tree(forest, itree, ielement)
-            level = t8_element_level(eclass_scheme, element)
-            num_faces = t8_element_num_faces(eclass_scheme, element)
-
-            # Loop over all faces of the current local element.
-            for iface in 0:(num_faces - 1)
-                # Compute the `orientation` of the touching faces.
-                if t8_element_is_root_boundary(eclass_scheme, element, iface) == 1
-                    itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(forest, itree)
-                    iface_in_tree = t8_element_tree_face(eclass_scheme, element, iface)
-                    orientation_ref = Ref{Cint}()
-
-                    t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree, C_NULL,
-                                               orientation_ref)
-                    orientation = orientation_ref[]
-                else
-                    orientation = zero(Cint)
-                end
-
-                pelement_indices_ref = Ref{Ptr{t8_locidx_t}}()
-                pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}()
-                pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}()
-
-                dual_faces_ref = Ref{Ptr{Cint}}()
-                num_neighbors_ref = Ref{Cint}()
-
-                forest_is_balanced = Cint(1)
-
-                # Query neighbor information from t8code.
-                t8_forest_leaf_face_neighbors(forest, itree, element,
-                                              pneighbor_leaves_ref, iface, dual_faces_ref,
-                                              num_neighbors_ref,
-                                              pelement_indices_ref, pneigh_scheme_ref,
-                                              forest_is_balanced)
-
-                num_neighbors = num_neighbors_ref[]
-                dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors)
-                neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[], num_neighbors)
-                neighbor_leaves = unsafe_wrap(Array, pneighbor_leaves_ref[], num_neighbors)
-                neighbor_scheme = pneigh_scheme_ref[]
-
-                # The condition ensures we only visit the interface once.
-                if num_neighbors == 1 && sfc_index <= neighbor_ielements[1]
-                    itf_index += 1
-                    elemIDs[itf_index] = sfc_index
-                    neighIDs[itf_index] = neighbor_ielements[1]
-                    orientations[itf_index] = orientation
-                    faces[itf_index] = iface
-                    duals[itf_index] = dual_faces[1]
-                end
-
-                t8_free(dual_faces_ref[])
-                t8_free(pneighbor_leaves_ref[])
-                t8_free(pelement_indices_ref[])
-            end # for iface
-                    
-            sfc_index += 1
-        end # for ielement
-    end # for itree
-
-    t8_forest_unref(Ref(forest))
+    for itree in 0:(num_trees - 1)
+        for iface in 0:(num_faces - 1)
+            neigh_itree = t8_cmesh_get_face_neighbor(cmesh, itree, iface, dual_face_ref, orientation_ref)
+
+            if itree < neigh_itree || itree == neigh_itree && iface < dual_face_ref[]
+                itf_index += 1
+                treeIDs[itf_index] = itree
+                neighIDs[itf_index] = neigh_itree
+                orientations[itf_index] = orientation_ref[]
+                faces[itf_index] = iface
+                duals[itf_index] = dual_face_ref[]
+            end
+        end
+    end
 
-    return elemIDs, neighIDs, faces, duals, orientations
+    return treeIDs, neighIDs, faces, duals, orientations
 end
 
 #! format: off

From 2f5d22441b451c0fe9af5011751d45c0c7130f33 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 19 Jun 2024 17:47:15 +0200
Subject: [PATCH 39/89] Added support for ParallelT8codeMesh to the save
 solution callback.

---
 src/callbacks_step/save_solution_dg.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl
index deae8f7c930..555f41dbd66 100644
--- a/src/callbacks_step/save_solution_dg.jl
+++ b/src/callbacks_step/save_solution_dg.jl
@@ -91,7 +91,7 @@ function save_solution_file(u, time, dt, timestep,
 end
 
 function save_solution_file(u, time, dt, timestep,
-                            mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations,
+                            mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh}, equations,
                             dg::DG, cache,
                             solution_callback,
                             element_variables = Dict{Symbol, Any}(),
@@ -136,7 +136,7 @@ function save_solution_file(u, time, dt, timestep,
 end
 
 function save_solution_file_parallel(data, time, dt, timestep, n_vars,
-                                     mesh::Union{ParallelTreeMesh, ParallelP4estMesh},
+                                     mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
                                      equations, dg::DG, cache,
                                      solution_variables, filename,
                                      element_variables = Dict{Symbol, Any}())

From d3a1b4958b5e1326379c5eb9c71c01ce5786c4e4 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 19 Jun 2024 17:49:10 +0200
Subject: [PATCH 40/89] Applied formatter.

---
 .../elixir_advection_unstructured_flag.jl     |   3 +-
 src/auxiliary/t8code.jl                       |   2 +-
 src/callbacks_step/save_solution_dg.jl        |   6 +-
 src/meshes/mesh_io.jl                         |   9 +-
 src/meshes/t8code_mesh.jl                     | 117 ++++++++++--------
 5 files changed, 73 insertions(+), 64 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
index d966ef2273d..aed6c755d5c 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
@@ -63,7 +63,8 @@ stepsize_callback = StepsizeCallback(cfl = 1.4)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to
 # the ODE solver.
-callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback)
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+                        stepsize_callback)
 
 ###############################################################################
 # Run the simulation.
diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 9017d8fb851..83ef471e1c6 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -18,7 +18,7 @@ function init_t8code()
         let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
             T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
                                  T8code.Libt8.SC_LP_DEBUG)
-                                 # T8code.Libt8.SC_LP_ERROR)
+            # T8code.Libt8.SC_LP_ERROR)
         end
 
         if T8code.Libt8.p4est_is_initialized() == 0
diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl
index 555f41dbd66..33e4997ca93 100644
--- a/src/callbacks_step/save_solution_dg.jl
+++ b/src/callbacks_step/save_solution_dg.jl
@@ -91,7 +91,8 @@ function save_solution_file(u, time, dt, timestep,
 end
 
 function save_solution_file(u, time, dt, timestep,
-                            mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh}, equations,
+                            mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                        ParallelT8codeMesh}, equations,
                             dg::DG, cache,
                             solution_callback,
                             element_variables = Dict{Symbol, Any}(),
@@ -136,7 +137,8 @@ function save_solution_file(u, time, dt, timestep,
 end
 
 function save_solution_file_parallel(data, time, dt, timestep, n_vars,
-                                     mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
+                                     mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                                 ParallelT8codeMesh},
                                      equations, dg::DG, cache,
                                      solution_variables, filename,
                                      element_variables = Dict{Symbol, Any}())
diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index 31c73ef6537..8958936f771 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -241,7 +241,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
     # Retrieve refinement levels of all elements.
     levels = get_levels(mesh)
     if mpi_isparallel()
-      levels = MPI.Gather(levels, mpi_root(), mpi_comm())
+        levels = MPI.Gather(levels, mpi_root(), mpi_comm())
     end
 
     # Retrieve number of elements per tree.
@@ -249,13 +249,14 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
     num_elements_per_tree = zeros(t8_gloidx_t, num_global_trees)
     num_local_trees = t8_forest_get_num_local_trees(mesh.forest)
     for local_tree_id in 0:(num_local_trees - 1)
-        num_local_elements_in_tree = t8_forest_get_tree_num_elements(mesh.forest, local_tree_id)
+        num_local_elements_in_tree = t8_forest_get_tree_num_elements(mesh.forest,
+                                                                     local_tree_id)
         global_tree_id = t8_forest_global_tree_id(mesh.forest, local_tree_id)
         num_elements_per_tree[global_tree_id + 1] = num_local_elements_in_tree
     end
 
     if mpi_isparallel()
-      num_elements_per_tree = MPI.Reduce!(num_elements_per_tree, +, mpi_comm())
+        num_elements_per_tree = MPI.Reduce!(num_elements_per_tree, +, mpi_comm())
     end
 
     # Since the mesh attributes are replicated on all ranks, only save from MPI
@@ -406,7 +407,6 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT)
                           nodes, boundary_names, treeIDs, neighIDs, faces,
                           duals, orientations, levels, num_elements_per_tree)
     else
-
         error("Unknown mesh type!")
     end
 
@@ -597,5 +597,4 @@ function load_mesh!(mesh::ParallelTreeMesh, mesh_file::AbstractString)
 
     return mesh
 end
-
 end # @muladd
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 9fe6836db54..d75741ca995 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -26,7 +26,7 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
     nmpiinterfaces :: Int
     nmpimortars    :: Int
 
-    unsaved_changes :: Bool
+    unsaved_changes::Bool
 
     function T8codeMesh{NDIMS}(forest::Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
@@ -139,62 +139,63 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
     # Store element vertices inside the cmesh.
     N = length(nodes)
     vertices = zeros(3 * 2^ndims) # quads/hexs only
-    for i = 1:ntrees
-        t8_cmesh_set_tree_class(cmesh, i-1, eclass)
+    for i in 1:ntrees
+        t8_cmesh_set_tree_class(cmesh, i - 1, eclass)
 
         if ndims == 2
-          vertices[1] = tree_node_coordinates[1,1,1,i]
-          vertices[2] = tree_node_coordinates[2,1,1,i]
+            vertices[1] = tree_node_coordinates[1, 1, 1, i]
+            vertices[2] = tree_node_coordinates[2, 1, 1, i]
 
-          vertices[4] = tree_node_coordinates[1,N,1,i]
-          vertices[5] = tree_node_coordinates[2,N,1,i]
+            vertices[4] = tree_node_coordinates[1, N, 1, i]
+            vertices[5] = tree_node_coordinates[2, N, 1, i]
 
-          vertices[7] = tree_node_coordinates[1,1,N,i]
-          vertices[8] = tree_node_coordinates[2,1,N,i]
+            vertices[7] = tree_node_coordinates[1, 1, N, i]
+            vertices[8] = tree_node_coordinates[2, 1, N, i]
 
-          vertices[10] = tree_node_coordinates[1,N,N,i]
-          vertices[11] = tree_node_coordinates[2,N,N,i]
+            vertices[10] = tree_node_coordinates[1, N, N, i]
+            vertices[11] = tree_node_coordinates[2, N, N, i]
         else
-          vertices[1] = tree_node_coordinates[1,1,1,1,i]
-          vertices[2] = tree_node_coordinates[2,1,1,1,i]
-          vertices[3] = tree_node_coordinates[3,1,1,1,i]
+            vertices[1] = tree_node_coordinates[1, 1, 1, 1, i]
+            vertices[2] = tree_node_coordinates[2, 1, 1, 1, i]
+            vertices[3] = tree_node_coordinates[3, 1, 1, 1, i]
 
-          vertices[4] = tree_node_coordinates[1,N,1,1,i]
-          vertices[5] = tree_node_coordinates[2,N,1,1,i]
-          vertices[6] = tree_node_coordinates[3,N,1,1,i]
+            vertices[4] = tree_node_coordinates[1, N, 1, 1, i]
+            vertices[5] = tree_node_coordinates[2, N, 1, 1, i]
+            vertices[6] = tree_node_coordinates[3, N, 1, 1, i]
 
-          vertices[7] = tree_node_coordinates[1,1,N,1,i]
-          vertices[8] = tree_node_coordinates[2,1,N,1,i]
-          vertices[9] = tree_node_coordinates[3,1,N,1,i]
+            vertices[7] = tree_node_coordinates[1, 1, N, 1, i]
+            vertices[8] = tree_node_coordinates[2, 1, N, 1, i]
+            vertices[9] = tree_node_coordinates[3, 1, N, 1, i]
 
-          vertices[10] = tree_node_coordinates[1,N,N,1,i]
-          vertices[11] = tree_node_coordinates[2,N,N,1,i]
-          vertices[12] = tree_node_coordinates[3,N,N,1,i]
+            vertices[10] = tree_node_coordinates[1, N, N, 1, i]
+            vertices[11] = tree_node_coordinates[2, N, N, 1, i]
+            vertices[12] = tree_node_coordinates[3, N, N, 1, i]
 
-          vertices[13] = tree_node_coordinates[1,1,1,N,i]
-          vertices[14] = tree_node_coordinates[2,1,1,N,i]
-          vertices[15] = tree_node_coordinates[3,1,1,N,i]
+            vertices[13] = tree_node_coordinates[1, 1, 1, N, i]
+            vertices[14] = tree_node_coordinates[2, 1, 1, N, i]
+            vertices[15] = tree_node_coordinates[3, 1, 1, N, i]
 
-          vertices[16] = tree_node_coordinates[1,N,1,N,i]
-          vertices[17] = tree_node_coordinates[2,N,1,N,i]
-          vertices[18] = tree_node_coordinates[3,N,1,N,i]
+            vertices[16] = tree_node_coordinates[1, N, 1, N, i]
+            vertices[17] = tree_node_coordinates[2, N, 1, N, i]
+            vertices[18] = tree_node_coordinates[3, N, 1, N, i]
 
-          vertices[19] = tree_node_coordinates[1,1,N,N,i]
-          vertices[20] = tree_node_coordinates[2,1,N,N,i]
-          vertices[21] = tree_node_coordinates[3,1,N,N,i]
+            vertices[19] = tree_node_coordinates[1, 1, N, N, i]
+            vertices[20] = tree_node_coordinates[2, 1, N, N, i]
+            vertices[21] = tree_node_coordinates[3, 1, N, N, i]
 
-          vertices[22] = tree_node_coordinates[1,N,N,N,i]
-          vertices[23] = tree_node_coordinates[2,N,N,N,i]
-          vertices[24] = tree_node_coordinates[3,N,N,N,i]
+            vertices[22] = tree_node_coordinates[1, N, N, N, i]
+            vertices[23] = tree_node_coordinates[2, N, N, N, i]
+            vertices[24] = tree_node_coordinates[3, N, N, N, i]
         end
 
-        t8_cmesh_set_tree_vertices(cmesh, i-1, vertices, 3)
+        t8_cmesh_set_tree_vertices(cmesh, i - 1, vertices, 3)
     end
 
     # Connect the coarse mesh elements.
-    for i = 1:length(elemIDs)
-        t8_cmesh_set_join(cmesh, elemIDs[i], neighIDs[i], faces[i], duals[i], orientations[i])
-    end 
+    for i in 1:length(elemIDs)
+        t8_cmesh_set_join(cmesh, elemIDs[i], neighIDs[i], faces[i], duals[i],
+                          orientations[i])
+    end
 
     t8_cmesh_commit(cmesh, mpi_comm())
 
@@ -202,7 +203,8 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
     do_face_ghost = mpi_isparallel()
     scheme = t8_scheme_new_default_cxx()
     initial_refinement_level = 0
-    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, mpi_comm())
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost,
+                                   mpi_comm())
 
     cum_sum_num_elements_per_tree = cumsum(num_elements_per_tree)
 
@@ -214,19 +216,20 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
         virtual_element_index += cum_sum_num_elements_per_tree[last_global_tree_id_of_preceding_rank + 1]
     end
 
-    function adapt_callback(forest, local_tree_id, eclass_scheme, local_element_id, elements, is_family,
+    function adapt_callback(forest, local_tree_id, eclass_scheme, local_element_id,
+                            elements, is_family,
                             user_data)
 
         # Check if we are already in the next tree in terms of the `virtual_element_index`.
         global_tree_id = t8_forest_global_tree_id(forest, local_tree_id)
         if virtual_element_index > cum_sum_num_elements_per_tree[global_tree_id + 1]
-          return 0
+            return 0
         end
 
         # Test if we already reached the targeted level.
         level = t8_element_level(eclass_scheme, elements[1])
         if level < levels[virtual_element_index]
-          return 1 # Go one refinement level deeper.
+            return 1 # Go one refinement level deeper.
         end
 
         # Targeted level is reached.
@@ -238,7 +241,7 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
     # For each tree the callback recursively increases the refinement level
     # till it matches with the associated section in `levels.
     forest = adapt(forest, adapt_callback; recursive = true, balance = false,
-              partition = false, ghost = false, user_data = C_NULL)
+                   partition = false, ghost = false, user_data = C_NULL)
 
     # if mpi_isparallel()
     #   forest = t8_forest_partition(forest)
@@ -849,7 +852,7 @@ Adapt a `T8codeMesh` according to a user-defined `adapt_callback`.
     - `user_data = C_NULL`: Pointer to some arbitrary user-defined data.
 """
 function adapt(forest::Ptr{t8_forest}, adapt_callback; recursive = true, balance = true,
-                partition = true, ghost = true, user_data = C_NULL)
+               partition = true, ghost = true, user_data = C_NULL)
     # Check that forest is a committed, that is valid and usable, forest.
     @assert t8_forest_is_committed(forest) != 0
 
@@ -945,7 +948,7 @@ function get_global_first_element_ids(mesh::T8codeMesh)
 end
 
 function count_interfaces(mesh::T8codeMesh)
-  return count_interfaces(mesh.forest,ndims(mesh))
+    return count_interfaces(mesh.forest, ndims(mesh))
 end
 
 function count_interfaces(forest::Ptr{t8_forest}, ndims)
@@ -1235,11 +1238,13 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
 
                     # Compute the `orientation` of the touching faces.
                     if t8_element_is_root_boundary(eclass_scheme, element, iface) == 1
-                        itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(mesh.forest, itree)
+                        itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(mesh.forest,
+                                                                            itree)
                         iface_in_tree = t8_element_tree_face(eclass_scheme, element, iface)
                         orientation_ref = Ref{Cint}()
 
-                        t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree, C_NULL,
+                        t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree,
+                                                   C_NULL,
                                                    orientation_ref)
                         orientation = orientation_ref[]
                     else
@@ -1418,18 +1423,18 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
 end
 
 function get_levels(mesh::T8codeMesh)
-  return trixi_t8_get_local_element_levels(mesh.forest)
+    return trixi_t8_get_local_element_levels(mesh.forest)
 end
 
 function get_cmesh_info(mesh::T8codeMesh)
-  @assert t8_forest_is_committed(mesh.forest) != 0
-  cmesh = t8_forest_get_cmesh(mesh.forest)
-  return get_cmesh_info(cmesh, ndims(mesh))
+    @assert t8_forest_is_committed(mesh.forest) != 0
+    cmesh = t8_forest_get_cmesh(mesh.forest)
+    return get_cmesh_info(cmesh, ndims(mesh))
 end
 
 function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
     num_trees = t8_cmesh_get_num_trees(cmesh)
-    num_faces = 2*ndims
+    num_faces = 2 * ndims
 
     num_interfaces = 0
 
@@ -1439,7 +1444,8 @@ function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
     # Count connected faces.
     for itree in 0:(num_trees - 1)
         for iface in 0:(num_faces - 1)
-            neigh_itree = t8_cmesh_get_face_neighbor(cmesh, itree, iface, dual_face_ref, C_NULL)
+            neigh_itree = t8_cmesh_get_face_neighbor(cmesh, itree, iface, dual_face_ref,
+                                                     C_NULL)
             if itree < neigh_itree || itree == neigh_itree && iface < dual_face_ref[]
                 num_interfaces += 1
             end
@@ -1457,7 +1463,8 @@ function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
 
     for itree in 0:(num_trees - 1)
         for iface in 0:(num_faces - 1)
-            neigh_itree = t8_cmesh_get_face_neighbor(cmesh, itree, iface, dual_face_ref, orientation_ref)
+            neigh_itree = t8_cmesh_get_face_neighbor(cmesh, itree, iface, dual_face_ref,
+                                                     orientation_ref)
 
             if itree < neigh_itree || itree == neigh_itree && iface < dual_face_ref[]
                 itf_index += 1

From 9ef91c6d04c1e628d398a5dade1142c3e7bb5609 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Thu, 20 Jun 2024 14:53:12 +0200
Subject: [PATCH 41/89] Updated examples and tests.

---
 ...ixir_advection_amr_solution_independent.jl |  6 ++
 .../elixir_advection_amr_unstructured_flag.jl |  9 ++
 .../elixir_advection_extended.jl              | 85 +++++++++++++++++++
 .../elixir_advection_nonconforming_flag.jl    |  6 +-
 .../elixir_advection_restart.jl               | 43 ++++++++++
 .../elixir_advection_restart_amr.jl           | 54 ++++++++++++
 .../elixir_euler_free_stream.jl               |  6 ++
 .../t8code_2d_dgsem/elixir_euler_sedov.jl     |  5 ++
 .../elixir_euler_shockcapturing_ec.jl         |  6 ++
 ...e_terms_nonconforming_unstructured_flag.jl |  9 ++
 .../elixir_eulergravity_convergence.jl        |  9 ++
 examples/t8code_2d_dgsem/elixir_mhd_rotor.jl  |  6 ++
 .../elixir_shallowwater_source_terms.jl       |  6 +-
 .../t8code_3d_dgsem/elixir_advection_amr.jl   |  6 ++
 ...lixir_advection_amr_unstructured_curved.jl | 10 +++
 .../t8code_3d_dgsem/elixir_advection_basic.jl | 10 ++-
 .../elixir_advection_nonconforming.jl         |  6 +-
 .../elixir_advection_restart.jl               | 41 +++++++++
 .../elixir_advection_unstructured_curved.jl   | 10 ++-
 examples/t8code_3d_dgsem/elixir_euler_ec.jl   |  5 ++
 .../elixir_euler_free_stream.jl               |  6 ++
 .../elixir_euler_free_stream_extruded.jl      |  6 ++
 .../t8code_3d_dgsem/elixir_euler_sedov.jl     |  5 ++
 ...terms_nonconforming_unstructured_curved.jl |  6 ++
 .../elixir_euler_source_terms_nonperiodic.jl  |  6 ++
 src/auxiliary/t8code.jl                       |  6 +-
 src/callbacks_step/save_restart_dg.jl         | 14 +--
 test/test_mpi_t8code_2d.jl                    | 18 ++++
 test/test_mpi_t8code_3d.jl                    | 18 ++++
 test/test_t8code_2d.jl                        | 46 +++++++---
 test/test_t8code_3d.jl                        | 18 ++++
 31 files changed, 455 insertions(+), 32 deletions(-)
 create mode 100644 examples/t8code_2d_dgsem/elixir_advection_extended.jl
 create mode 100644 examples/t8code_2d_dgsem/elixir_advection_restart.jl
 create mode 100644 examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
 create mode 100644 examples/t8code_3d_dgsem/elixir_advection_restart.jl

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
index 1ed08e1961b..d10cb167cb0 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
@@ -115,6 +115,11 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 amr_controller = ControllerThreeLevel(semi,
                                       TrixiExtension.IndicatorSolutionIndependent(semi),
                                       base_level = 4,
@@ -130,6 +135,7 @@ stepsize_callback = StepsizeCallback(cfl = 1.6)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_solution,
                         amr_callback, stepsize_callback);
 
 ###############################################################################
diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index 9138586cccf..352455d986f 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -54,6 +54,14 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first),
                                       base_level = 1,
                                       med_level = 2, med_threshold = 0.1,
@@ -67,6 +75,7 @@ stepsize_callback = StepsizeCallback(cfl = 0.7)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_restart, save_solution,
                         amr_callback, stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_2d_dgsem/elixir_advection_extended.jl b/examples/t8code_2d_dgsem/elixir_advection_extended.jl
new file mode 100644
index 00000000000..44892faf40d
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_advection_extended.jl
@@ -0,0 +1,85 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7)
+equations = LinearScalarAdvectionEquation2D(advection_velocity)
+
+initial_condition = initial_condition_convergence_test
+
+# BCs must be passed as Dict
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:x_neg => boundary_condition,
+                           :x_pos => boundary_condition,
+                           :y_neg => boundary_condition,
+                           :y_pos => boundary_condition)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+# The initial condition is 2-periodic
+coordinates_min = (-1.5, 1.3) # minimum coordinates (min(x), min(y))
+coordinates_max = (0.5, 5.3) # maximum coordinates (max(x), max(y))
+
+trees_per_dimension = (19, 37)
+
+# Create a T8codeMesh with 19 x 37 trees on the rectangular domain defined above
+mesh = T8codeMesh(trees_per_dimension, polydeg = 3,
+                 coordinates_min = coordinates_min, coordinates_max = coordinates_max,
+                 periodicity = false)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan);
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
+                                     extra_analysis_integrals = (entropy, energy_total))
+
+# The AliveCallback prints short status information in regular intervals
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+# The SaveRestartCallback allows to save a file from which a Trixi.jl simulation can be restarted
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.6)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        save_restart, save_solution,
+                        stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
index 48f78dd6da3..f080b640f0d 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
@@ -69,11 +69,15 @@ summary_callback = SummaryCallback()
 # The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
 analysis_callback = AnalysisCallback(semi, interval = 100)
 
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
 stepsize_callback = StepsizeCallback(cfl = 1.6)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback)
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback)
 
 ###############################################################################
 # run the simulation
diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart.jl b/examples/t8code_2d_dgsem/elixir_advection_restart.jl
new file mode 100644
index 00000000000..0f573714c1f
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart.jl
@@ -0,0 +1,43 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# create a restart file
+
+elixir_file = "elixir_advection_extended.jl"
+restart_file = "restart_000021.h5"
+
+trixi_include(@__MODULE__, joinpath(@__DIR__, elixir_file))
+
+###############################################################################
+# adapt the parameters that have changed compared to "elixir_advection_extended.jl"
+
+# Note: If you get a restart file from somewhere else, you need to provide
+# appropriate setups in the elixir loading a restart file
+
+restart_filename = joinpath("out", restart_file)
+mesh = load_mesh(restart_filename)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+tspan = (load_time(restart_filename), 2.0)
+dt = load_dt(restart_filename)
+ode = semidiscretize(semi, tspan, restart_filename);
+
+# Do not overwrite the initial snapshot written by elixir_advection_extended.jl.
+save_solution.condition.save_initial_solution = false
+
+integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false),
+                  dt = dt, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks, maxiters = 100_000);
+
+# Get the last time index and work with that.
+load_timestep!(integrator, restart_filename)
+
+###############################################################################
+# run the simulation
+
+sol = solve!(integrator)
+summary_callback() # print the timer summary
diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
new file mode 100644
index 00000000000..fd3623dd88b
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
@@ -0,0 +1,54 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# create a restart file
+
+elixir_file = "elixir_advection_extended.jl"
+restart_file = "restart_000021.h5"
+
+trixi_include(@__MODULE__, joinpath(@__DIR__, elixir_file))
+
+###############################################################################
+# adapt the parameters that have changed compared to "elixir_advection_extended.jl"
+
+# Note: If you get a restart file from somewhere else, you need to provide
+# appropriate setups in the elixir loading a restart file
+
+restart_filename = joinpath("out", restart_file)
+mesh = load_mesh(restart_filename)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+tspan = (load_time(restart_filename), 2.0)
+dt = load_dt(restart_filename)
+ode = semidiscretize(semi, tspan, restart_filename);
+
+# Do not overwrite the initial snapshot written by elixir_advection_extended.jl.
+save_solution.condition.save_initial_solution = false
+
+# Add AMR callback
+amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first),
+                                      base_level = 0,
+                                      med_level = 0, med_threshold = 0.8,
+                                      max_level = 1, max_threshold = 1.2)
+amr_callback = AMRCallback(semi, amr_controller,
+                           interval = 5,
+                           adapt_initial_condition = true,
+                           adapt_initial_condition_only_refine = true)
+callbacks_ext = CallbackSet(amr_callback, callbacks.discrete_callbacks...)
+
+integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false),
+                  dt = dt, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks_ext, maxiters = 100_000);
+
+# Get the last time index and work with that.
+load_timestep!(integrator, restart_filename)
+
+###############################################################################
+# run the simulation
+
+sol = solve!(integrator)
+summary_callback() # print the timer summary
diff --git a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
index d9d2c65d988..b2d49e3ccfe 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
@@ -72,10 +72,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 2.0)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_2d_dgsem/elixir_euler_sedov.jl b/examples/t8code_2d_dgsem/elixir_euler_sedov.jl
index 82770a4050b..fae7d818ad8 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_sedov.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_sedov.jl
@@ -79,11 +79,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 300,
+                                     save_initial_solution = true,
+                                     save_final_solution = true)
+
 stepsize_callback = StepsizeCallback(cfl = 0.5)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl b/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl
index 9ebbd1d28c4..2a0c4a6ee20 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl
@@ -50,11 +50,17 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 1.0)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
index 48684071d4b..19502f1ce0e 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
@@ -73,10 +73,19 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 0.8)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_restart, save_solution,
                         stepsize_callback)
 ###############################################################################
 # run the simulation
diff --git a/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl b/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl
index cd10315945a..e63c1297882 100644
--- a/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl
+++ b/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl
@@ -56,6 +56,14 @@ summary_callback = SummaryCallback()
 
 stepsize_callback = StepsizeCallback(cfl = 0.8)
 
+save_solution = SaveSolutionCallback(interval = 10,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
 analysis_interval = 100
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
@@ -63,6 +71,7 @@ analysis_callback = AnalysisCallback(semi_euler, interval = analysis_interval,
                                      save_analysis = true)
 
 callbacks = CallbackSet(summary_callback, stepsize_callback,
+                        save_restart, save_solution,
                         analysis_callback, alive_callback)
 
 ###############################################################################
diff --git a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
index 592d5b15a85..a09ea46fa34 100644
--- a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
+++ b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
@@ -93,6 +93,11 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 amr_indicator = IndicatorLöhner(semi,
                                 variable = density_pressure)
 
@@ -113,6 +118,7 @@ glm_speed_callback = GlmSpeedCallback(glm_scale = 0.5, cfl = cfl)
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_solution,
                         amr_callback,
                         stepsize_callback,
                         glm_speed_callback)
diff --git a/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl b/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
index 3610639d554..688ddb2dbb5 100644
--- a/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
+++ b/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
@@ -46,7 +46,11 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
-callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback)
+save_solution = SaveSolutionCallback(interval = 200,
+                                     save_initial_solution = true,
+                                     save_final_solution = true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
 
 ###############################################################################
 # run the simulation
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr.jl b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
index 5a4b2218d57..9ecc8955383 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
@@ -40,6 +40,11 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first),
                                       base_level = 4,
                                       med_level = 5, med_threshold = 0.1,
@@ -54,6 +59,7 @@ stepsize_callback = StepsizeCallback(cfl = 1.2)
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_solution,
                         amr_callback,
                         stepsize_callback)
 
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
index 1f9aa3449b0..a47d94b62d5 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
@@ -71,6 +71,14 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first),
                                       base_level = 1,
                                       med_level = 2, med_threshold = 0.1,
@@ -85,6 +93,8 @@ stepsize_callback = StepsizeCallback(cfl = 1.2)
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_restart,
+                        save_solution,
                         amr_callback,
                         stepsize_callback)
 
diff --git a/examples/t8code_3d_dgsem/elixir_advection_basic.jl b/examples/t8code_3d_dgsem/elixir_advection_basic.jl
index f49462035aa..ae97a73d182 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_basic.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_basic.jl
@@ -40,11 +40,19 @@ summary_callback = SummaryCallback()
 # The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
 analysis_callback = AnalysisCallback(semi, interval = 100)
 
+# The SaveRestartCallback allows to save a file from which a Trixi.jl simulation can be restarted
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback,
+callbacks = CallbackSet(summary_callback, analysis_callback, save_restart, save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl b/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl
index 8d7a48370f5..0755a76ef45 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl
@@ -66,11 +66,15 @@ summary_callback = SummaryCallback()
 # The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
 analysis_callback = AnalysisCallback(semi, interval = 100)
 
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
 stepsize_callback = StepsizeCallback(cfl = 1.6)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback,
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_advection_restart.jl b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
new file mode 100644
index 00000000000..b3dead42399
--- /dev/null
+++ b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
@@ -0,0 +1,41 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# create a restart file
+
+trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_basic.jl"),
+              trees_per_dimension = (2, 2, 2))
+
+###############################################################################
+# adapt the parameters that have changed compared to "elixir_advection_basic.jl"
+
+# Note: If you get a restart file from somewhere else, you need to provide
+# appropriate setups in the elixir loading a restart file
+
+restart_filename = joinpath("out", "restart_000010.h5")
+mesh = load_mesh(restart_filename)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+tspan = (load_time(restart_filename), 2.0)
+dt = load_dt(restart_filename)
+ode = semidiscretize(semi, tspan, restart_filename);
+
+# Do not overwrite the initial snapshot written by elixir_advection_basic.jl.
+save_solution.condition.save_initial_solution = false
+
+integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false),
+                  dt = dt, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks, maxiters = 100_000);
+
+# Get the last time index and work with that.
+load_timestep!(integrator, restart_filename)
+
+###############################################################################
+# run the simulation
+
+sol = solve!(integrator)
+summary_callback() # print the timer summary
diff --git a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
index df1dbce2af1..f49137e054d 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
@@ -69,17 +69,21 @@ summary_callback = SummaryCallback()
 analysis_interval = 100
 analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+# The SaveRestartCallback allows to save a file from which a Trixi.jl simulation can be restarted
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true)
+
 # The SaveSolutionCallback allows to save the solution to a file in regular intervals
 save_solution = SaveSolutionCallback(interval = 100,
                                      solution_variables = cons2prim)
 
-alive_callback = AliveCallback(analysis_interval = analysis_interval)
-
 # The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback,
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_restart,
                         save_solution, stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_ec.jl b/examples/t8code_3d_dgsem/elixir_euler_ec.jl
index e1e4d850a86..e75b0f69636 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_ec.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_ec.jl
@@ -68,11 +68,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true)
+
 stepsize_callback = StepsizeCallback(cfl = 1.0)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
index 882e3aebebe..d45de658cc0 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
@@ -95,10 +95,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl b/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
index 777cccf7ad7..d24512a4cdd 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
@@ -83,10 +83,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_sedov.jl b/examples/t8code_3d_dgsem/elixir_euler_sedov.jl
index 618b170b661..f897249ed2e 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_sedov.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_sedov.jl
@@ -81,11 +81,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true)
+
 stepsize_callback = StepsizeCallback(cfl = 0.5)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback,
                         alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
index a06e7927dd0..4b87b646df9 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
@@ -97,10 +97,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 0.6)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_solution,
                         stepsize_callback);
 
 ###############################################################################
diff --git a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
index 7cb03bb312d..ce1662c8e50 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
@@ -47,10 +47,16 @@ analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
 
 alive_callback = AliveCallback(analysis_interval = analysis_interval)
 
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
 stepsize_callback = StepsizeCallback(cfl = 0.6)
 
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,
+                        save_solution,
                         stepsize_callback)
 
 ###############################################################################
diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 83ef471e1c6..4c961e80008 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -17,8 +17,7 @@ function init_t8code()
         # Initialize the sc library, has to happen before we initialize t8code.
         let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
             T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
-                                 T8code.Libt8.SC_LP_DEBUG)
-            # T8code.Libt8.SC_LP_ERROR)
+            T8code.Libt8.SC_LP_ERROR)
         end
 
         if T8code.Libt8.p4est_is_initialized() == 0
@@ -27,8 +26,7 @@ function init_t8code()
         end
 
         # Initialize t8code with log level ERROR to prevent a lot of output in AMR simulations.
-        # t8_init(T8code.Libt8.SC_LP_ERROR)
-        t8_init(T8code.Libt8.SC_LP_DEBUG)
+        t8_init(T8code.Libt8.SC_LP_ERROR)
 
         if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE")
             # Normally, `sc_finalize` should always be called during shutdown of an
diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl
index b83402c5f86..210d15d3982 100644
--- a/src/callbacks_step/save_restart_dg.jl
+++ b/src/callbacks_step/save_restart_dg.jl
@@ -48,7 +48,7 @@ function save_restart_file(u, time, dt, timestep,
 end
 
 function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh,
-                                       UnstructuredMesh2D, SerialP4estMesh},
+                                       UnstructuredMesh2D, SerialP4estMesh, SerialT8codeMesh},
                            equations, dg::DG, cache, restart_file)
 
     # allocate memory
@@ -88,7 +88,7 @@ function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh,
 end
 
 function save_restart_file(u, time, dt, timestep,
-                           mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations,
+                           mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh}, equations,
                            dg::DG, cache,
                            restart_callback)
     @unpack output_directory = restart_callback
@@ -105,7 +105,7 @@ function save_restart_file(u, time, dt, timestep,
 end
 
 function save_restart_file_parallel(u, time, dt, timestep,
-                                    mesh::Union{ParallelTreeMesh, ParallelP4estMesh},
+                                    mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
                                     equations, dg::DG, cache,
                                     filename)
 
@@ -151,7 +151,7 @@ function save_restart_file_parallel(u, time, dt, timestep,
 end
 
 function save_restart_file_on_root(u, time, dt, timestep,
-                                   mesh::Union{ParallelTreeMesh, ParallelP4estMesh},
+                                   mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
                                    equations, dg::DG, cache,
                                    filename)
 
@@ -204,7 +204,7 @@ function save_restart_file_on_root(u, time, dt, timestep,
     return filename
 end
 
-function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations,
+function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh}, equations,
                            dg::DG, cache, restart_file)
     if HDF5.has_parallel()
         load_restart_file_parallel(mesh, equations, dg, cache, restart_file)
@@ -213,7 +213,7 @@ function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equ
     end
 end
 
-function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh},
+function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
                                     equations, dg::DG, cache, restart_file)
 
     # Calculate element and node counts by MPI rank
@@ -264,7 +264,7 @@ function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estM
     return u_ode
 end
 
-function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh},
+function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
                                    equations, dg::DG, cache, restart_file)
 
     # Calculate element and node counts by MPI rank
diff --git a/test/test_mpi_t8code_2d.jl b/test/test_mpi_t8code_2d.jl
index 7c7fc03898c..c3a6ed7a253 100644
--- a/test/test_mpi_t8code_2d.jl
+++ b/test/test_mpi_t8code_2d.jl
@@ -111,6 +111,24 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "t8code_2d_dgsem")
         end
     end
 
+    @trixi_testset "elixir_advection_restart.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"),
+                            l2=[4.507575525876275e-6],
+                            linf=[6.21489667023134e-5],
+                            # With the default `maxiters = 1` in coverage tests,
+                            # there would be no time steps after the restart.
+                            coverage_override=(maxiters = 100_000,))
+
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
                                      "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
diff --git a/test/test_mpi_t8code_3d.jl b/test/test_mpi_t8code_3d.jl
index a15690a7629..a0fd3868d45 100644
--- a/test/test_mpi_t8code_3d.jl
+++ b/test/test_mpi_t8code_3d.jl
@@ -87,6 +87,24 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "t8code_3d_dgsem")
         end
     end
 
+    @trixi_testset "elixir_advection_restart.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"),
+                            l2=[0.002590388934758452],
+                            linf=[0.01840757696885409],
+                            # With the default `maxiters = 1` in coverage tests,
+                            # there would be no time steps after the restart.
+                            coverage_override=(maxiters = 100_000,))
+
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
     # Compressible Euler
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl
index b63d2a105ac..f0607dd977d 100644
--- a/test/test_t8code_2d.jl
+++ b/test/test_t8code_2d.jl
@@ -15,20 +15,20 @@ mkdir(outdir)
 @testset "T8codeMesh2D" begin
 #! format: noindent
 
-@trixi_testset "test save_mesh_file" begin
-    @test_throws Exception begin
-        # Save mesh file support will be added in the future. The following
-        # lines of code are here for satisfying code coverage.
-
-        # Create dummy mesh.
-        mesh = T8codeMesh((1, 1), polydeg = 1,
-                          mapping = Trixi.coordinates2mapping((-1.0, -1.0), (1.0, 1.0)),
-                          initial_refinement_level = 1)
-
-        # This call throws an error.
-        Trixi.save_mesh_file(mesh, "dummy")
-    end
-end
+# @trixi_testset "test save_mesh_file" begin
+#     @test_throws Exception begin
+#         # Save mesh file support will be added in the future. The following
+#         # lines of code are here for satisfying code coverage.
+# 
+#         # Create dummy mesh.
+#         mesh = T8codeMesh((1, 1), polydeg = 1,
+#                           mapping = Trixi.coordinates2mapping((-1.0, -1.0), (1.0, 1.0)),
+#                           initial_refinement_level = 1)
+# 
+#         # This call throws an error.
+#         Trixi.save_mesh_file(mesh, "dummy")
+#     end
+# end
 
 @trixi_testset "test load mesh from path" begin
     mktempdir() do path
@@ -152,6 +152,24 @@ end
     end
 end
 
+@trixi_testset "elixir_advection_restart_amr.jl" begin
+    # This test is identical to the one in `test_p4est_2d.jl`.
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart_amr.jl"),
+                        l2=[2.869137983727866e-6],
+                        linf=[3.8353423270964804e-5],
+                        # With the default `maxiters = 1` in coverage tests,
+                        # there would be no time steps after the restart.
+                        coverage_override=(maxiters = 25,))
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
+
 @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
     # This test is identical to the one in `test_p4est_2d.jl`.
     @test_trixi_include(joinpath(EXAMPLES_DIR,
diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl
index 81d2a7cdd85..e5adeeef8f5 100644
--- a/test/test_t8code_3d.jl
+++ b/test/test_t8code_3d.jl
@@ -110,6 +110,24 @@ mkdir(outdir)
         end
     end
 
+    # This test is identical to the one in `test_p4est_3d.jl`.
+    @trixi_testset "elixir_advection_restart.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"),
+                            l2=[0.002590388934758452],
+                            linf=[0.01840757696885409],
+                            # With the default `maxiters = 1` in coverage tests,
+                            # there would be no time steps after the restart.
+                            coverage_override=(maxiters = 100_000,))
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
     # This test is identical to the one in `test_p4est_3d.jl`.
     @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_curved.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,

From 2f96869b64e15f7434a459b08cbefbe9f4028336 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Thu, 20 Jun 2024 14:57:43 +0200
Subject: [PATCH 42/89] Applied formatter.

---
 .../elixir_advection_extended.jl              |  4 ++--
 .../elixir_advection_nonconforming_flag.jl    |  3 ++-
 src/auxiliary/t8code.jl                       |  2 +-
 src/callbacks_step/save_restart_dg.jl         | 21 ++++++++++++-------
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_extended.jl b/examples/t8code_2d_dgsem/elixir_advection_extended.jl
index 44892faf40d..f7e06dd517e 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_extended.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_extended.jl
@@ -28,8 +28,8 @@ trees_per_dimension = (19, 37)
 
 # Create curved mesh with 19 x 37 elements
 mesh = T8codeMesh(trees_per_dimension, polydeg = 3,
-                 coordinates_min = coordinates_min, coordinates_max = coordinates_max,
-                 periodicity = false)
+                  coordinates_min = coordinates_min, coordinates_max = coordinates_max,
+                  periodicity = false)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
diff --git a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
index f080b640f0d..7230c8c0b9e 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
@@ -77,7 +77,8 @@ save_solution = SaveSolutionCallback(interval = 100,
 stepsize_callback = StepsizeCallback(cfl = 1.6)
 
 # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback)
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+                        stepsize_callback)
 
 ###############################################################################
 # run the simulation
diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 4c961e80008..d7703b8bbb4 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -17,7 +17,7 @@ function init_t8code()
         # Initialize the sc library, has to happen before we initialize t8code.
         let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
             T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
-            T8code.Libt8.SC_LP_ERROR)
+                                 T8code.Libt8.SC_LP_ERROR)
         end
 
         if T8code.Libt8.p4est_is_initialized() == 0
diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl
index 210d15d3982..c5bf2191c7a 100644
--- a/src/callbacks_step/save_restart_dg.jl
+++ b/src/callbacks_step/save_restart_dg.jl
@@ -48,7 +48,8 @@ function save_restart_file(u, time, dt, timestep,
 end
 
 function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh,
-                                       UnstructuredMesh2D, SerialP4estMesh, SerialT8codeMesh},
+                                       UnstructuredMesh2D, SerialP4estMesh,
+                                       SerialT8codeMesh},
                            equations, dg::DG, cache, restart_file)
 
     # allocate memory
@@ -88,7 +89,8 @@ function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh,
 end
 
 function save_restart_file(u, time, dt, timestep,
-                           mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh}, equations,
+                           mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                       ParallelT8codeMesh}, equations,
                            dg::DG, cache,
                            restart_callback)
     @unpack output_directory = restart_callback
@@ -105,7 +107,8 @@ function save_restart_file(u, time, dt, timestep,
 end
 
 function save_restart_file_parallel(u, time, dt, timestep,
-                                    mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
+                                    mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                                ParallelT8codeMesh},
                                     equations, dg::DG, cache,
                                     filename)
 
@@ -151,7 +154,8 @@ function save_restart_file_parallel(u, time, dt, timestep,
 end
 
 function save_restart_file_on_root(u, time, dt, timestep,
-                                   mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
+                                   mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                               ParallelT8codeMesh},
                                    equations, dg::DG, cache,
                                    filename)
 
@@ -204,7 +208,8 @@ function save_restart_file_on_root(u, time, dt, timestep,
     return filename
 end
 
-function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh}, equations,
+function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                       ParallelT8codeMesh}, equations,
                            dg::DG, cache, restart_file)
     if HDF5.has_parallel()
         load_restart_file_parallel(mesh, equations, dg, cache, restart_file)
@@ -213,7 +218,8 @@ function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, Para
     end
 end
 
-function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
+function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                                ParallelT8codeMesh},
                                     equations, dg::DG, cache, restart_file)
 
     # Calculate element and node counts by MPI rank
@@ -264,7 +270,8 @@ function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estM
     return u_ode
 end
 
-function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh, ParallelT8codeMesh},
+function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                               ParallelT8codeMesh},
                                    equations, dg::DG, cache, restart_file)
 
     # Calculate element and node counts by MPI rank

From df2bdf41b443e2864db835b8623af38df345c1a3 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 21 Jun 2024 11:17:55 +0200
Subject: [PATCH 43/89] Minor adjustments.

---
 src/callbacks_step/amr.jl | 2 ++
 src/meshes/t8code_mesh.jl | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl
index b0afd02aff8..8696ccd0d31 100644
--- a/src/callbacks_step/amr.jl
+++ b/src/callbacks_step/amr.jl
@@ -788,6 +788,8 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::T8codeMesh,
         reinitialize_boundaries!(semi.boundary_conditions, cache)
     end
 
+    mesh.unsaved_changes |= has_changed
+
     # Return true if there were any cells coarsened or refined, otherwise false.
     return has_changed
 end
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index d75741ca995..317b3a7d9b8 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -1045,7 +1045,7 @@ function count_interfaces(forest::Ptr{t8_forest}, ndims)
                         elseif level < neighbor_level
                             local_num_mpi_mortars += 1
 
-                            global_mortar_id = 2 * ndims(mesh) * current_linear_id + iface
+                            global_mortar_id = 2 * ndims * current_linear_id + iface
 
                         else # level > neighbor_level
                             neighbor_global_ghost_itree = ghost_global_treeids[findlast(ghost_tree_element_offsets .<=
@@ -1055,7 +1055,7 @@ function count_interfaces(forest::Ptr{t8_forest}, ndims)
                                                  t8_element_get_linear_id(neighbor_scheme,
                                                                           neighbor_leaves[1],
                                                                           max_level)
-                            global_mortar_id = 2 * ndims(mesh) * neighbor_linear_id +
+                            global_mortar_id = 2 * ndims * neighbor_linear_id +
                                                dual_faces[1]
 
                             if !(global_mortar_id in visited_global_mortar_ids)
@@ -1455,7 +1455,7 @@ function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
     # Allocate arrays.
     treeIDs = zeros(Int, num_interfaces)
     neighIDs = zeros(Int, num_interfaces)
-    orientations = zeros(Int32, num_interfaces)
+    orientations = zeros(Int8, num_interfaces)
     faces = zeros(Int8, num_interfaces)
     duals = zeros(Int8, num_interfaces)
 

From 7ee23b60e8281ff4a26ccb9ee11c820e91f1d546 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 24 Jun 2024 14:28:39 +0200
Subject: [PATCH 44/89] Code refinement. Enabled partitioning after mesh
 loading.

---
 src/meshes/mesh_io.jl     | 29 +++++++++++++++++++----
 src/meshes/t8code_mesh.jl | 50 ++++++++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index 8958936f771..49a99b516e9 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -225,6 +225,11 @@ function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_paralle
     return filename
 end
 
+# This routine works for both serial and MPI parallel mode. The forest
+# information is collected on all ranks and then gathered by the root rank.
+# Since only the `levels` array of unsigned bytes is basically independent per
+# rank, it is not worth the effort to have a collective write to the HDF5 file.
+# Instead, `levels` gets gathered by the root rank and written in serial.
 function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
                         mpi_parallel::Any)
 
@@ -239,12 +244,25 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
     end
 
     # Retrieve refinement levels of all elements.
-    levels = get_levels(mesh)
+    local_levels = get_levels(mesh)
     if mpi_isparallel()
-        levels = MPI.Gather(levels, mpi_root(), mpi_comm())
+        count = [length(local_levels)]
+        counts = MPI.Gather(view(count, 1), mpi_root(), mpi_comm())
+
+        if mpi_isroot()
+            levels = similar(local_levels, ncellsglobal(mesh))
+            MPI.Gatherv!(local_levels, MPI.VBuffer(levels, counts),
+                         mpi_root(), mpi_comm())
+        else
+            MPI.Gatherv!(local_levels, nothing, mpi_root(), mpi_comm())
+        end
+    else
+        levels = local_levels
     end
 
-    # Retrieve number of elements per tree.
+    # Retrieve the number of elements per tree. Since a tree can be distributed
+    # among multiple ranks a reduction operation sums them all up. The latter
+    # is done on the root rank only.
     num_global_trees = t8_forest_get_num_global_trees(mesh.forest)
     num_elements_per_tree = zeros(t8_gloidx_t, num_global_trees)
     num_local_trees = t8_forest_get_num_local_trees(mesh.forest)
@@ -256,7 +274,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
     end
 
     if mpi_isparallel()
-        num_elements_per_tree = MPI.Reduce!(num_elements_per_tree, +, mpi_comm())
+        MPI.Reduce!(num_elements_per_tree, +, mpi_comm())
     end
 
     # Since the mesh attributes are replicated on all ranks, only save from MPI
@@ -274,7 +292,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
         attributes(file)["mesh_type"] = get_name(mesh)
         attributes(file)["ndims"] = ndims(mesh)
         attributes(file)["ntrees"] = ntrees(mesh)
-        attributes(file)["nelements"] = ncells(mesh)
+        attributes(file)["nelements"] = ncellsglobal(mesh)
 
         file["tree_node_coordinates"] = mesh.tree_node_coordinates
         file["nodes"] = Vector(mesh.nodes)
@@ -495,6 +513,7 @@ function load_mesh_parallel(mesh_file::AbstractString; n_cells_max, RealT)
 
         mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates,
                                  nodes, boundary_names, mesh_file, false, true)
+
     elseif mesh_type == "T8codeMesh"
         if mpi_isroot()
             ndims, ntrees, nelements, tree_node_coordinates, nodes,
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 317b3a7d9b8..10b9060422a 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -208,32 +208,33 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
 
     cum_sum_num_elements_per_tree = cumsum(num_elements_per_tree)
 
-    # Compute the offset within the to-be-reconstructed forest depending on the
+    global_element_id = 0 # zero-based index
+
+    # Compute the offset within the to-be-reconstructed forest. Depends on the
     # MPI rank resp. first global tree id.
-    virtual_element_index = 1 # one-based index
     if mpi_rank() > 0
         last_global_tree_id_of_preceding_rank = t8_forest_global_tree_id(forest, 0) - 1
-        virtual_element_index += cum_sum_num_elements_per_tree[last_global_tree_id_of_preceding_rank + 1]
+        global_element_id += cum_sum_num_elements_per_tree[last_global_tree_id_of_preceding_rank + 1]
     end
 
     function adapt_callback(forest, local_tree_id, eclass_scheme, local_element_id,
                             elements, is_family,
                             user_data)
 
-        # Check if we are already in the next tree in terms of the `virtual_element_index`.
+        # Check if we are already in the next tree in terms of the `global_element_id`.
         global_tree_id = t8_forest_global_tree_id(forest, local_tree_id)
-        if virtual_element_index > cum_sum_num_elements_per_tree[global_tree_id + 1]
+        if global_element_id + 1 > cum_sum_num_elements_per_tree[global_tree_id + 1]
             return 0
         end
 
         # Test if we already reached the targeted level.
         level = t8_element_level(eclass_scheme, elements[1])
-        if level < levels[virtual_element_index]
+        if level < levels[global_element_id + 1]
             return 1 # Go one refinement level deeper.
         end
 
         # Targeted level is reached.
-        virtual_element_index += 1
+        global_element_id += 1
         return 0
     end
 
@@ -243,9 +244,9 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
     forest = adapt(forest, adapt_callback; recursive = true, balance = false,
                    partition = false, ghost = false, user_data = C_NULL)
 
-    # if mpi_isparallel()
-    #   forest = t8_forest_partition(forest)
-    # end
+    if mpi_isparallel()
+      forest = partition(forest)
+    end
 
     return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")
 end
@@ -878,7 +879,9 @@ function adapt(forest::Ptr{t8_forest}, adapt_callback; recursive = true, balance
             t8_forest_set_partition(new_forest, set_from, set_for_coarsening)
         end
 
-        t8_forest_set_ghost(new_forest, ghost, T8_GHOST_FACES) # Note: MPI support not available yet so it is a dummy call.
+        if ghost
+            t8_forest_set_ghost(new_forest, ghost, T8_GHOST_FACES)
+        end
 
         # The old forest is destroyed here.
         # Call `t8_forest_ref(Ref(mesh.forest))` to keep it.
@@ -914,27 +917,30 @@ function balance!(mesh::T8codeMesh)
     return nothing
 end
 
-"""
-    Trixi.partition!(mesh::T8codeMesh)
-
-Partition a `T8codeMesh` in order to redistribute elements evenly among MPI ranks.
-
-# Arguments
-- `mesh::T8codeMesh`: Initialized mesh object.
-"""
-function partition!(mesh::T8codeMesh)
+function partition(forest::Ptr{t8_forest})
     new_forest_ref = Ref{t8_forest_t}()
     t8_forest_init(new_forest_ref)
     new_forest = new_forest_ref[]
 
-    let set_from = mesh.forest, do_ghost = 1, allow_for_coarsening = 1
+    let set_from = forest, do_ghost = 1, allow_for_coarsening = 1
         t8_forest_set_partition(new_forest, set_from, allow_for_coarsening)
         t8_forest_set_ghost(new_forest, do_ghost, T8_GHOST_FACES)
         t8_forest_commit(new_forest)
     end
 
-    mesh.forest = new_forest
+    return new_forest
+end
+
+"""
+    Trixi.partition!(mesh::T8codeMesh)
 
+Partition a `T8codeMesh` in order to redistribute elements evenly among MPI ranks.
+
+# Arguments
+- `mesh::T8codeMesh`: Initialized mesh object.
+"""
+function partition!(mesh::T8codeMesh)
+    mesh.forest = partition(mesh.forest)
     return nothing
 end
 

From 0404f3b4b632398cd343af0fa8929949f20d1ed6 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 24 Jun 2024 14:38:36 +0200
Subject: [PATCH 45/89] Applied formatter and fixed typos.

---
 src/meshes/mesh_io.jl     | 2 +-
 src/meshes/t8code_mesh.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index 49a99b516e9..00242ba0169 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -227,7 +227,7 @@ end
 
 # This routine works for both, serial and MPI parallel mode. The forest
 # information is collected on all ranks and then gathered by the root rank.
-# Since only the `levels` array of unsigned bytes is bascially independent per
+# Since only the `levels` array of unsigned bytes is basicially independent per
 # rank it is not worth the effort to have a collective write to the HDF5 file.
 # Instead, `levels` gets gathered by the root rank and written in serial.
 function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 10b9060422a..a7f949706d4 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -245,7 +245,7 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
                    partition = false, ghost = false, user_data = C_NULL)
 
     if mpi_isparallel()
-      forest = partition(forest)
+        forest = partition(forest)
     end
 
     return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")

From 99972ccd8656d0b7b24a4c80d6ce7b3a64e85e7f Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 24 Jun 2024 14:45:33 +0200
Subject: [PATCH 46/89] Removed commented out section.

---
 test/test_t8code_2d.jl | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl
index f0607dd977d..279e858e8e6 100644
--- a/test/test_t8code_2d.jl
+++ b/test/test_t8code_2d.jl
@@ -15,21 +15,6 @@ mkdir(outdir)
 @testset "T8codeMesh2D" begin
 #! format: noindent
 
-# @trixi_testset "test save_mesh_file" begin
-#     @test_throws Exception begin
-#         # Save mesh file support will be added in the future. The following
-#         # lines of code are here for satisfying code coverage.
-# 
-#         # Create dummy mesh.
-#         mesh = T8codeMesh((1, 1), polydeg = 1,
-#                           mapping = Trixi.coordinates2mapping((-1.0, -1.0), (1.0, 1.0)),
-#                           initial_refinement_level = 1)
-# 
-#         # This call throws an error.
-#         Trixi.save_mesh_file(mesh, "dummy")
-#     end
-# end
-
 @trixi_testset "test load mesh from path" begin
     mktempdir() do path
         @test_throws "Unknown file extension: .unknown_ext" begin

From f59322c91b162878951b2c8ee37a9542a951c935 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 24 Jun 2024 15:16:29 +0200
Subject: [PATCH 47/89] Added missing union type member.

---
 src/callbacks_step/save_solution_dg.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl
index 33e4997ca93..50da208ccef 100644
--- a/src/callbacks_step/save_solution_dg.jl
+++ b/src/callbacks_step/save_solution_dg.jl
@@ -200,7 +200,8 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars,
 end
 
 function save_solution_file_on_root(data, time, dt, timestep, n_vars,
-                                    mesh::Union{ParallelTreeMesh, ParallelP4estMesh},
+                                    mesh::Union{ParallelTreeMesh, ParallelP4estMesh,
+                                                ParallelT8codeMesh},
                                     equations, dg::DG, cache,
                                     solution_variables, filename,
                                     element_variables = Dict{Symbol, Any}())

From be2fcdd7892d19d3e422c1245a0505ea4aa09918 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 26 Jun 2024 13:57:28 +0200
Subject: [PATCH 48/89] Switching from UInt64 to UInt128 in global
 interface/mortar id computation.

---
 src/meshes/t8code_mesh.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index a7f949706d4..ec524fc2a4a 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -1,3 +1,5 @@
+using Printf
+
 """
     T8codeMesh{NDIMS} <: AbstractMesh{NDIMS}
 
@@ -972,10 +974,10 @@ function count_interfaces(forest::Ptr{t8_forest}, ndims)
     local_num_mpi_conform = 0
     local_num_mpi_mortars = 0
 
-    visited_global_mortar_ids = Set{UInt64}([])
+    visited_global_mortar_ids = Set{UInt128}([])
 
     max_level = t8_forest_get_maxlevel(forest) #UInt64
-    max_tree_num_elements = UInt64(2^ndims)^max_level
+    max_tree_num_elements = UInt128(2^ndims)^max_level
 
     if mpi_isparallel()
         ghost_num_trees = t8_forest_ghost_num_trees(forest)
@@ -1144,12 +1146,12 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
 
     # Helper variables to compute unique global MPI interface/mortar ids.
     max_level = t8_forest_get_maxlevel(mesh.forest) #UInt64
-    max_tree_num_elements = UInt64(2^ndims(mesh))^max_level
+    max_tree_num_elements = (UInt128(2)^ndims(mesh))^max_level
 
     # These two variables help to ensure that we count MPI mortars from smaller
     # elements point of view only once.
-    visited_global_mortar_ids = Set{UInt64}([])
-    global_mortar_id_to_local = Dict{UInt64, Int}([])
+    visited_global_mortar_ids = Set{UInt128}([])
+    global_mortar_id_to_local = Dict{UInt128, Int}([])
 
     cmesh = t8_forest_get_cmesh(mesh.forest)
 

From 0283428954309fb140c67b87544e31a298f4ceb9 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 26 Jun 2024 13:58:18 +0200
Subject: [PATCH 49/89] Switching from UInt64 to UInt128 in global
 interface/mortar id computation (II).

---
 src/solvers/dgsem_t8code/containers_parallel.jl | 4 ++--
 src/solvers/dgsem_t8code/dg_parallel.jl         | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/solvers/dgsem_t8code/containers_parallel.jl b/src/solvers/dgsem_t8code/containers_parallel.jl
index 0cb3f5887a0..381f672f43d 100644
--- a/src/solvers/dgsem_t8code/containers_parallel.jl
+++ b/src/solvers/dgsem_t8code/containers_parallel.jl
@@ -21,8 +21,8 @@ function reinitialize_containers!(mesh::ParallelT8codeMesh, equations, dg::DGSEM
                      mpi_interfaces = mpi_interfaces,
 
                      # Temporary arrays for updating `mpi_cache`.
-                     global_mortar_ids = fill(UInt64(0), nmpimortars(mpi_mortars)),
-                     global_interface_ids = fill(UInt64(0), nmpiinterfaces(mpi_interfaces)),
+                     global_mortar_ids = fill(UInt128(0), nmpimortars(mpi_mortars)),
+                     global_interface_ids = fill(UInt128(0), nmpiinterfaces(mpi_interfaces)),
                      neighbor_ranks_mortar = Vector{Vector{Int}}(undef,
                                                                  nmpimortars(mpi_mortars)),
                      neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)))
diff --git a/src/solvers/dgsem_t8code/dg_parallel.jl b/src/solvers/dgsem_t8code/dg_parallel.jl
index ece614b7d75..d33d5445610 100644
--- a/src/solvers/dgsem_t8code/dg_parallel.jl
+++ b/src/solvers/dgsem_t8code/dg_parallel.jl
@@ -25,8 +25,8 @@ function create_cache(mesh::ParallelT8codeMesh, equations::AbstractEquations, dg
 
     mpi_mesh_info = (mpi_mortars = mpi_mortars,
                      mpi_interfaces = mpi_interfaces,
-                     global_mortar_ids = fill(UInt64(0), nmpimortars(mpi_mortars)),
-                     global_interface_ids = fill(UInt64(0),
+                     global_mortar_ids = fill(UInt128(0), nmpimortars(mpi_mortars)),
+                     global_interface_ids = fill(UInt128(0),
                                                  nmpiinterfaces(mpi_interfaces)),
                      neighbor_ranks_mortar = Vector{Vector{Int}}(undef,
                                                                  nmpimortars(mpi_mortars)),
@@ -75,7 +75,6 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelT8codeMesh,
                                                                                                         nvars,
                                                                                                         nnodes,
                                                                                                         uEltype)
-
     n_elements_global = Int(t8_forest_get_global_num_elements(mesh.forest))
     n_elements_local = Int(t8_forest_get_local_num_elements(mesh.forest))
 

From db9d4c63cdaa3aabc35156d2537bd53f022bb7a0 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 26 Jun 2024 13:59:01 +0200
Subject: [PATCH 50/89] Adding more tests.

---
 test/test_mpi_t8code_2d.jl | 10 +++++-----
 test/test_t8code_2d.jl     | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/test/test_mpi_t8code_2d.jl b/test/test_mpi_t8code_2d.jl
index c3a6ed7a253..db8e3025846 100644
--- a/test/test_mpi_t8code_2d.jl
+++ b/test/test_mpi_t8code_2d.jl
@@ -80,9 +80,9 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "t8code_2d_dgsem")
         @test_trixi_include(joinpath(EXAMPLES_DIR,
                                      "elixir_advection_amr_solution_independent.jl"),
                             # Expected errors are exactly the same as with TreeMesh!
-                            l2=[4.933027431215839e-5],
-                            linf=[0.00048678461161243136],
-                            coverage_override=(maxiters = 6,))
+                            l2=[4.949660644033807e-5],
+                            linf=[0.0004867846262313763],
+                            coverage_override=(maxiters = 6,), atol=1e-9)
 
         # Ensure that we do not have excessive memory allocations
         # (e.g., from type instabilities)
@@ -97,8 +97,8 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "t8code_2d_dgsem")
     @trixi_testset "elixir_advection_amr_unstructured_flag.jl" begin
         @test_trixi_include(joinpath(EXAMPLES_DIR,
                                      "elixir_advection_amr_unstructured_flag.jl"),
-                            l2=[0.001980652042312077],
-                            linf=[0.0328882442132265],
+                            l2=[0.001993165013217687],
+                            linf=[0.032891018571625796],
                             coverage_override=(maxiters = 6,))
 
         # Ensure that we do not have excessive memory allocations
diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl
index 279e858e8e6..418e3d422f1 100644
--- a/test/test_t8code_2d.jl
+++ b/test/test_t8code_2d.jl
@@ -137,6 +137,24 @@ end
     end
 end
 
+@trixi_testset "elixir_advection_restart.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"),
+                        l2=[4.507575525876275e-6],
+                        linf=[6.21489667023134e-5],
+                        # With the default `maxiters = 1` in coverage tests,
+                        # there would be no time steps after the restart.
+                        coverage_override=(maxiters = 100_000,))
+
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
+
 @trixi_testset "elixir_advection_restart_amr.jl" begin
     # This test is identical to the one in `test_p4est_2d.jl`.
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart_amr.jl"),

From b6e133391c554afe2c95b43720831b51326bc806 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 26 Jun 2024 14:13:18 +0200
Subject: [PATCH 51/89] Applied formatter.

---
 .../t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl  | 3 ++-
 src/solvers/dgsem_t8code/containers_parallel.jl                | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index 352455d986f..ca7b1da7fe2 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -69,7 +69,8 @@ amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first)
 amr_callback = AMRCallback(semi, amr_controller,
                            interval = 5,
                            adapt_initial_condition = true,
-                           adapt_initial_condition_only_refine = true)
+                           adapt_initial_condition_only_refine = true,
+                           dynamic_load_balancing = false)
 
 stepsize_callback = StepsizeCallback(cfl = 0.7)
 
diff --git a/src/solvers/dgsem_t8code/containers_parallel.jl b/src/solvers/dgsem_t8code/containers_parallel.jl
index 381f672f43d..731f6aea7e2 100644
--- a/src/solvers/dgsem_t8code/containers_parallel.jl
+++ b/src/solvers/dgsem_t8code/containers_parallel.jl
@@ -22,7 +22,8 @@ function reinitialize_containers!(mesh::ParallelT8codeMesh, equations, dg::DGSEM
 
                      # Temporary arrays for updating `mpi_cache`.
                      global_mortar_ids = fill(UInt128(0), nmpimortars(mpi_mortars)),
-                     global_interface_ids = fill(UInt128(0), nmpiinterfaces(mpi_interfaces)),
+                     global_interface_ids = fill(UInt128(0),
+                                                 nmpiinterfaces(mpi_interfaces)),
                      neighbor_ranks_mortar = Vector{Vector{Int}}(undef,
                                                                  nmpimortars(mpi_mortars)),
                      neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)))

From 1aea5cdcc84012be0365f68a4109b426e94401db Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Wed, 26 Jun 2024 14:23:53 +0200
Subject: [PATCH 52/89] Removed Printf.

---
 src/meshes/t8code_mesh.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index ec524fc2a4a..de36659a7cc 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -1,5 +1,3 @@
-using Printf
-
 """
     T8codeMesh{NDIMS} <: AbstractMesh{NDIMS}
 

From b60382337302c0ad65e27ab5ee2e32c2d418b7d2 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 28 Jun 2024 14:14:34 +0200
Subject: [PATCH 53/89] Incorporated review comments and code polish.

---
 ...ixir_advection_amr_solution_independent.jl |  9 ++-
 .../elixir_advection_amr_unstructured_flag.jl |  7 +++
 .../elixir_advection_restart_amr.jl           | 10 +++-
 examples/t8code_2d_dgsem/elixir_mhd_rotor.jl  |  9 ++-
 .../t8code_3d_dgsem/elixir_advection_amr.jl   |  9 ++-
 ...lixir_advection_amr_unstructured_curved.jl |  9 ++-
 src/meshes/mesh_io.jl                         |  9 +--
 src/meshes/t8code_mesh.jl                     | 55 ++++++++++++-------
 8 files changed, 88 insertions(+), 29 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
index d10cb167cb0..ab7d19a3207 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
@@ -129,7 +129,14 @@ amr_controller = ControllerThreeLevel(semi,
 amr_callback = AMRCallback(semi, amr_controller,
                            interval = 5,
                            adapt_initial_condition = true,
-                           adapt_initial_condition_only_refine = true)
+                           adapt_initial_condition_only_refine = true,
+                           dynamic_load_balancing = false)
+# We disable `dynamic_load_balancing` for now, since t8code does not support
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
+# dynamic AMR simulations are not perfectly deterministic with regard to
+# convergence tests. Once this feature is available in t8code, load balancing
+# will be enabled again.
 
 stepsize_callback = StepsizeCallback(cfl = 1.6)
 
diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index ca7b1da7fe2..2c2c55fb5b8 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -71,6 +71,13 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition = true,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
+# We disable `dynamic_load_balancing` for now, since t8code does not support
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
+# dynamic AMR simulations are not perfectly deterministic with regard to
+# convergence tests. Once this feature is available in t8code, load balancing
+# will be enabled again.
+
 
 stepsize_callback = StepsizeCallback(cfl = 0.7)
 
diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
index fd3623dd88b..2703eeedb0c 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
@@ -37,7 +37,15 @@ amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first)
 amr_callback = AMRCallback(semi, amr_controller,
                            interval = 5,
                            adapt_initial_condition = true,
-                           adapt_initial_condition_only_refine = true)
+                           adapt_initial_condition_only_refine = true,
+                           dynamic_load_balancing = false)
+# We disable `dynamic_load_balancing` for now, since t8code does not support
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
+# dynamic AMR simulations are not perfectly deterministic with regard to
+# convergence tests. Once this feature is available in t8code, load balancing
+# will be enabled again.
+
 callbacks_ext = CallbackSet(amr_callback, callbacks.discrete_callbacks...)
 
 integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false),
diff --git a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
index a09ea46fa34..87f46696096 100644
--- a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
+++ b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
@@ -108,7 +108,14 @@ amr_controller = ControllerThreeLevel(semi, amr_indicator,
 amr_callback = AMRCallback(semi, amr_controller,
                            interval = 5,
                            adapt_initial_condition = true,
-                           adapt_initial_condition_only_refine = true)
+                           adapt_initial_condition_only_refine = true,
+                           dynamic_load_balancing = false)
+# We disable `dynamic_load_balancing` for now, since t8code does not support
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
+# dynamic AMR simulations are not perfectly deterministic with regard to
+# convergence tests. Once this feature is available in t8code, load balancing
+# will be enabled again.
 
 cfl = 0.5
 stepsize_callback = StepsizeCallback(cfl = cfl)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr.jl b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
index 9ecc8955383..48e8a57cbcf 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
@@ -52,7 +52,14 @@ amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first)
 amr_callback = AMRCallback(semi, amr_controller,
                            interval = 5,
                            adapt_initial_condition = true,
-                           adapt_initial_condition_only_refine = true)
+                           adapt_initial_condition_only_refine = true,
+                           dynamic_load_balancing = false)
+# We disable `dynamic_load_balancing` for now, since t8code does not support
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
+# dynamic AMR simulations are not perfectly deterministic with regard to
+# convergence tests. Once this feature is available in t8code, load balancing
+# will be enabled again.
 
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
index a47d94b62d5..e65312b2f29 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
@@ -86,7 +86,14 @@ amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first)
 amr_callback = AMRCallback(semi, amr_controller,
                            interval = 5,
                            adapt_initial_condition = true,
-                           adapt_initial_condition_only_refine = true)
+                           adapt_initial_condition_only_refine = true,
+                           dynamic_load_balancing = false)
+# We disable `dynamic_load_balancing` for now, since t8code does not support
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
+# dynamic AMR simulations are not perfectly deterministic with regard to
+# convergence tests. Once this feature is available in t8code, load balancing
+# will be enabled again.
 
 stepsize_callback = StepsizeCallback(cfl = 1.2)
 
diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index 00242ba0169..8c0f7754402 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -227,11 +227,12 @@ end
 
 # This routine works for both, serial and MPI parallel mode. The forest
 # information is collected on all ranks and then gathered by the root rank.
-# Since only the `levels` array of unsigned bytes is basicially independent per
-# rank it is not worth the effort to have a collective write to the HDF5 file.
-# Instead, `levels` gets gathered by the root rank and written in serial.
+# Since only the `levels` array of UInt8 and the global number of elements per
+# tree (Int32) are necessary to reconstruct the forest, it is not worth the
+# effort to have a collective write to the HDF5 file. Instead, `levels` and
+# `num_elements_per_tree` get gathered by the root rank and written to disk.
 function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
-                        mpi_parallel::Any)
+                        mpi_parallel::Union{False,True})
 
     # Create output directory (if it does not exist).
     mpi_isroot() && mkpath(output_directory)
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index de36659a7cc..b5c0a2519d3 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -105,31 +105,42 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::T8codeMesh)
 end
 
 """
-    T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = 1, mapping = nothing)
+    T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
+               boundary_names, treeIDs, neighIDs, faces, duals,
+               orientations, levels, num_elements_per_tree)
 
-Main mesh constructor for the `T8codeMesh` wrapping around a given t8code
-`forest` object. This constructor is typically called by other `T8codeMesh`
-constructors.
+Constructor for the `T8codeMesh`. Typically called by the `load_mesh` routine. 
 
 # Arguments
-- `forest`: Pointer to a t8code forest.
+- `ndims`: Dimension of the mesh. 
+- `ntrees`: Global number of trees. 
+- `nelements`: Global number of elements. 
+- `tree_node_coordinates`: Node coordinates for each tree: [dimension, i, j, k, tree]
+- `nodes`: Array of interpolation nodes.
 - `boundary_names`: List of boundary names.
-- `polydeg::Integer`: Polynomial degree used to store the geometry of the mesh.
-                      The mapping will be approximated by an interpolation polynomial
-                      of the specified degree for each tree.
-- `mapping`: A function of `NDIMS` variables to describe the mapping that transforms
-             the imported mesh to the physical domain. Use `nothing` for the identity map.
+- `treeIDs`: List of tree IDs. The lenght is the number of conforming interfaces of the coarse mesh.
+- `neighIDs`: List of neighboring tree IDs. Same length as `treeIDs`.
+- `faces`: List of face IDs. Same length as `treeIDs`.
+- `duals`: List of face IDs of the neighboring tree. Same length as `treeIDs`.
+- `orientations`: Orientation number of the interface. Same length as `treeIDs`.
+- `levels`: List of levels of each element. Has length `nelements`.
+- `num_elements_per_tree`: List of global number of elements per tree. Has length `ntrees`.
+
+Returns a `T8codeMesh` object with a forest reconstructed by the input arguments.
 """
 function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
-                    boundary_names, elemIDs, neighIDs, faces, duals,
+                    boundary_names, treeIDs, neighIDs, faces, duals,
                     orientations, levels, num_elements_per_tree)
     Trixi.cmesh_ref = Ref(t8_cmesh_t())
     t8_cmesh_init(Trixi.cmesh_ref)
     cmesh = Trixi.cmesh_ref[]
 
     # Use linear geometry for now. There is no real Lagrange geometry
-    # implementation yet in t8code.
-    Trixi.linear_geom = Trixi.t8_geometry_linear_new(2)
+    # implementation (volume nodes) yet in t8code. Moreover, we need to store
+    # the pointer variables in the `Trixi` package in order to avoid garbage
+    # collection. Otherwise t8code segfaults. This is an un-feature of t8code
+    # (recently introduced) and will be fixed in the near future.
+    Trixi.linear_geom = Trixi.t8_geometry_linear_new(ndims)
     Trixi.linear_geom_ptr = pointer_from_objref(Ref(Trixi.linear_geom))
     t8_cmesh_register_geometry(cmesh, Trixi.linear_geom_ptr)
 
@@ -188,12 +199,12 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
             vertices[24] = tree_node_coordinates[3, N, N, N, i]
         end
 
-        t8_cmesh_set_tree_vertices(cmesh, i - 1, vertices, 3)
+        t8_cmesh_set_tree_vertices(cmesh, i - 1, vertices, 2^ndims)
     end
 
     # Connect the coarse mesh elements.
-    for i in 1:length(elemIDs)
-        t8_cmesh_set_join(cmesh, elemIDs[i], neighIDs[i], faces[i], duals[i],
+    for i in eachindex(treeIDs)
+        t8_cmesh_set_join(cmesh, treeIDs[i], neighIDs[i], faces[i], duals[i],
                           orientations[i])
     end
 
@@ -212,7 +223,7 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
 
     # Compute the offset within the to-be-reconstructed forest. Depends on the
     # MPI rank resp. first global tree id.
-    if mpi_rank() > 0
+    if mpi_rank() > 0 && t8_forest_get_local_num_elements(forest) > 0
         last_global_tree_id_of_preceding_rank = t8_forest_global_tree_id(forest, 0) - 1
         global_element_id += cum_sum_num_elements_per_tree[last_global_tree_id_of_preceding_rank + 1]
     end
@@ -244,6 +255,8 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
     forest = adapt(forest, adapt_callback; recursive = true, balance = false,
                    partition = false, ghost = false, user_data = C_NULL)
 
+    @assert t8_forest_get_global_num_elements(forest) == nelements
+
     if mpi_isparallel()
         forest = partition(forest)
     end
@@ -974,7 +987,7 @@ function count_interfaces(forest::Ptr{t8_forest}, ndims)
 
     visited_global_mortar_ids = Set{UInt128}([])
 
-    max_level = t8_forest_get_maxlevel(forest) #UInt64
+    max_level = t8_forest_get_maxlevel(forest)
     max_tree_num_elements = UInt128(2^ndims)^max_level
 
     if mpi_isparallel()
@@ -1143,8 +1156,8 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
     ]
 
     # Helper variables to compute unique global MPI interface/mortar ids.
-    max_level = t8_forest_get_maxlevel(mesh.forest) #UInt64
-    max_tree_num_elements = (UInt128(2)^ndims(mesh))^max_level
+    max_level = t8_forest_get_maxlevel(mesh.forest)
+    max_tree_num_elements = UInt128(2^ndims(mesh))^max_level
 
     # These two variables help to ensure that we count MPI mortars from smaller
     # elements point of view only once.
@@ -1438,6 +1451,8 @@ function get_cmesh_info(mesh::T8codeMesh)
     return get_cmesh_info(cmesh, ndims(mesh))
 end
 
+# Note, `cmesh` is not partitioned as of now.
+# Every MPI rank has a full copy of the `cmesh`.
 function get_cmesh_info(cmesh::Ptr{t8_cmesh}, ndims)
     num_trees = t8_cmesh_get_num_trees(cmesh)
     num_faces = 2 * ndims

From 6ef17a4d3ee645e94927c4675625a181bb614422 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 28 Jun 2024 14:15:13 +0200
Subject: [PATCH 54/89] Applied formatter.

---
 .../t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl   | 1 -
 src/meshes/mesh_io.jl                                           | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index 2c2c55fb5b8..88739660d2d 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -78,7 +78,6 @@ amr_callback = AMRCallback(semi, amr_controller,
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
 
-
 stepsize_callback = StepsizeCallback(cfl = 0.7)
 
 callbacks = CallbackSet(summary_callback,
diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index 8c0f7754402..0fc20775f7a 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -232,7 +232,7 @@ end
 # effort to have a collective write to the HDF5 file. Instead, `levels` and
 # `num_elements_per_tree` gets gathered by the root rank and written to disk.
 function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
-                        mpi_parallel::Union{False,True})
+                        mpi_parallel::Union{False, True})
 
     # Create output directory (if it does not exist).
     mpi_isroot() && mkpath(output_directory)

From 69b7ac10baf50a67ea447fc47198e09101745b36 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 28 Jun 2024 15:19:07 +0200
Subject: [PATCH 55/89] Fixed typos.

---
 .../elixir_advection_amr_solution_independent.jl              | 4 ++--
 .../t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl | 4 ++--
 examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl      | 4 ++--
 examples/t8code_2d_dgsem/elixir_mhd_rotor.jl                  | 4 ++--
 examples/t8code_3d_dgsem/elixir_advection_amr.jl              | 4 ++--
 .../elixir_advection_amr_unstructured_curved.jl               | 4 ++--
 src/meshes/t8code_mesh.jl                                     | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
index ab7d19a3207..cfa0a34bb34 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
@@ -132,8 +132,8 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
 # We disable `dynamic_load_balancing` for now, since t8code does not support
-# partioning for coarsening yet. That is, a complete family of elements always
-# stays on rank and is not split up due to partioning. Without this feature
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
 # dynamic AMR simulations are not pefectly deterministic regarding to
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index 88739660d2d..bd82809e016 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -72,8 +72,8 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
 # We disable `dynamic_load_balancing` for now, since t8code does not support
-# partioning for coarsening yet. That is, a complete family of elements always
-# stays on rank and is not split up due to partioning. Without this feature
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
 # dynamic AMR simulations are not pefectly deterministic regarding to
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
index 2703eeedb0c..68fa726227b 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
@@ -40,8 +40,8 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
 # We disable `dynamic_load_balancing` for now, since t8code does not support
-# partioning for coarsening yet. That is, a complete family of elements always
-# stays on rank and is not split up due to partioning. Without this feature
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
 # dynamic AMR simulations are not pefectly deterministic regarding to
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
diff --git a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
index 87f46696096..88a2888de62 100644
--- a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
+++ b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
@@ -111,8 +111,8 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
 # We disable `dynamic_load_balancing` for now, since t8code does not support
-# partioning for coarsening yet. That is, a complete family of elements always
-# stays on rank and is not split up due to partioning. Without this feature
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
 # dynamic AMR simulations are not pefectly deterministic regarding to
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr.jl b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
index 48e8a57cbcf..d2a0e3fb666 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
@@ -55,8 +55,8 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
 # We disable `dynamic_load_balancing` for now, since t8code does not support
-# partioning for coarsening yet. That is, a complete family of elements always
-# stays on rank and is not split up due to partioning. Without this feature
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
 # dynamic AMR simulations are not pefectly deterministic regarding to
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
index e65312b2f29..56b2574849f 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
@@ -89,8 +89,8 @@ amr_callback = AMRCallback(semi, amr_controller,
                            adapt_initial_condition_only_refine = true,
                            dynamic_load_balancing = false)
 # We disable `dynamic_load_balancing` for now, since t8code does not support
-# partioning for coarsening yet. That is, a complete family of elements always
-# stays on rank and is not split up due to partioning. Without this feature
+# partitioning for coarsening yet. That is, a complete family of elements always
+# stays on rank and is not split up due to partitioning. Without this feature
 # dynamic AMR simulations are not pefectly deterministic regarding to
 # convergent tests. Once this feature is available in t8code load balancing is
 # enabled again.
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index b5c0a2519d3..19ee671f6ce 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -118,7 +118,7 @@ Constructor for the `T8codeMesh`. Typically called by the `load_mesh` routine.
 - `tree_node_coordinates`: Node coordinates for each tree: [dimension, i, j, k, tree]
 - `nodes`: Array of interpolation nodes.
 - `boundary_names`: List of boundary names.
-- `treeIDs`: List of tree IDs. The lenght is the number of conforming interfaces of the coarse mesh.
+- `treeIDs`: List of tree IDs. The length is the number of conforming interfaces of the coarse mesh.
 - `neighIDs`: List of neighboring tree IDs. Same length as `treeIDs`.
 - `faces`: List of face IDs. Same length as `treeIDs`.
 - `duals`: List of face IDs of the neighboring tree. Same length as `treeIDs`.

From f42fff19152fd5a6fb360d163a7050bf8557b6e0 Mon Sep 17 00:00:00 2001
From: Lars Christmann <lars@l12n.eu>
Date: Tue, 2 Jul 2024 13:53:06 +0200
Subject: [PATCH 56/89] Introduce Enum for encoding surface indexing

!!! WIP, interferes with coupling API, so probably a breaking change !!!

This was previously done with tuples like (:begin, :i_forward, :j_forward)
but `Symbol`s cause issues when used within GPU kernels. Enums however
work fine, so they are used instead of symbols. Since `begin` and `end`
are reserved keywords, the corresponding enum values are called
`first` and `last` instead and contained in a module to prevent
name collisions.
---
 .../elixir_advection_coupled.jl               | 33 ++++----
 .../elixir_advection_meshview.jl              | 10 +--
 .../structured_2d_dgsem/elixir_mhd_coupled.jl |  8 +-
 .../semidiscretization_coupled.jl             | 12 +--
 src/solvers/dg.jl                             | 28 ++++++-
 src/solvers/dgsem_p4est/containers.jl         | 28 +++----
 src/solvers/dgsem_p4est/containers_2d.jl      | 32 ++++----
 src/solvers/dgsem_p4est/containers_3d.jl      | 78 +++++++++----------
 .../dgsem_p4est/containers_parallel.jl        | 10 +--
 .../dgsem_p4est/containers_parallel_2d.jl     | 12 +--
 .../dgsem_p4est/containers_parallel_3d.jl     | 34 ++++----
 src/solvers/dgsem_p4est/dg_2d.jl              | 16 ++--
 src/solvers/dgsem_p4est/dg_2d_parabolic.jl    |  4 +-
 src/solvers/dgsem_p4est/dg_2d_parallel.jl     |  4 +-
 src/solvers/dgsem_p4est/dg_3d.jl              | 30 +++----
 15 files changed, 184 insertions(+), 155 deletions(-)

diff --git a/examples/structured_2d_dgsem/elixir_advection_coupled.jl b/examples/structured_2d_dgsem/elixir_advection_coupled.jl
index 0002bb8d374..301d6f8e49b 100644
--- a/examples/structured_2d_dgsem/elixir_advection_coupled.jl
+++ b/examples/structured_2d_dgsem/elixir_advection_coupled.jl
@@ -1,5 +1,6 @@
 using OrdinaryDiffEq
 using Trixi
+import Trixi.Indexing
 
 ###############################################################################
 # Coupled semidiscretization of four linear advection systems using converter functions such that
@@ -61,13 +62,13 @@ coupling_function12 = (x, u, equations_other, equations_own) -> u
 coupling_function13 = (x, u, equations_other, equations_own) -> u
 
 # Define the coupling boundary conditions and the system it is coupled to.
-boundary_conditions_x_neg1 = BoundaryConditionCoupled(2, (:end, :i_forward), Float64,
+boundary_conditions_x_neg1 = BoundaryConditionCoupled(2, (Indexing.last, Indexing.i_forward), Float64,
                                                       coupling_function12)
-boundary_conditions_x_pos1 = BoundaryConditionCoupled(2, (:begin, :i_forward), Float64,
+boundary_conditions_x_pos1 = BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_forward), Float64,
                                                       coupling_function12)
-boundary_conditions_y_neg1 = BoundaryConditionCoupled(3, (:i_forward, :end), Float64,
+boundary_conditions_y_neg1 = BoundaryConditionCoupled(3, (Indexing.i_forward, Indexing.last), Float64,
                                                       coupling_function13)
-boundary_conditions_y_pos1 = BoundaryConditionCoupled(3, (:i_forward, :begin), Float64,
+boundary_conditions_y_pos1 = BoundaryConditionCoupled(3, (Indexing.i_forward, Indexing.first), Float64,
                                                       coupling_function13)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
@@ -93,13 +94,13 @@ coupling_function21 = (x, u, equations_other, equations_own) -> u
 coupling_function24 = (x, u, equations_other, equations_own) -> u
 
 # Define the coupling boundary conditions and the system it is coupled to.
-boundary_conditions_x_neg2 = BoundaryConditionCoupled(1, (:end, :i_forward), Float64,
+boundary_conditions_x_neg2 = BoundaryConditionCoupled(1, (Indexing.last, Indexing.i_forward), Float64,
                                                       coupling_function21)
-boundary_conditions_x_pos2 = BoundaryConditionCoupled(1, (:begin, :i_forward), Float64,
+boundary_conditions_x_pos2 = BoundaryConditionCoupled(1, (Indexing.first, Indexing.i_forward), Float64,
                                                       coupling_function21)
-boundary_conditions_y_neg2 = BoundaryConditionCoupled(4, (:i_forward, :end), Float64,
+boundary_conditions_y_neg2 = BoundaryConditionCoupled(4, (Indexing.i_forward, Indexing.last), Float64,
                                                       coupling_function24)
-boundary_conditions_y_pos2 = BoundaryConditionCoupled(4, (:i_forward, :begin), Float64,
+boundary_conditions_y_pos2 = BoundaryConditionCoupled(4, (Indexing.i_forward, Indexing.first), Float64,
                                                       coupling_function24)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
@@ -125,13 +126,13 @@ coupling_function34 = (x, u, equations_other, equations_own) -> u
 coupling_function31 = (x, u, equations_other, equations_own) -> u
 
 # Define the coupling boundary conditions and the system it is coupled to.
-boundary_conditions_x_neg3 = BoundaryConditionCoupled(4, (:end, :i_forward), Float64,
+boundary_conditions_x_neg3 = BoundaryConditionCoupled(4, (Indexing.last, Indexing.i_forward), Float64,
                                                       coupling_function34)
-boundary_conditions_x_pos3 = BoundaryConditionCoupled(4, (:begin, :i_forward), Float64,
+boundary_conditions_x_pos3 = BoundaryConditionCoupled(4, (Indexing.first, Indexing.i_forward), Float64,
                                                       coupling_function34)
-boundary_conditions_y_neg3 = BoundaryConditionCoupled(1, (:i_forward, :end), Float64,
+boundary_conditions_y_neg3 = BoundaryConditionCoupled(1, (Indexing.i_forward, Indexing.last), Float64,
                                                       coupling_function31)
-boundary_conditions_y_pos3 = BoundaryConditionCoupled(1, (:i_forward, :begin), Float64,
+boundary_conditions_y_pos3 = BoundaryConditionCoupled(1, (Indexing.i_forward, Indexing.first), Float64,
                                                       coupling_function31)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
@@ -157,13 +158,13 @@ coupling_function43 = (x, u, equations_other, equations_own) -> u
 coupling_function42 = (x, u, equations_other, equations_own) -> u
 
 # Define the coupling boundary conditions and the system it is coupled to.
-boundary_conditions_x_neg4 = BoundaryConditionCoupled(3, (:end, :i_forward), Float64,
+boundary_conditions_x_neg4 = BoundaryConditionCoupled(3, (Indexing.last, Indexing.i_forward), Float64,
                                                       coupling_function43)
-boundary_conditions_x_pos4 = BoundaryConditionCoupled(3, (:begin, :i_forward), Float64,
+boundary_conditions_x_pos4 = BoundaryConditionCoupled(3, (Indexing.first, Indexing.i_forward), Float64,
                                                       coupling_function43)
-boundary_conditions_y_neg4 = BoundaryConditionCoupled(2, (:i_forward, :end), Float64,
+boundary_conditions_y_neg4 = BoundaryConditionCoupled(2, (Indexing.i_forward, Indexing.last), Float64,
                                                       coupling_function42)
-boundary_conditions_y_pos4 = BoundaryConditionCoupled(2, (:i_forward, :begin), Float64,
+boundary_conditions_y_pos4 = BoundaryConditionCoupled(2, (Indexing.i_forward, Indexing.first), Float64,
                                                       coupling_function42)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
diff --git a/examples/structured_2d_dgsem/elixir_advection_meshview.jl b/examples/structured_2d_dgsem/elixir_advection_meshview.jl
index d8d27031090..d618467dcae 100644
--- a/examples/structured_2d_dgsem/elixir_advection_meshview.jl
+++ b/examples/structured_2d_dgsem/elixir_advection_meshview.jl
@@ -51,21 +51,21 @@ mesh2 = StructuredMeshView(parent_mesh; indices_min = (9, 1), indices_max = (16,
 coupling_function = (x, u, equations_other, equations_own) -> u
 
 # Define the coupled boundary conditions
-# The indices (:end, :i_forward) and (:begin, :i_forward) denote the interface indexing.
+# The indices (Indexing.last, Indexing.i_forward) and (Indexing.first, Indexing.i_forward) denote the interface indexing.
 # For a system with coupling in x and y see examples/structured_2d_dgsem/elixir_advection_coupled.jl.
 boundary_conditions1 = (
                         # Connect left boundary with right boundary of left mesh
-                        x_neg = BoundaryConditionCoupled(2, (:end, :i_forward), Float64,
+                        x_neg = BoundaryConditionCoupled(2, (Indexing.last, Indexing.i_forward), Float64,
                                                          coupling_function),
-                        x_pos = BoundaryConditionCoupled(2, (:begin, :i_forward), Float64,
+                        x_pos = BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_forward), Float64,
                                                          coupling_function),
                         y_neg = boundary_condition_periodic,
                         y_pos = boundary_condition_periodic)
 boundary_conditions2 = (
                         # Connect left boundary with right boundary of left mesh
-                        x_neg = BoundaryConditionCoupled(1, (:end, :i_forward), Float64,
+                        x_neg = BoundaryConditionCoupled(1, (Indexing.last, Indexing.i_forward), Float64,
                                                          coupling_function),
-                        x_pos = BoundaryConditionCoupled(1, (:begin, :i_forward), Float64,
+                        x_pos = BoundaryConditionCoupled(1, (Indexing.first, Indexing.i_forward), Float64,
                                                          coupling_function),
                         y_neg = boundary_condition_periodic,
                         y_pos = boundary_condition_periodic)
diff --git a/examples/structured_2d_dgsem/elixir_mhd_coupled.jl b/examples/structured_2d_dgsem/elixir_mhd_coupled.jl
index d3aa4ecf582..0a8d20010e3 100644
--- a/examples/structured_2d_dgsem/elixir_mhd_coupled.jl
+++ b/examples/structured_2d_dgsem/elixir_mhd_coupled.jl
@@ -57,9 +57,9 @@ mesh1 = StructuredMesh(cells_per_dimension,
                        periodicity = (false, true))
 
 coupling_function1 = (x, u, equations_other, equations_own) -> u
-boundary_conditions1 = (x_neg = BoundaryConditionCoupled(2, (:end, :i_forward), Float64,
+boundary_conditions1 = (x_neg = BoundaryConditionCoupled(2, (Indexing.last, Indexing.i_forward), Float64,
                                                          coupling_function1),
-                        x_pos = BoundaryConditionCoupled(2, (:begin, :i_forward), Float64,
+                        x_pos = BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_forward), Float64,
                                                          coupling_function1),
                         y_neg = boundary_condition_periodic,
                         y_pos = boundary_condition_periodic)
@@ -80,9 +80,9 @@ mesh2 = StructuredMesh(cells_per_dimension,
                        periodicity = (false, true))
 
 coupling_function2 = (x, u, equations_other, equations_own) -> u
-boundary_conditions2 = (x_neg = BoundaryConditionCoupled(1, (:end, :i_forward), Float64,
+boundary_conditions2 = (x_neg = BoundaryConditionCoupled(1, (Indexing.last, Indexing.i_forward), Float64,
                                                          coupling_function2),
-                        x_pos = BoundaryConditionCoupled(1, (:begin, :i_forward), Float64,
+                        x_pos = BoundaryConditionCoupled(1, (Indexing.first, Indexing.i_forward), Float64,
                                                          coupling_function2),
                         y_neg = boundary_condition_periodic,
                         y_pos = boundary_condition_periodic)
diff --git a/src/semidiscretization/semidiscretization_coupled.jl b/src/semidiscretization/semidiscretization_coupled.jl
index 6b009cfad20..632fc087100 100644
--- a/src/semidiscretization/semidiscretization_coupled.jl
+++ b/src/semidiscretization/semidiscretization_coupled.jl
@@ -417,13 +417,13 @@ This is currently only implemented for [`StructuredMesh`](@ref).
 ```julia
 # Connect the left boundary of mesh 2 to our boundary such that our positive
 # boundary direction will match the positive y direction of the other boundary
-BoundaryConditionCoupled(2, (:begin, :i), Float64, fun)
+BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_forward), Float64, fun)
 
 # Connect the same two boundaries oppositely oriented
-BoundaryConditionCoupled(2, (:begin, :i_backwards), Float64, fun)
+BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_backward), Float64, fun)
 
 # Using this as y_neg boundary will connect `our_cells[i, 1, j]` to `other_cells[j, end-i, end]`
-BoundaryConditionCoupled(2, (:j, :i_backwards, :end), Float64, fun)
+BoundaryConditionCoupled(2, (Indexing.j_forward, Indexing.i_backward, Indexing.last), Float64, fun)
 ```
 
 !!! warning "Experimental code"
@@ -448,11 +448,11 @@ mutable struct BoundaryConditionCoupled{NDIMS,
         NDIMS = length(indices)
         u_boundary = Array{uEltype, NDIMS * 2 - 1}(undef, ntuple(_ -> 0, NDIMS * 2 - 1))
 
-        if indices[1] in (:begin, :end)
+        if indices[1] in (Indexing.first, Indexing.last)
             other_orientation = 1
-        elseif indices[2] in (:begin, :end)
+        elseif indices[2] in (Indexing.first, Indexing.last)
             other_orientation = 2
-        else # indices[3] in (:begin, :end)
+        else # indices[3] in (Indexing.first, Indexing.last)
             other_orientation = 3
         end
 
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index fb4c8f182e0..84741679191 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -743,6 +743,33 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg:
     end
 end
 
+end # @muladd; put it up here because module definition below needs to be at top level
+
+# For some mesh types, elements next to a surface may have local coordinate systems
+# that are not aligned so the nodes may have to be indexed differently.
+# `IndexInfo` is used to describe how the nodes should be indexed.
+# For example, in 2d a `Tuple` with two `IndexInfo` objects, one for each dimension,
+# would be used.
+# `first` or `last` indicates that the corresponding index is constant and is either
+# the first or the last one. This effectively encodes the position of the surface
+# with respect to the local coordinate system. The other `IndexInfo` object(s)
+# encode if the index in the corresponding dimension is running forward or backward.
+#
+# The enum is wrapped in a module, and only the type `IndexInfo` is exported, so that the
+# enum values do not pollute the enclosing namespace and are accessed via `Indexing.value`.
+module Indexing
+@enum IndexInfo begin
+    first
+    last
+    i_forward
+    i_backward
+    j_forward
+    j_backward
+end
+export IndexInfo
+end
+using .Indexing
+
 # Discretizations specific to each mesh type of Trixi.jl
 # If some functionality is shared by multiple combinations of meshes/solvers,
 # it is defined in the directory of the most basic mesh and solver type.
@@ -756,4 +783,3 @@ include("dgsem_structured/dg.jl")
 include("dgsem_unstructured/dg.jl")
 include("dgsem_p4est/dg.jl")
 include("dgsem_t8code/dg.jl")
-end # @muladd
diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl
index f9830d0011c..c6522c3f6b5 100644
--- a/src/solvers/dgsem_p4est/containers.jl
+++ b/src/solvers/dgsem_p4est/containers.jl
@@ -130,12 +130,12 @@ mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <:
                AbstractContainer
     u::Array{uEltype, NDIMSP2}       # [primary/secondary, variable, i, j, interface]
     neighbor_ids::Matrix{Int}                   # [primary/secondary, interface]
-    node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface]
+    node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [primary/secondary, interface]
 
     # internal `resize!`able storage
     _u::Vector{uEltype}
     _neighbor_ids::Vector{Int}
-    _node_indices::Vector{NTuple{NDIMS, Symbol}}
+    _node_indices::Vector{NTuple{NDIMS, IndexInfo}}
 end
 
 @inline function ninterfaces(interfaces::P4estInterfaceContainer)
@@ -183,7 +183,7 @@ function init_interfaces(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e
     _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces)
     neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces))
 
-    _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces)
+    _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_interfaces)
     node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces))
 
     interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids,
@@ -206,7 +206,7 @@ mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <:
                AbstractContainer
     u::Array{uEltype, NDIMSP1}       # [variables, i, j, boundary]
     neighbor_ids::Vector{Int}                   # [boundary]
-    node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary]
+    node_indices::Vector{NTuple{NDIMS, IndexInfo}} # [boundary]
     name::Vector{Symbol}                # [boundary]
 
     # internal `resize!`able storage
@@ -256,7 +256,7 @@ function init_boundaries(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e
                      n_boundaries))
 
     neighbor_ids = Vector{Int}(undef, n_boundaries)
-    node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries)
+    node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, n_boundaries)
     names = Vector{Symbol}(undef, n_boundaries)
 
     boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids,
@@ -337,12 +337,12 @@ mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <:
                AbstractContainer
     u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar]
     neighbor_ids::Matrix{Int}             # [position, mortar]
-    node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar]
+    node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [small/large, mortar]
 
     # internal `resize!`able storage
     _u::Vector{uEltype}
     _neighbor_ids::Vector{Int}
-    _node_indices::Vector{NTuple{NDIMS, Symbol}}
+    _node_indices::Vector{NTuple{NDIMS, IndexInfo}}
 end
 
 @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2)
@@ -390,7 +390,7 @@ function init_mortars(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elem
     neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids),
                                (2^(NDIMS - 1) + 1, n_mortars))
 
-    _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars)
+    _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_mortars)
     node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars))
 
     mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u,
@@ -705,17 +705,17 @@ end
 
 # Return direction of the face, which is indexed by node_indices
 @inline function indices2direction(indices)
-    if indices[1] === :begin
+    if indices[1] === Indexing.first
         return 1
-    elseif indices[1] === :end
+    elseif indices[1] === Indexing.last
         return 2
-    elseif indices[2] === :begin
+    elseif indices[2] === Indexing.first
         return 3
-    elseif indices[2] === :end
+    elseif indices[2] === Indexing.last
         return 4
-    elseif indices[3] === :begin
+    elseif indices[3] === Indexing.first
         return 5
-    else # if indices[3] === :end
+    else # if indices[3] === Indexing.last
         return 6
     end
 end
diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl
index 236d7d24c06..8b12559f200 100644
--- a/src/solvers/dgsem_p4est/containers_2d.jl
+++ b/src/solvers/dgsem_p4est/containers_2d.jl
@@ -94,24 +94,24 @@ end
         # relative to the interface.
         if side == 1 || orientation == 0
             # Forward indexing
-            i = :i_forward
+            i = Indexing.i_forward
         else
             # Backward indexing
-            i = :i_backward
+            i = Indexing.i_backward
         end
 
         if faces[side] == 0
             # Index face in negative x-direction
-            interfaces.node_indices[side, interface_id] = (:begin, i)
+            interfaces.node_indices[side, interface_id] = (Indexing.first, i)
         elseif faces[side] == 1
             # Index face in positive x-direction
-            interfaces.node_indices[side, interface_id] = (:end, i)
+            interfaces.node_indices[side, interface_id] = (Indexing.last, i)
         elseif faces[side] == 2
             # Index face in negative y-direction
-            interfaces.node_indices[side, interface_id] = (i, :begin)
+            interfaces.node_indices[side, interface_id] = (i, Indexing.first)
         else # faces[side] == 3
             # Index face in positive y-direction
-            interfaces.node_indices[side, interface_id] = (i, :end)
+            interfaces.node_indices[side, interface_id] = (i, Indexing.last)
         end
     end
 
@@ -123,16 +123,16 @@ end
                                              face, boundary_id)
     if face == 0
         # Index face in negative x-direction
-        boundaries.node_indices[boundary_id] = (:begin, :i_forward)
+        boundaries.node_indices[boundary_id] = (Indexing.first, Indexing.i_forward)
     elseif face == 1
         # Index face in positive x-direction
-        boundaries.node_indices[boundary_id] = (:end, :i_forward)
+        boundaries.node_indices[boundary_id] = (Indexing.last, Indexing.i_forward)
     elseif face == 2
         # Index face in negative y-direction
-        boundaries.node_indices[boundary_id] = (:i_forward, :begin)
+        boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.first)
     else # face == 3
         # Index face in positive y-direction
-        boundaries.node_indices[boundary_id] = (:i_forward, :end)
+        boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.last)
     end
 
     return boundaries
@@ -147,24 +147,24 @@ end
         # relative to the mortar.
         if side == 1 || orientation == 0
             # Forward indexing for small side or orientation == 0
-            i = :i_forward
+            i = Indexing.i_forward
         else
             # Backward indexing for large side with reversed orientation
-            i = :i_backward
+            i = Indexing.i_backward
         end
 
         if faces[side] == 0
             # Index face in negative x-direction
-            mortars.node_indices[side, mortar_id] = (:begin, i)
+            mortars.node_indices[side, mortar_id] = (Indexing.first, i)
         elseif faces[side] == 1
             # Index face in positive x-direction
-            mortars.node_indices[side, mortar_id] = (:end, i)
+            mortars.node_indices[side, mortar_id] = (Indexing.last, i)
         elseif faces[side] == 2
             # Index face in negative y-direction
-            mortars.node_indices[side, mortar_id] = (i, :begin)
+            mortars.node_indices[side, mortar_id] = (i, Indexing.first)
         else # faces[side] == 3
             # Index face in positive y-direction
-            mortars.node_indices[side, mortar_id] = (i, :end)
+            mortars.node_indices[side, mortar_id] = (i, Indexing.last)
         end
     end
 
diff --git a/src/solvers/dgsem_p4est/containers_3d.jl b/src/solvers/dgsem_p4est/containers_3d.jl
index 7e383924ba7..88bbc693a2a 100644
--- a/src/solvers/dgsem_p4est/containers_3d.jl
+++ b/src/solvers/dgsem_p4est/containers_3d.jl
@@ -81,11 +81,11 @@ end
                                               faces, orientation, interface_id)
     # Iterate over primary and secondary element
     for side in 1:2
-        # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)).
+        # Align interface at the primary element (primary element has surface indices (Indexing.i_forward, Indexing.j_forward)).
         # The secondary element needs to be indexed differently.
         if side == 1
-            surface_index1 = :i_forward
-            surface_index2 = :j_forward
+            surface_index1 = Indexing.i_forward
+            surface_index2 = Indexing.j_forward
         else
             surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2],
                                                                           faces[1],
@@ -94,28 +94,28 @@ end
 
         if faces[side] == 0
             # Index face in negative x-direction
-            interfaces.node_indices[side, interface_id] = (:begin, surface_index1,
+            interfaces.node_indices[side, interface_id] = (Indexing.first, surface_index1,
                                                            surface_index2)
         elseif faces[side] == 1
             # Index face in positive x-direction
-            interfaces.node_indices[side, interface_id] = (:end, surface_index1,
+            interfaces.node_indices[side, interface_id] = (Indexing.last, surface_index1,
                                                            surface_index2)
         elseif faces[side] == 2
             # Index face in negative y-direction
-            interfaces.node_indices[side, interface_id] = (surface_index1, :begin,
+            interfaces.node_indices[side, interface_id] = (surface_index1, Indexing.first,
                                                            surface_index2)
         elseif faces[side] == 3
             # Index face in positive y-direction
-            interfaces.node_indices[side, interface_id] = (surface_index1, :end,
+            interfaces.node_indices[side, interface_id] = (surface_index1, Indexing.last,
                                                            surface_index2)
         elseif faces[side] == 4
             # Index face in negative z-direction
             interfaces.node_indices[side, interface_id] = (surface_index1,
-                                                           surface_index2, :begin)
+                                                           surface_index2, Indexing.first)
         else # faces[side] == 5
             # Index face in positive z-direction
             interfaces.node_indices[side, interface_id] = (surface_index1,
-                                                           surface_index2, :end)
+                                                           surface_index2, Indexing.last)
         end
     end
 
@@ -127,22 +127,22 @@ end
                                              face, boundary_id)
     if face == 0
         # Index face in negative x-direction
-        boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward)
+        boundaries.node_indices[boundary_id] = (Indexing.first, Indexing.i_forward, Indexing.j_forward)
     elseif face == 1
         # Index face in positive x-direction
-        boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward)
+        boundaries.node_indices[boundary_id] = (Indexing.last, Indexing.i_forward, Indexing.j_forward)
     elseif face == 2
         # Index face in negative y-direction
-        boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward)
+        boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.first, Indexing.j_forward)
     elseif face == 3
         # Index face in positive y-direction
-        boundaries.node_indices[boundary_id] = (:i_forward, :end, :j_forward)
+        boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.last, Indexing.j_forward)
     elseif face == 4
         # Index face in negative z-direction
-        boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin)
+        boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.j_forward, Indexing.first)
     else # face == 5
         # Index face in positive z-direction
-        boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end)
+        boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.j_forward, Indexing.last)
     end
 
     return boundaries
@@ -156,8 +156,8 @@ end
         # Align mortar at small side.
         # The large side needs to be indexed differently.
         if side == 1
-            surface_index1 = :i_forward
-            surface_index2 = :j_forward
+            surface_index1 = Indexing.i_forward
+            surface_index2 = Indexing.j_forward
         else
             surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2],
                                                                           faces[1],
@@ -166,28 +166,28 @@ end
 
         if faces[side] == 0
             # Index face in negative x-direction
-            mortars.node_indices[side, mortar_id] = (:begin, surface_index1,
+            mortars.node_indices[side, mortar_id] = (Indexing.first, surface_index1,
                                                      surface_index2)
         elseif faces[side] == 1
             # Index face in positive x-direction
-            mortars.node_indices[side, mortar_id] = (:end, surface_index1,
+            mortars.node_indices[side, mortar_id] = (Indexing.last, surface_index1,
                                                      surface_index2)
         elseif faces[side] == 2
             # Index face in negative y-direction
-            mortars.node_indices[side, mortar_id] = (surface_index1, :begin,
+            mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.first,
                                                      surface_index2)
         elseif faces[side] == 3
             # Index face in positive y-direction
-            mortars.node_indices[side, mortar_id] = (surface_index1, :end,
+            mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.last,
                                                      surface_index2)
         elseif faces[side] == 4
             # Index face in negative z-direction
             mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2,
-                                                     :begin)
+                                                     Indexing.first)
         else # faces[side] == 5
             # Index face in positive z-direction
             mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2,
-                                                     :end)
+                                                     Indexing.last)
         end
     end
 
@@ -228,8 +228,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑            ↑
             #     │            │
             #     └───> ξ      └───> ξ
-            surface_index1 = :i_forward
-            surface_index2 = :j_forward
+            surface_index1 = Indexing.i_forward
+            surface_index2 = Indexing.j_forward
         elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side
                 ||
                 (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side
@@ -241,8 +241,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑            │
             #     │            ↓
             #     └───> ξ      ξ
-            surface_index1 = :j_backward
-            surface_index2 = :i_forward
+            surface_index1 = Indexing.j_backward
+            surface_index2 = Indexing.i_forward
         elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side
                 ||
                 (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side
@@ -254,8 +254,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑                 ↑
             #     │                 │
             #     └───> ξ     η <───┘
-            surface_index1 = :j_forward
-            surface_index2 = :i_backward
+            surface_index1 = Indexing.j_forward
+            surface_index2 = Indexing.i_backward
         else # orientation_code == 3
             # Corner 0 of my side matches corner 3 of other side and
             # corner 0 of other side matches corner 3 of my side.
@@ -267,8 +267,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑                 │
             #     │                 ↓
             #     └───> ξ           η
-            surface_index1 = :i_backward
-            surface_index2 = :j_backward
+            surface_index1 = Indexing.i_backward
+            surface_index2 = Indexing.j_backward
         end
     else # flipped
         if orientation_code == 0
@@ -281,8 +281,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑            ↑
             #     │            │
             #     └───> ξ      └───> η
-            surface_index1 = :j_forward
-            surface_index2 = :i_forward
+            surface_index1 = Indexing.j_forward
+            surface_index2 = Indexing.i_forward
         elseif orientation_code == 2
             # Corner 0 of my side matches corner 2 of other side and
             # corner 0 of other side matches corner 2 of my side.
@@ -294,8 +294,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑            │
             #     │            ↓
             #     └───> ξ      η
-            surface_index1 = :i_forward
-            surface_index2 = :j_backward
+            surface_index1 = Indexing.i_forward
+            surface_index2 = Indexing.j_backward
         elseif orientation_code == 1
             # Corner 0 of my side matches corner 1 of other side and
             # corner 0 of other side matches corner 1 of my side.
@@ -307,8 +307,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑                 ↑
             #     │                 │
             #     └───> ξ     ξ <───┘
-            surface_index1 = :i_backward
-            surface_index2 = :j_forward
+            surface_index1 = Indexing.i_backward
+            surface_index2 = Indexing.j_forward
         else # orientation_code == 3
             # Corner 0 of my side matches corner 3 of other side and
             # corner 0 of other side matches corner 3 of my side.
@@ -320,8 +320,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code)
             #     ↑                 │
             #     │                 ↓
             #     └───> ξ           ξ
-            surface_index1 = :j_backward
-            surface_index2 = :i_backward
+            surface_index1 = Indexing.j_backward
+            surface_index2 = Indexing.i_backward
         end
     end
 
diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl
index fd2749155bb..949872bfa0f 100644
--- a/src/solvers/dgsem_p4est/containers_parallel.jl
+++ b/src/solvers/dgsem_p4est/containers_parallel.jl
@@ -9,7 +9,7 @@ mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <:
                AbstractContainer
     u::Array{uEltype, NDIMSP2}       # [primary/secondary, variable, i, j, interface]
     local_neighbor_ids::Vector{Int}                   # [interface]
-    node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface]
+    node_indices::Vector{NTuple{NDIMS, IndexInfo}} # [interface]
     local_sides::Vector{Int}                   # [interface]
 
     # internal `resize!`able storage
@@ -60,7 +60,7 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh},
 
     local_neighbor_ids = Vector{Int}(undef, n_mpi_interfaces)
 
-    node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_mpi_interfaces)
+    node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, n_mpi_interfaces)
 
     local_sides = Vector{Int}(undef, n_mpi_interfaces)
 
@@ -92,11 +92,11 @@ mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, ND
     u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar]
     local_neighbor_ids::Vector{Vector{Int}} # [mortar]
     local_neighbor_positions::Vector{Vector{Int}} # [mortar]
-    node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar]
+    node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [small/large, mortar]
     normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar]
     # internal `resize!`able storage
     _u::Vector{uEltype}
-    _node_indices::Vector{NTuple{NDIMS, Symbol}}
+    _node_indices::Vector{NTuple{NDIMS, IndexInfo}}
     _normal_directions::Vector{RealT}
 end
 
@@ -153,7 +153,7 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq
     local_neighbor_ids = fill(Vector{Int}(), n_mpi_mortars)
     local_neighbor_positions = fill(Vector{Int}(), n_mpi_mortars)
 
-    _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mpi_mortars)
+    _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_mpi_mortars)
     node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mpi_mortars))
 
     _normal_directions = Vector{RealT}(undef,
diff --git a/src/solvers/dgsem_p4est/containers_parallel_2d.jl b/src/solvers/dgsem_p4est/containers_parallel_2d.jl
index d531d33821b..198b3280742 100644
--- a/src/solvers/dgsem_p4est/containers_parallel_2d.jl
+++ b/src/solvers/dgsem_p4est/containers_parallel_2d.jl
@@ -14,24 +14,24 @@
     # relative to the interface.
     if local_side == 1 || orientation == 0
         # Forward indexing
-        i = :i_forward
+        i = Indexing.i_forward
     else
         # Backward indexing
-        i = :i_backward
+        i = Indexing.i_backward
     end
 
     if faces[local_side] == 0
         # Index face in negative x-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i)
+        mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.first, i)
     elseif faces[local_side] == 1
         # Index face in positive x-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (:end, i)
+        mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.last, i)
     elseif faces[local_side] == 2
         # Index face in negative y-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin)
+        mpi_interfaces.node_indices[mpi_interface_id] = (i, Indexing.first)
     else # faces[local_side] == 3
         # Index face in positive y-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (i, :end)
+        mpi_interfaces.node_indices[mpi_interface_id] = (i, Indexing.last)
     end
 
     return mpi_interfaces
diff --git a/src/solvers/dgsem_p4est/containers_parallel_3d.jl b/src/solvers/dgsem_p4est/containers_parallel_3d.jl
index 56f0a543b97..37cbc6bb236 100644
--- a/src/solvers/dgsem_p4est/containers_parallel_3d.jl
+++ b/src/solvers/dgsem_p4est/containers_parallel_3d.jl
@@ -9,11 +9,11 @@
 @inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{3},
                                                   faces, local_side, orientation,
                                                   mpi_interface_id)
-    # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)).
+    # Align interface at the primary element (primary element has surface indices (Indexing.i_forward, Indexing.j_forward)).
     # The secondary element needs to be indexed differently.
     if local_side == 1
-        surface_index1 = :i_forward
-        surface_index2 = :j_forward
+        surface_index1 = Indexing.i_forward
+        surface_index2 = Indexing.j_forward
     else # local_side == 2
         surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2],
                                                                       faces[1],
@@ -22,28 +22,28 @@
 
     if faces[local_side] == 0
         # Index face in negative x-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1,
+        mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.first, surface_index1,
                                                          surface_index2)
     elseif faces[local_side] == 1
         # Index face in positive x-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (:end, surface_index1,
+        mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.last, surface_index1,
                                                          surface_index2)
     elseif faces[local_side] == 2
         # Index face in negative y-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin,
+        mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, Indexing.first,
                                                          surface_index2)
     elseif faces[local_side] == 3
         # Index face in positive y-direction
-        mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end,
+        mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, Indexing.last,
                                                          surface_index2)
     elseif faces[local_side] == 4
         # Index face in negative z-direction
         mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2,
-                                                         :begin)
+                                                         Indexing.first)
     else # faces[local_side] == 5
         # Index face in positive z-direction
         mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2,
-                                                         :end)
+                                                         Indexing.last)
     end
 
     return mpi_interfaces
@@ -57,8 +57,8 @@ end
         # Align mortar at small side.
         # The large side needs to be indexed differently.
         if side == 1
-            surface_index1 = :i_forward
-            surface_index2 = :j_forward
+            surface_index1 = Indexing.i_forward
+            surface_index2 = Indexing.j_forward
         else
             surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2],
                                                                           faces[1],
@@ -67,28 +67,28 @@ end
 
         if faces[side] == 0
             # Index face in negative x-direction
-            mortars.node_indices[side, mortar_id] = (:begin, surface_index1,
+            mortars.node_indices[side, mortar_id] = (Indexing.first, surface_index1,
                                                      surface_index2)
         elseif faces[side] == 1
             # Index face in positive x-direction
-            mortars.node_indices[side, mortar_id] = (:end, surface_index1,
+            mortars.node_indices[side, mortar_id] = (Indexing.last, surface_index1,
                                                      surface_index2)
         elseif faces[side] == 2
             # Index face in negative y-direction
-            mortars.node_indices[side, mortar_id] = (surface_index1, :begin,
+            mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.first,
                                                      surface_index2)
         elseif faces[side] == 3
             # Index face in positive y-direction
-            mortars.node_indices[side, mortar_id] = (surface_index1, :end,
+            mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.last,
                                                      surface_index2)
         elseif faces[side] == 4
             # Index face in negative z-direction
             mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2,
-                                                     :begin)
+                                                     Indexing.first)
         else # faces[side] == 5
             # Index face in positive z-direction
             mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2,
-                                                     :end)
+                                                     Indexing.last)
         end
     end
 
diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl
index 36624f2ce8a..742534c6a0d 100644
--- a/src/solvers/dgsem_p4est/dg_2d.jl
+++ b/src/solvers/dgsem_p4est/dg_2d.jl
@@ -20,7 +20,7 @@ function create_cache(mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations,
     (; fstar_upper_threaded, fstar_lower_threaded, u_threaded)
 end
 
-#     index_to_start_step_2d(index::Symbol, index_range)
+#     index_to_start_step_2d(index::IndexInfo, index_range)
 #
 # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`),
 # return `index_start, index_step`, i.e., a tuple containing
@@ -41,17 +41,17 @@ end
 #       i_volume += i_volume_step
 #       j_volume += j_volume_step
 #     end
-@inline function index_to_start_step_2d(index::Symbol, index_range)
+@inline function index_to_start_step_2d(index::IndexInfo, index_range)
     index_begin = first(index_range)
     index_end = last(index_range)
 
-    if index === :begin
+    if index === Indexing.first
         return index_begin, 0
-    elseif index === :end
+    elseif index === Indexing.last
         return index_end, 0
-    elseif index === :i_forward
+    elseif index === Indexing.i_forward
         return index_begin, 1
-    else # if index === :i_backward
+    else # if index === Indexing.i_backward
         return index_end, -1
     end
 end
@@ -145,7 +145,7 @@ function calc_interface_flux!(surface_flux_values,
         # Initiate the secondary index to be used in the surface for loop.
         # This index on the primary side will always run forward but
         # the secondary index might need to run backwards for flipped sides.
-        if :i_backward in secondary_indices
+        if Indexing.i_backward in secondary_indices
             node_secondary = index_end
             node_secondary_step = -1
         else
@@ -606,7 +606,7 @@ end
     large_indices = node_indices[2, mortar]
     large_direction = indices2direction(large_indices)
 
-    if :i_backward in large_indices
+    if Indexing.i_backward in large_indices
         for i in eachnode(dg)
             for v in eachvariable(equations)
                 surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v,
diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
index ed21f371449..7ce6b26ed3c 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
@@ -427,7 +427,7 @@ end
     large_indices = node_indices[2, mortar]
     large_direction = indices2direction(large_indices)
 
-    if :i_backward in large_indices
+    if Indexing.i_backward in large_indices
         for i in eachnode(dg)
             for v in eachvariable(equations)
                 surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v,
@@ -636,7 +636,7 @@ function calc_interface_flux!(surface_flux_values,
         # Initiate the secondary index to be used in the surface for loop.
         # This index on the primary side will always run forward but
         # the secondary index might need to run backwards for flipped sides.
-        if :i_backward in secondary_indices
+        if Indexing.i_backward in secondary_indices
             node_secondary = index_end
             node_secondary_step = -1
         else
diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl
index 3bf0cd0cab5..fcefa2089a9 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl
@@ -71,7 +71,7 @@ function calc_mpi_interface_flux!(surface_flux_values,
 
         # Initiate the node index to be used in the surface for loop,
         # the surface flux storage must be indexed in alignment with the local element indexing
-        if :i_backward in local_indices
+        if Indexing.i_backward in local_indices
             surface_node = index_end
             surface_node_step = -1
         else
@@ -308,7 +308,7 @@ end
             # correct orientation.
             # Note that the index of the small sides will always run forward but
             # the index of the large side might need to run backwards for flipped sides.
-            if :i_backward in large_indices
+            if Indexing.i_backward in large_indices
                 for i in eachnode(dg)
                     for v in eachvariable(equations)
                         surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v,
diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl
index 5b3c5ae5ca8..000fab384b7 100644
--- a/src/solvers/dgsem_p4est/dg_3d.jl
+++ b/src/solvers/dgsem_p4est/dg_3d.jl
@@ -24,7 +24,7 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
     (; fstar_threaded, fstar_tmp_threaded, u_threaded)
 end
 
-#     index_to_start_step_3d(index::Symbol, index_range)
+#     index_to_start_step_3d(index::IndexInfo, index_range)
 #
 # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`),
 # return `index_start, index_step_i, index_step_j`, i.e., a tuple containing
@@ -53,36 +53,38 @@ end
 #       j_volume += j_volume_step_j
 #       k_volume += k_volume_step_j
 #     end
-@inline function index_to_start_step_3d(index::Symbol, index_range)
+@inline function index_to_start_step_3d(index::IndexInfo, index_range)
     index_begin = first(index_range)
     index_end = last(index_range)
 
-    if index === :begin
+    if index === Indexing.first
         return index_begin, 0, 0
-    elseif index === :end
+    elseif index === Indexing.last
         return index_end, 0, 0
-    elseif index === :i_forward
+    elseif index === Indexing.i_forward
         return index_begin, 1, index_begin - index_end - 1
-    elseif index === :i_backward
+    elseif index === Indexing.i_backward
         return index_end, -1, index_end + 1 - index_begin
-    elseif index === :j_forward
+    elseif index === Indexing.j_forward
         return index_begin, 0, 1
-    else # if index === :j_backward
+    else # if index === Indexing.j_backward
         return index_end, 0, -1
     end
 end
 
-# Extract the two varying indices from a symbolic index tuple.
-# For example, `surface_indices((:i_forward, :end, :j_forward)) == (:i_forward, :j_forward)`.
-@inline function surface_indices(indices::NTuple{3, Symbol})
+# Extract the two varying indices from an IndexInfo tuple.
+# For example, 
+# `surface_indices((Indexing.i_forward, Indexing.last, Indexing.j_forward)) == 
+#    (Indexing.i_forward, Indexing.j_forward)`.
+@inline function surface_indices(indices::NTuple{3, IndexInfo})
     i1, i2, i3 = indices
     index = i1
-    (index === :begin || index === :end) && return (i2, i3)
+    (index === Indexing.first || index === Indexing.last) && return (i2, i3)
 
     index = i2
-    (index === :begin || index === :end) && return (i1, i3)
+    (index === Indexing.first || index === Indexing.last) && return (i1, i3)
 
-    # i3 in (:begin, :end)
+    # i3 in (Indexing.first, Indexing.last)
     return (i1, i2)
 end
 

From 87dd7b4012b220ff17df3f545b8afa1fc85ca3ed Mon Sep 17 00:00:00 2001
From: Lars Christmann <lars@l12n.eu>
Date: Tue, 2 Jul 2024 13:53:43 +0200
Subject: [PATCH 57/89] Support `adapt` for SemiHyperbolic based on P4estMesh

This change adds support for `Adapt.adapt` for objects of type
`SemidiscretizationHyperbolic` based on `P4estMesh`. This allows
integration of such objects with KernelAbstractions.jl. In particular,
these objects can be moved to a GPU now. This is necessary to
facilitate running computations on GPUs.

Some architectural changes and enhancements were necessary to achieve
this.

The type system around the container types was extended. The abstract
type `AbstractContainer` has an abstract subtype
`AbstractHeterogeneousContainer{T, B}` now that represents a
container that supports heterogeneous computing via KA.jl.
This allows information about the backend and whether KA.jl is used
to be propagated and used in downstream functions via the type system.
The type requires subtypes to implement a few methods, in particular
they must support Adapt.jl.

The p4est solver containers now subtype this new type and were changed
accordingly to support the required methods.

To add support for Adapt.jl, the macro Adapt.@adapt_structure
can be used in some cases to generate the required method.
However, the macro always uses the default constructor of a type
which uses the types of the arguments as type parameters. Most
types, in particular the container types, use other type
parameters and thus must use custom implementations of
the `Adapt.adapt_structure` function. In some cases, these
are currently not type stable (this is WIP).

Besides the container types, some solver-related types like
`LobattoLegendreBasis` and boundary condition types
also need to support Adapt.jl. This support was added as well.

As of now, equations usually use `SVector`s as parameters which
are bitstypes if their eltype is a bitstype and so they do not
have to be explicitly transferred to a GPU to be used in kernels.
Nonetheless, the equations are adapted when the
semidiscretization is adapted. By default this is a no-op
but allows for easier extension in the future if necessary.

The cache object is a `NamedTuple` and thus handled by a
standard method provided by Adapt.jl. Some parts of the
cache related to Mortars (e.g. the threaded caches) are
arrays of arrays where Adapt.jl will try to convert the
outer array by default which is not desired. Hence, a small
wrapper `VecOfArrays` is provided which implements
the desired behavior for Adapt.jl and is used for threaded
caches and other "vector of arrays" type objects that
need Adapt.jl support.

The SemidiscretizationHyperbolic constructors have been changed so that
the performance counter can be preserved when a semidiscretization is
adapted.
---
 Project.toml                                  |  10 +-
 src/Trixi.jl                                  |   5 +-
 src/auxiliary/containers.jl                   |  49 +++
 src/auxiliary/vector_of_arrays.jl             |  22 ++
 .../semidiscretization_hyperbolic.jl          |  47 ++-
 src/solvers/dg.jl                             |   4 +
 src/solvers/dgsem/basis_lobatto_legendre.jl   |  35 ++
 src/solvers/dgsem_p4est/containers.jl         | 322 ++++++++++++++----
 .../dgsem_p4est/containers_parallel.jl        | 111 ++++--
 src/solvers/dgsem_p4est/dg_3d.jl              |   6 +-
 src/solvers/dgsem_p4est/dg_parallel.jl        |  77 +++--
 src/solvers/dgsem_unstructured/dg_2d.jl       |   2 +-
 .../sort_boundary_conditions.jl               |  25 +-
 13 files changed, 581 insertions(+), 134 deletions(-)
 create mode 100644 src/auxiliary/vector_of_arrays.jl

diff --git a/Project.toml b/Project.toml
index 995d8ec6558..c6aacba7643 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Michael Schlottke-Lakemper <m.schlottke-lakemper@hlrs.de>", "Gregor
 version = "0.8.1-DEV"
 
 [deps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
 ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
@@ -16,6 +17,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
@@ -50,15 +52,16 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [weakdeps]
-Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
 ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
+Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 
 [extensions]
-TrixiMakieExt = "Makie"
 TrixiConvexECOSExt = ["Convex", "ECOS"]
+TrixiMakieExt = "Makie"
 
 [compat]
+Adapt = "3.7, 4.0"
 CodeTracking = "1.0.5"
 ConstructionBase = "1.3"
 Convex = "0.16"
@@ -73,6 +76,7 @@ FillArrays = "0.13.2, 1"
 ForwardDiff = "0.10.24"
 HDF5 = "0.16.10, 0.17"
 IfElse = "0.1"
+KernelAbstractions = "0.9"
 LinearAlgebra = "1"
 LinearMaps = "2.7, 3.0"
 LoopVectorization = "0.12.151"
@@ -109,6 +113,6 @@ UUIDs = "1.6"
 julia = "1.8"
 
 [extras]
-Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
 ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
+Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
diff --git a/src/Trixi.jl b/src/Trixi.jl
index b8364eef445..fea492c70b1 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -40,6 +40,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!,
                   terminate!, remake, add_tstop!, has_tstop, first_tstop
 
 using Downloads: Downloads
+import Adapt
 using CodeTracking: CodeTracking
 using ConstructionBase: ConstructionBase
 using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect
@@ -48,6 +49,7 @@ using FillArrays: Ones, Zeros
 using ForwardDiff: ForwardDiff
 using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace
 using IfElse: ifelse
+using KernelAbstractions
 using LinearMaps: LinearMap
 using LoopVectorization: LoopVectorization, @turbo, indices
 using StaticArrayInterface: static_length # used by LoopVectorization
@@ -59,7 +61,7 @@ using P4est
 using T8code
 using RecipesBase: RecipesBase
 using Requires: @require
-using Static: Static, One, True, False
+using Static: Static, One, StaticBool, True, False
 @reexport using StaticArrays: SVector
 using StaticArrays: StaticArrays, MVector, MArray, SMatrix, @SMatrix
 using StrideArrays: PtrArray, StrideArray, StaticInt
@@ -122,6 +124,7 @@ include("auxiliary/auxiliary.jl")
 include("auxiliary/mpi.jl")
 include("auxiliary/p4est.jl")
 include("auxiliary/t8code.jl")
+include("auxiliary/vector_of_arrays.jl")
 include("equations/equations.jl")
 include("meshes/meshes.jl")
 include("solvers/solvers.jl")
diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl
index 90650f6abcf..f17907cb98d 100644
--- a/src/auxiliary/containers.jl
+++ b/src/auxiliary/containers.jl
@@ -314,4 +314,53 @@ end
 function raw_copy!(c::AbstractContainer, from::Int, destination::Int)
     raw_copy!(c, c, from, from, destination)
 end
+
+# Containers that support heterogeneous computing via KernelAbstractions.jl
+# should be subtypes of this type. 
+#
+# The first type parameter must be `Array` by default and if 
+# `Adapt.adapt_structure(to, container)` is called then this must be
+# `typeof(to)`. This is used in downstream code, e.g. for calling
+# `wrap_array` with an appropriate type after `resize!`ing.
+#
+# The second type parameter determines if KA.jl is used.
+# By default, each container must initialize this to `false`.
+# However, when `Adapt.adapt_structure` is called on the container, it must
+# be changed to `true`.
+abstract type AbstractHeterogeneousContainer{T, B} <: AbstractContainer end
+uses_ka(::Any) = false # need ::Any here since not all containers <: AbstractContainer
+uses_ka(::AbstractHeterogeneousContainer{T, B}) where {T, B} = B
+array_type(::AbstractHeterogeneousContainer{T}) where {T} = T
+function backend_or_nothing(c) # untyped like `uses_ka`: not all containers <: AbstractContainer
+    # Return the KA backend if KA is used by `c`, else `nothing`
+    if uses_ka(c) # compile-time constant
+        return get_backend(c)
+    else
+        return nothing
+    end
+end
+# Subtypes must implement a method for these functions
+function Adapt.adapt_structure(to, ::AbstractHeterogeneousContainer)
+    error("required method not implemented")
+end
+function KernelAbstractions.get_backend(::AbstractHeterogeneousContainer)
+    error("required method not implemented") # fallback; subtypes must add their own method
+end
+
+# For some KA.jl backends like CUDA.jl, empty arrays do seem to simply be
+# null pointers which can cause `unsafe_wrap` to fail when calling
+# Adapt.adapt (ArgumentError, see
+# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229).
+# To circumvent this, on length zero arrays this allocates
+# a separate empty array instead of wrapping.
+# However, since zero length arrays are not used in calculations,
+# it should be okay if the underlying storage vectors and wrapped arrays
+# are not the same as long as they are properly wrapped when `resize!`d etc.
+function unsafe_wrap_or_alloc(to, vec, size)
+    if length(vec) == 0
+        return allocate(get_backend(vec), eltype(vec), size)
+    else
+        return unsafe_wrap(to, pointer(vec), size)
+    end
+end
 end # @muladd
diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl
new file mode 100644
index 00000000000..777ceae2256
--- /dev/null
+++ b/src/auxiliary/vector_of_arrays.jl
@@ -0,0 +1,22 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector.
+# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is
+# not possible for a plain Vector of Arrays.
+struct VecOfArrays{T <: AbstractArray}
+    arrays::Vector{T}
+end
+Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i)
+Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays)
+Base.size(v::VecOfArrays) = Base.size(v.arrays)
+Base.length(v::VecOfArrays) = Base.length(v.arrays)
+Base.eltype(v::VecOfArrays{T}) where {T} = T
+function Adapt.adapt_structure(to, v::VecOfArrays)
+    return [Adapt.adapt(to, arr) for arr in v.arrays] |> VecOfArrays
+end
+end # @muladd
diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl
index 7c82a132a0b..b2f0616a331 100644
--- a/src/semidiscretization/semidiscretization_hyperbolic.jl
+++ b/src/semidiscretization/semidiscretization_hyperbolic.jl
@@ -30,21 +30,21 @@ struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition,
 
     function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition,
                                           BoundaryConditions, SourceTerms, Solver,
-                                          Cache}(mesh::Mesh, equations::Equations,
+                                          Cache}(mesh::Mesh,
+                                                 equations::Equations,
                                                  initial_condition::InitialCondition,
                                                  boundary_conditions::BoundaryConditions,
                                                  source_terms::SourceTerms,
                                                  solver::Solver,
-                                                 cache::Cache) where {Mesh, Equations,
-                                                                      InitialCondition,
-                                                                      BoundaryConditions,
-                                                                      SourceTerms,
-                                                                      Solver,
-                                                                      Cache}
+                                                 cache::Cache,
+                                                 performance_counter::PerformanceCounter) where {Mesh, Equations,
+                                                                                                 InitialCondition,
+                                                                                                 BoundaryConditions,
+                                                                                                 SourceTerms,
+                                                                                                 Solver,
+                                                                                                 Cache}
         @assert ndims(mesh) == ndims(equations)
 
-        performance_counter = PerformanceCounter()
-
         new(mesh, equations, initial_condition, boundary_conditions, source_terms,
             solver, cache, performance_counter)
     end
@@ -74,6 +74,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver
 
     check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions)
 
+    performance_counter = PerformanceCounter()
+
     SemidiscretizationHyperbolic{typeof(mesh), typeof(equations),
                                  typeof(initial_condition),
                                  typeof(_boundary_conditions), typeof(source_terms),
@@ -81,7 +83,7 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver
                                                                 initial_condition,
                                                                 _boundary_conditions,
                                                                 source_terms, solver,
-                                                                cache)
+                                                                cache, performance_counter)
 end
 
 # Create a new semidiscretization but change some parameters compared to the input.
@@ -103,6 +105,30 @@ function remake(semi::SemidiscretizationHyperbolic; uEltype = real(semi.solver),
                                  source_terms, boundary_conditions, uEltype)
 end
 
+function Adapt.adapt_structure(to, semi::SemidiscretizationHyperbolic)
+    if !(typeof(semi.mesh) <: P4estMesh)
+        error("Adapt.adapt is only supported for semidiscretizations based on P4estMesh")
+    end
+
+    mesh = semi.mesh
+    equations = Adapt.adapt_structure(to, semi.equations)
+    initial_condition = Adapt.adapt_structure(to, semi.initial_condition)
+    boundary_conditions = Adapt.adapt_structure(to, semi.boundary_conditions)
+    source_terms = Adapt.adapt_structure(to, semi.source_terms)
+    solver = Adapt.adapt_structure(to, semi.solver)
+    cache = Adapt.adapt_structure(to, semi.cache)
+    performance_counter = semi.performance_counter
+
+    SemidiscretizationHyperbolic{typeof(mesh), typeof(equations),
+                                 typeof(initial_condition),
+                                 typeof(boundary_conditions), typeof(source_terms),
+                                 typeof(solver), typeof(cache)}(mesh, equations,
+                                                                initial_condition,
+                                                                boundary_conditions,
+                                                                source_terms, solver,
+                                                                cache, performance_counter)
+end
+
 # general fallback
 function digest_boundary_conditions(boundary_conditions, mesh, solver, cache)
     boundary_conditions
@@ -316,6 +342,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperboli
         summary_line(io, "source terms", semi.source_terms)
         summary_line(io, "solver", semi.solver |> typeof |> nameof)
         summary_line(io, "total #DOFs per field", ndofsglobal(semi))
+        summary_line(io, "backend", backend_or_nothing(semi.cache.elements))
         summary_footer(io)
     end
 end
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index 84741679191..9423e385c20 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -770,6 +770,10 @@ export IndexInfo
 end
 using .Indexing
 
+# Adapt.@adapt_structure macro must be outside of the same `begin ... end` block where
+# the type it is used on is defined, otherwise this throws an UndefVarError
+Adapt.@adapt_structure DG
+
 # Discretizations specific to each mesh type of Trixi.jl
 # If some functionality is shared by multiple combinations of meshes/solvers,
 # it is defined in the directory of the most basic mesh and solver type.
diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl
index cac1dba9c74..22e1b66cc17 100644
--- a/src/solvers/dgsem/basis_lobatto_legendre.jl
+++ b/src/solvers/dgsem/basis_lobatto_legendre.jl
@@ -128,6 +128,31 @@ In particular, not the nodes themselves are returned.
 
 @inline get_nodes(basis::LobattoLegendreBasis) = basis.nodes
 
+function Adapt.adapt_structure(to, basis::LobattoLegendreBasis)
+    # Do not adapt SVector fields, i.e. nodes, weights and inverse_weights
+    (; nodes, weights, inverse_weights) = basis
+    inverse_vandermonde_legendre = Adapt.adapt_structure(to,
+                                                         basis.inverse_vandermonde_legendre)
+    boundary_interpolation = basis.boundary_interpolation
+    derivative_matrix = Adapt.adapt_structure(to, basis.derivative_matrix)
+    derivative_split = Adapt.adapt_structure(to, basis.derivative_split)
+    derivative_split_transpose = Adapt.adapt_structure(to,
+                                                       basis.derivative_split_transpose)
+    derivative_dhat = Adapt.adapt_structure(to, basis.derivative_dhat)
+    return LobattoLegendreBasis{real(basis), nnodes(basis), typeof(basis.nodes),
+                                typeof(inverse_vandermonde_legendre),
+                                typeof(boundary_interpolation),
+                                typeof(derivative_matrix)}(nodes,
+                                                           weights,
+                                                           inverse_weights,
+                                                           inverse_vandermonde_legendre,
+                                                           boundary_interpolation,
+                                                           derivative_matrix,
+                                                           derivative_split,
+                                                           derivative_split_transpose,
+                                                           derivative_dhat)
+end
+
 """
     integrate(f, u, basis::LobattoLegendreBasis)
 
@@ -216,6 +241,16 @@ end
 
 @inline polydeg(mortar::LobattoLegendreMortarL2) = nnodes(mortar) - 1
 
+function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2)
+    forward_upper = Adapt.adapt_structure(to, mortar.forward_upper)
+    forward_lower = Adapt.adapt_structure(to, mortar.forward_lower)
+    reverse_upper = Adapt.adapt_structure(to, mortar.reverse_upper)
+    reverse_lower = Adapt.adapt_structure(to, mortar.reverse_lower)
+    return LobattoLegendreMortarL2{real(mortar), nnodes(mortar), typeof(forward_upper),
+                                   typeof(reverse_upper)}(forward_upper, forward_lower,
+                                                          reverse_upper, reverse_lower)
+end
+
 # TODO: We can create EC mortars along the lines of the following implementation.
 # abstract type AbstractMortarEC{RealT} <: AbstractMortar{RealT} end
 
diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl
index c6522c3f6b5..082b0ab9304 100644
--- a/src/solvers/dgsem_p4est/containers.jl
+++ b/src/solvers/dgsem_p4est/containers.jl
@@ -6,25 +6,32 @@
 #! format: noindent
 
 mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1,
-                                     NDIMSP2, NDIMSP3} <: AbstractContainer
+                                     NDIMSP2, NDIMSP3,
+                                     ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1},
+                                     ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2},
+                                     ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3},
+                                     VectorRealT <: DenseVector{RealT},
+                                     VectoruEltype <: DenseVector{uEltype},
+                                     ArrayType, Bool} <:
+               AbstractHeterogeneousContainer{ArrayType, Bool}
     # Physical coordinates at each node
-    node_coordinates::Array{RealT, NDIMSP2}   # [orientation, node_i, node_j, node_k, element]
+    node_coordinates::ArrayNDIMSP2   # [orientation, node_i, node_j, node_k, element]
     # Jacobian matrix of the transformation
     # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,...
-    jacobian_matrix::Array{RealT, NDIMSP3}
+    jacobian_matrix::ArrayNDIMSP3
     # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension)
-    contravariant_vectors::Array{RealT, NDIMSP3}   # [dimension, index, node_i, node_j, node_k, element]
+    contravariant_vectors::ArrayNDIMSP3   # [dimension, index, node_i, node_j, node_k, element]
     # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix)
-    inverse_jacobian::Array{RealT, NDIMSP1}   # [node_i, node_j, node_k, element]
+    inverse_jacobian::ArrayNDIMSP1   # [node_i, node_j, node_k, element]
     # Buffer for calculated surface flux
-    surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element]
+    surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element]
 
     # internal `resize!`able storage
-    _node_coordinates::Vector{RealT}
-    _jacobian_matrix::Vector{RealT}
-    _contravariant_vectors::Vector{RealT}
-    _inverse_jacobian::Vector{RealT}
-    _surface_flux_values::Vector{uEltype}
+    _node_coordinates::VectorRealT
+    _jacobian_matrix::VectorRealT
+    _contravariant_vectors::VectorRealT
+    _inverse_jacobian::VectorRealT
+    _surface_flux_values::VectoruEltype
 end
 
 @inline function nelements(elements::P4estElementContainer)
@@ -51,28 +58,30 @@ function Base.resize!(elements::P4estElementContainer, capacity)
     n_dims = ndims(elements)
     n_nodes = size(elements.node_coordinates, 2)
     n_variables = size(elements.surface_flux_values, 1)
+    ArrayType = array_type(elements)
 
     resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity)
-    elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates),
+    elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates),
                                             (n_dims, ntuple(_ -> n_nodes, n_dims)...,
                                              capacity))
 
     resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity)
-    elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix),
+    elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix),
                                            (n_dims, n_dims,
                                             ntuple(_ -> n_nodes, n_dims)..., capacity))
 
     resize!(_contravariant_vectors, length(_jacobian_matrix))
-    elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors),
+    elements.contravariant_vectors = unsafe_wrap(ArrayType,
+                                                 pointer(_contravariant_vectors),
                                                  size(elements.jacobian_matrix))
 
     resize!(_inverse_jacobian, n_nodes^n_dims * capacity)
-    elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian),
+    elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian),
                                             (ntuple(_ -> n_nodes, n_dims)..., capacity))
 
     resize!(_surface_flux_values,
             n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity)
-    elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values),
+    elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values),
                                                (n_variables,
                                                 ntuple(_ -> n_nodes, n_dims - 1)...,
                                                 n_dims * 2, capacity))
@@ -115,33 +124,106 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, RealT}, T8codeMesh{NDIMS, Re
                                        NDIMS * 2, nelements))
 
     elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2,
-                                     NDIMS + 3}(node_coordinates, jacobian_matrix,
-                                                contravariant_vectors,
-                                                inverse_jacobian, surface_flux_values,
-                                                _node_coordinates, _jacobian_matrix,
-                                                _contravariant_vectors,
-                                                _inverse_jacobian, _surface_flux_values)
+                                     NDIMS + 3, Array{RealT, NDIMS + 1},
+                                     Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3},
+                                     Vector{RealT}, Vector{uEltype}, Array, false}(node_coordinates,
+                                                                                   jacobian_matrix,
+                                                                                   contravariant_vectors,
+                                                                                   inverse_jacobian,
+                                                                                   surface_flux_values,
+                                                                                   _node_coordinates,
+                                                                                   _jacobian_matrix,
+                                                                                   _contravariant_vectors,
+                                                                                   _inverse_jacobian,
+                                                                                   _surface_flux_values)
 
     init_elements!(elements, mesh, basis)
     return elements
 end
 
-mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <:
-               AbstractContainer
-    u::Array{uEltype, NDIMSP2}       # [primary/secondary, variable, i, j, interface]
-    neighbor_ids::Matrix{Int}                   # [primary/secondary, interface]
-    node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [primary/secondary, interface]
+# Required methods due to <: AbstractHeterogeneousContainer
+function KernelAbstractions.get_backend(elements::P4estElementContainer)
+    return KernelAbstractions.get_backend(elements.node_coordinates)
+end
+function Adapt.adapt_structure(to,
+                               elements::P4estElementContainer{NDIMS, RealT, uEltype}) where {
+                                                                                              NDIMS,
+                                                                                              RealT,
+                                                                                              uEltype
+                                                                                              }
+    # Adapt the underlying flat storage (the `_`-prefixed fields) to `to`
+    _node_coordinates = Adapt.adapt_structure(to, elements._node_coordinates)
+    _jacobian_matrix = Adapt.adapt_structure(to, elements._jacobian_matrix)
+    _contravariant_vectors = Adapt.adapt_structure(to, elements._contravariant_vectors)
+    _inverse_jacobian = Adapt.adapt_structure(to, elements._inverse_jacobian)
+    _surface_flux_values = Adapt.adapt_structure(to, elements._surface_flux_values)
+    
+    # Rebuild the multi-dimensional arrays from the adapted storage, keeping the old sizes
+    node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates,
+                                            size(elements.node_coordinates))
+    jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix,
+                                           size(elements.jacobian_matrix))
+    contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors,
+                                                 size(jacobian_matrix))
+    inverse_jacobian = unsafe_wrap_or_alloc(to, _inverse_jacobian,
+                                            size(elements.inverse_jacobian))
+    surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values,
+                                               size(elements.surface_flux_values))
+    # Type parameters of the adapted container, in the order used by `init_elements`
+    new_type_params = (NDIMS,
+                       RealT,
+                       uEltype,
+                       NDIMS + 1,
+                       NDIMS + 2,
+                       NDIMS + 3,
+                       typeof(inverse_jacobian), # ArrayNDIMSP1
+                       typeof(node_coordinates), # ArrayNDIMSP2
+                       typeof(jacobian_matrix), # ArrayNDIMSP3
+                       typeof(_node_coordinates), # VectorRealT
+                       typeof(_surface_flux_values), # VectoruEltype
+                       to,
+                       true)
+    return P4estElementContainer{new_type_params...}(node_coordinates,
+                                                     jacobian_matrix,
+                                                     contravariant_vectors,
+                                                     inverse_jacobian,
+                                                     surface_flux_values,
+                                                     _node_coordinates,
+                                                     _jacobian_matrix,
+                                                     _contravariant_vectors,
+                                                     _inverse_jacobian,
+                                                     _surface_flux_values)
+end
+
+mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2,
+                                       uArray <: DenseArray{uEltype, NDIMSP2},
+                                       IdsMatrix <: DenseMatrix{Int},
+                                       IndicesMatrix <:
+                                       DenseMatrix{NTuple{NDIMS, IndexInfo}},
+                                       uVector <: DenseVector{uEltype},
+                                       IdsVector <: DenseVector{Int},
+                                       IndicesVector <:
+                                       DenseVector{NTuple{NDIMS, IndexInfo}},
+                                       ArrayType, Bool} <:
+               AbstractHeterogeneousContainer{ArrayType, Bool}
+    u::uArray       # [primary/secondary, variable, i, j, interface]
+    neighbor_ids::IdsMatrix                   # [primary/secondary, interface]
+    node_indices::IndicesMatrix # [primary/secondary, interface]
 
     # internal `resize!`able storage
-    _u::Vector{uEltype}
-    _neighbor_ids::Vector{Int}
-    _node_indices::Vector{NTuple{NDIMS, IndexInfo}}
+    _u::uVector
+    _neighbor_ids::IdsVector
+    _node_indices::IndicesVector
 end
 
 @inline function ninterfaces(interfaces::P4estInterfaceContainer)
     size(interfaces.neighbor_ids, 2)
 end
 @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS
+@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS,
+                                                                               uEltype}
+    uEltype
+end
 
 # See explanation of Base.resize! for the element container
 function Base.resize!(interfaces::P4estInterfaceContainer, capacity)
@@ -150,17 +232,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity)
     n_dims = ndims(interfaces)
     n_nodes = size(interfaces.u, 3)
     n_variables = size(interfaces.u, 2)
+    ArrayType = array_type(interfaces)
 
     resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity)
-    interfaces.u = unsafe_wrap(Array, pointer(_u),
+    interfaces.u = unsafe_wrap(ArrayType, pointer(_u),
                                (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)...,
                                 capacity))
 
     resize!(_neighbor_ids, 2 * capacity)
-    interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity))
+    interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids),
+                                          (2, capacity))
 
     resize!(_node_indices, 2 * capacity)
-    interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity))
+    interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices),
+                                          (2, capacity))
 
     return nothing
 end
@@ -186,10 +271,16 @@ function init_interfaces(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e
     _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_interfaces)
     node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces))
 
-    interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids,
-                                                                    node_indices,
-                                                                    _u, _neighbor_ids,
-                                                                    _node_indices)
+    interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2,
+                                         typeof(u), typeof(neighbor_ids),
+                                         typeof(node_indices), typeof(_u),
+                                         typeof(_neighbor_ids), typeof(_node_indices),
+                                         Array, false}(u,
+                                                       neighbor_ids,
+                                                       node_indices,
+                                                       _u,
+                                                       _neighbor_ids,
+                                                       _node_indices)
 
     init_interfaces!(interfaces, mesh)
 
@@ -202,21 +293,57 @@ function init_interfaces!(interfaces, mesh::P4estMesh)
     return interfaces
 end
 
-mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <:
-               AbstractContainer
-    u::Array{uEltype, NDIMSP1}       # [variables, i, j, boundary]
-    neighbor_ids::Vector{Int}                   # [boundary]
-    node_indices::Vector{NTuple{NDIMS, IndexInfo}} # [boundary]
+# Required methods due to <: AbstractHeterogeneousContainer
+function KernelAbstractions.get_backend(interfaces::P4estInterfaceContainer)
+    return KernelAbstractions.get_backend(interfaces.u)
+end
+function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer)
+    # Adapt the underlying flat storage (the `_`-prefixed fields) to `to`
+    _u = Adapt.adapt_structure(to, interfaces._u)
+    _neighbor_ids = Adapt.adapt_structure(to, interfaces._neighbor_ids)
+    _node_indices = Adapt.adapt_structure(to, interfaces._node_indices)
+    # Rebuild the multi-dimensional arrays from the adapted storage, keeping the old sizes
+    u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u))
+    neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(interfaces.neighbor_ids))
+    node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(interfaces.node_indices))
+    # Type parameters in the order declared by `P4estInterfaceContainer`
+    NDIMS = ndims(interfaces)
+    new_type_params = (NDIMS,
+                       eltype(interfaces), # uEltype
+                       NDIMS + 2, # NDIMSP2
+                       typeof(u), typeof(neighbor_ids), typeof(node_indices),
+                       typeof(_u), typeof(_neighbor_ids), typeof(_node_indices),
+                       to, # ArrayType
+                       true)
+    return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices,
+                                                       _u, _neighbor_ids, _node_indices)
+end
+
+mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1,
+                                      uArray <: DenseArray{uEltype, NDIMSP1},
+                                      IdsVector <: DenseVector{Int},
+                                      IndicesVector <:
+                                      DenseVector{NTuple{NDIMS, IndexInfo}},
+                                      uVector <: DenseVector{uEltype}, ArrayType,
+                                      Bool} <:
+               AbstractHeterogeneousContainer{ArrayType, Bool}
+    u::uArray       # [variables, i, j, boundary]
+    neighbor_ids::IdsVector                 # [boundary]
+    node_indices::IndicesVector # [boundary]
     name::Vector{Symbol}                # [boundary]
 
     # internal `resize!`able storage
-    _u::Vector{uEltype}
+    _u::uVector
 end
 
 @inline function nboundaries(boundaries::P4estBoundaryContainer)
     length(boundaries.neighbor_ids)
 end
 @inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS
+@inline function Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS,
+                                                                              uEltype}
+    uEltype
+end
 
 # See explanation of Base.resize! for the element container
 function Base.resize!(boundaries::P4estBoundaryContainer, capacity)
@@ -225,9 +352,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity)
     n_dims = ndims(boundaries)
     n_nodes = size(boundaries.u, 2)
     n_variables = size(boundaries.u, 1)
+    ArrayType = array_type(boundaries)
 
     resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity)
-    boundaries.u = unsafe_wrap(Array, pointer(_u),
+    boundaries.u = unsafe_wrap(ArrayType, pointer(_u),
                                (n_variables, ntuple(_ -> n_nodes, n_dims - 1)...,
                                 capacity))
 
@@ -259,9 +387,11 @@ function init_boundaries(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e
     node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, n_boundaries)
     names = Vector{Symbol}(undef, n_boundaries)
 
-    boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids,
-                                                                   node_indices, names,
-                                                                   _u)
+    boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u),
+                                        typeof(neighbor_ids), typeof(node_indices),
+                                        typeof(_u), Array, false}(u, neighbor_ids,
+                                                                  node_indices, names,
+                                                                  _u)
 
     if n_boundaries > 0
         init_boundaries!(boundaries, mesh)
@@ -308,6 +438,24 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh)
     return nothing
 end
 
+# Required methods due to <: AbstractHeterogeneousContainer
+function KernelAbstractions.get_backend(boundaries::P4estBoundaryContainer)
+    return KernelAbstractions.get_backend(boundaries.u)
+end
+function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer)
+    _u = Adapt.adapt_structure(to, boundaries._u) # adapt the flat storage of `u`
+    u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) # rebuild `u` with its old size
+    neighbor_ids = Adapt.adapt_structure(to, boundaries.neighbor_ids)
+    node_indices = Adapt.adapt_structure(to, boundaries.node_indices)
+    name = boundaries.name # reused as is (not adapted)
+    # Type parameters in the order declared by `P4estBoundaryContainer`
+    NDIMS = ndims(boundaries)
+    return P4estBoundaryContainer{NDIMS, eltype(boundaries), NDIMS + 1, typeof(u),
+                                  typeof(neighbor_ids), typeof(node_indices),
+                                  typeof(_u), to, true}(u, neighbor_ids, node_indices,
+                                                        name, _u)
+end
+
 # Container data structure (structure-of-arrays style) for DG L2 mortars
 #
 # The positions used in `neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D)
@@ -333,20 +481,33 @@ end
 # │ └─────────────┴─────────────┘  └───────────────────────────┘
 # │
 # ⋅────> ξ
-mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <:
-               AbstractContainer
-    u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar]
-    neighbor_ids::Matrix{Int}             # [position, mortar]
-    node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [small/large, mortar]
+mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3,
+                                    uArray <: DenseArray{uEltype, NDIMSP3},
+                                    IdsMatrix <: DenseMatrix{Int},
+                                    IndicesMatrix <:
+                                    DenseMatrix{NTuple{NDIMS, IndexInfo}},
+                                    uVector <: DenseVector{uEltype},
+                                    IdsVector <: DenseVector{Int},
+                                    IndicesVector <:
+                                    DenseVector{NTuple{NDIMS, IndexInfo}},
+                                    ArrayType, Bool} <:
+               AbstractHeterogeneousContainer{ArrayType, Bool}
+    u::uArray # [small/large side, variable, position, i, j, mortar]
+    neighbor_ids::IdsMatrix # [position, mortar]
+    node_indices::IndicesMatrix # [small/large, mortar]
 
     # internal `resize!`able storage
-    _u::Vector{uEltype}
-    _neighbor_ids::Vector{Int}
-    _node_indices::Vector{NTuple{NDIMS, IndexInfo}}
+    _u::uVector
+    _neighbor_ids::IdsVector
+    _node_indices::IndicesVector
 end
 
 @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2)
 @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS
+@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS,
+                                                                            uEltype}
+    uEltype
+end
 
 # See explanation of Base.resize! for the element container
 function Base.resize!(mortars::P4estMortarContainer, capacity)
@@ -355,18 +516,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity)
     n_dims = ndims(mortars)
     n_nodes = size(mortars.u, 4)
     n_variables = size(mortars.u, 2)
+    ArrayType = array_type(mortars)
 
     resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity)
-    mortars.u = unsafe_wrap(Array, pointer(_u),
+    mortars.u = unsafe_wrap(ArrayType, pointer(_u),
                             (2, n_variables, 2^(n_dims - 1),
                              ntuple(_ -> n_nodes, n_dims - 1)..., capacity))
 
     resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity)
-    mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids),
+    mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids),
                                        (2^(n_dims - 1) + 1, capacity))
 
     resize!(_node_indices, 2 * capacity)
-    mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity))
+    mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity))
 
     return nothing
 end
@@ -393,12 +555,15 @@ function init_mortars(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elem
     _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_mortars)
     node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars))
 
-    mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u,
-                                                                         neighbor_ids,
-                                                                         node_indices,
-                                                                         _u,
-                                                                         _neighbor_ids,
-                                                                         _node_indices)
+    mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u),
+                                   typeof(neighbor_ids), typeof(node_indices),
+                                   typeof(_u), typeof(_neighbor_ids),
+                                   typeof(_node_indices), Array, false}(u,
+                                                                        neighbor_ids,
+                                                                        node_indices,
+                                                                        _u,
+                                                                        _neighbor_ids,
+                                                                        _node_indices)
 
     if n_mortars > 0
         init_mortars!(mortars, mesh)
@@ -413,6 +578,35 @@ function init_mortars!(mortars, mesh::P4estMesh)
     return mortars
 end
 
+# Required methods due to <: AbstractHeterogeneousContainer
+function KernelAbstractions.get_backend(mortars::P4estMortarContainer)
+    return KernelAbstractions.get_backend(mortars.u)
+end
+function Adapt.adapt_structure(to, mortars::P4estMortarContainer)
+    # Adapt the underlying flat storage (the `_`-prefixed fields) to `to`
+    _u = Adapt.adapt_structure(to, mortars._u)
+    _neighbor_ids = Adapt.adapt_structure(to, mortars._neighbor_ids)
+    _node_indices = Adapt.adapt_structure(to, mortars._node_indices)
+
+    # Rebuild the multi-dimensional arrays from the adapted storage, keeping the old sizes
+    u = unsafe_wrap_or_alloc(to, _u, size(mortars.u))
+    neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids))
+    node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices))
+
+    # Type parameters in the order declared by `P4estMortarContainer`
+    NDIMS = ndims(mortars)
+    new_type_params = (NDIMS,
+                       eltype(mortars), # uEltype
+                       NDIMS + 1, # NDIMSP1
+                       NDIMS + 3, # NDIMSP3
+                       typeof(u), typeof(neighbor_ids), typeof(node_indices),
+                       typeof(_u), typeof(_neighbor_ids), typeof(_node_indices),
+                       to, # ArrayType
+                       true)
+    return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices,
+                                                    _u, _neighbor_ids, _node_indices)
+end
+
 function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache)
     # Re-initialize elements container
     @unpack elements = cache
diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl
index 949872bfa0f..4007e2059ca 100644
--- a/src/solvers/dgsem_p4est/containers_parallel.jl
+++ b/src/solvers/dgsem_p4est/containers_parallel.jl
@@ -5,15 +5,21 @@
 @muladd begin
 #! format: noindent
 
-mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <:
-               AbstractContainer
-    u::Array{uEltype, NDIMSP2}       # [primary/secondary, variable, i, j, interface]
-    local_neighbor_ids::Vector{Int}                   # [interface]
-    node_indices::Vector{NTuple{NDIMS, IndexInfo}} # [interface]
-    local_sides::Vector{Int}                   # [interface]
+mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2,
+                                          uArray <: DenseArray{uEltype, NDIMSP2},
+                                          VecInt <: DenseVector{Int},
+                                          IndicesVector <:
+                                          DenseVector{NTuple{NDIMS, IndexInfo}},
+                                          uVector <: DenseVector{uEltype},
+                                          ArrayType, Bool} <:
+               AbstractHeterogeneousContainer{ArrayType, Bool}
+    u::uArray      # [primary/secondary, variable, i, j, interface]
+    local_neighbor_ids::VecInt                   # [interface]
+    node_indices::IndicesVector # [interface]
+    local_sides::VecInt                   # [interface]
 
     # internal `resize!`able storage
-    _u::Vector{uEltype}
+    _u::uVector
 end
 
 @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer)
@@ -27,9 +33,10 @@ function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity)
     n_dims = ndims(mpi_interfaces)
     n_nodes = size(mpi_interfaces.u, 3)
     n_variables = size(mpi_interfaces.u, 2)
+    ArrayType = array_type(mpi_interfaces)
 
     resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity)
-    mpi_interfaces.u = unsafe_wrap(Array, pointer(_u),
+    mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u),
                                    (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)...,
                                     capacity))
 
@@ -64,11 +71,14 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh},
 
     local_sides = Vector{Int}(undef, n_mpi_interfaces)
 
-    mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u,
-                                                                           local_neighbor_ids,
-                                                                           node_indices,
-                                                                           local_sides,
-                                                                           _u)
+    mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2,
+                                                typeof(u), typeof(local_neighbor_ids),
+                                                typeof(node_indices), typeof(_u),
+                                                Array, false}(u,
+                                                              local_neighbor_ids,
+                                                              node_indices,
+                                                              local_sides,
+                                                              _u)
 
     init_mpi_interfaces!(mpi_interfaces, mesh)
 
@@ -81,6 +91,29 @@ function init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh)
     return mpi_interfaces
 end
 
+# Required methods due to <: AbstractHeterogeneousContainer
+function KernelAbstractions.get_backend(mpi_interfaces::P4estMPIInterfaceContainer)
+    return KernelAbstractions.get_backend(mpi_interfaces.u) # backend of the `u` array
+end
+function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer)
+    # Adapt Vectors and underlying storage
+    _u = Adapt.adapt_structure(to, mpi_interfaces._u)
+    local_neighbor_ids = Adapt.adapt_structure(to, mpi_interfaces.local_neighbor_ids)
+    node_indices = Adapt.adapt_structure(to, mpi_interfaces.node_indices)
+    local_sides = Adapt.adapt_structure(to, mpi_interfaces.local_sides)
+
+    # Wrap array again
+    u = unsafe_wrap_or_alloc(to, _u, size(mpi_interfaces.u))
+
+    NDIMS = ndims(mpi_interfaces)
+    return P4estMPIInterfaceContainer{NDIMS, eltype(mpi_interfaces.u),
+                                      NDIMS + 2,
+                                      typeof(u), typeof(local_neighbor_ids),
+                                      typeof(node_indices), typeof(_u),
+                                      to, true}(u, local_neighbor_ids, node_indices,
+                                                local_sides, _u)
+end
+
 # Container data structure (structure-of-arrays style) for DG L2 mortars
 #
 # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into
@@ -88,14 +121,18 @@ end
 # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds
 # the normal vectors on the surface of the small elements for each mortar.
 mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1,
-                                       NDIMSP2, NDIMSP3} <: AbstractContainer
-    u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar]
+                                       NDIMSP2, NDIMSP3,
+                                       uArray <: DenseArray{uEltype, NDIMSP3},
+                                       uVector <: DenseVector{uEltype},
+                                       ArrayType, Bool} <:
+               AbstractHeterogeneousContainer{ArrayType, Bool}
+    u::uArray # [small/large side, variable, position, i, j, mortar]
     local_neighbor_ids::Vector{Vector{Int}} # [mortar]
     local_neighbor_positions::Vector{Vector{Int}} # [mortar]
     node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [small/large, mortar]
     normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar]
     # internal `resize!`able storage
-    _u::Vector{uEltype}
+    _u::uVector
     _node_indices::Vector{NTuple{NDIMS, IndexInfo}}
     _normal_directions::Vector{RealT}
 end
@@ -164,11 +201,13 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq
                                      2^(NDIMS - 1), n_mpi_mortars))
 
     mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2,
-                                          NDIMS + 3}(u, local_neighbor_ids,
-                                                     local_neighbor_positions,
-                                                     node_indices, normal_directions,
-                                                     _u, _node_indices,
-                                                     _normal_directions)
+                                          NDIMS + 3, typeof(u),
+                                          typeof(_u), 
+                                          Array, false}(u, local_neighbor_ids,
+                                                        local_neighbor_positions,
+                                                        node_indices, normal_directions,
+                                                        _u, _node_indices,
+                                                        _normal_directions)
 
     if n_mpi_mortars > 0
         init_mpi_mortars!(mpi_mortars, mesh, basis, elements)
@@ -184,6 +223,36 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements
     return mpi_mortars
 end
 
+# Required methods due to <: AbstractHeterogeneousContainer
+function KernelAbstractions.get_backend(mpi_mortars::P4estMPIMortarContainer)
+    return KernelAbstractions.get_backend(mpi_mortars.u) # backend of the `u` array
+end
+function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer)
+    # TODO: Vector of Vector type data structure does not work on GPUs,
+    # must be redesigned. This skeleton implementation exists just
+    # for compatibility with the rest of the KA.jl solver code
+
+    _u = Adapt.adapt_structure(to, mpi_mortars._u) # only `_u` is adapted to `to`
+    _node_indices = mpi_mortars._node_indices # reused as is (not adapted)
+    _normal_directions = mpi_mortars._normal_directions # reused as is (not adapted)
+
+    u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u))
+    local_neighbor_ids = mpi_mortars.local_neighbor_ids
+    local_neighbor_positions = mpi_mortars.local_neighbor_positions
+    node_indices = mpi_mortars.node_indices
+    normal_directions = mpi_mortars.normal_directions
+
+    NDIMS = ndims(mpi_mortars)
+    return P4estMPIMortarContainer{NDIMS, eltype(_u),
+                                   eltype(_normal_directions),
+                                   NDIMS + 1, NDIMS + 2, NDIMS + 3,
+                                   typeof(u), typeof(_u),
+                                   to, true}(u, local_neighbor_ids,
+                                             local_neighbor_positions,
+                                             node_indices, normal_directions, _u,
+                                             _node_indices, _normal_directions)
+end
+
 # Overload init! function for regular interfaces, regular mortars and boundaries since they must
 # call the appropriate init_surfaces! function for parallel p4est meshes
 function init_interfaces!(interfaces, mesh::ParallelP4estMesh)
diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl
index 000fab384b7..5e9f21b0a0a 100644
--- a/src/solvers/dgsem_p4est/dg_3d.jl
+++ b/src/solvers/dgsem_p4est/dg_3d.jl
@@ -12,14 +12,14 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
     # TODO: Taal compare performance of different types
     fstar_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2),
                                         nnodes(mortar_l2), 4)
-                      for _ in 1:Threads.nthreads()]
+                      for _ in 1:Threads.nthreads()] |> VecOfArrays
 
     fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations),
                                             nnodes(mortar_l2), nnodes(mortar_l2))
-                          for _ in 1:Threads.nthreads()]
+                          for _ in 1:Threads.nthreads()] |> VecOfArrays
     u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2),
                                     nnodes(mortar_l2))
-                  for _ in 1:Threads.nthreads()]
+                  for _ in 1:Threads.nthreads()] |> VecOfArrays
 
     (; fstar_threaded, fstar_tmp_threaded, u_threaded)
 end
diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl
index eaa6ab5cee2..8664e6a2bae 100644
--- a/src/solvers/dgsem_p4est/dg_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_parallel.jl
@@ -5,12 +5,12 @@
 @muladd begin
 #! format: noindent
 
-mutable struct P4estMPICache{uEltype}
+mutable struct P4estMPICache{BufferType <: DenseVector, VecInt <: DenseVector{<:Integer}}
     mpi_neighbor_ranks::Vector{Int}
-    mpi_neighbor_interfaces::Vector{Vector{Int}}
-    mpi_neighbor_mortars::Vector{Vector{Int}}
-    mpi_send_buffers::Vector{Vector{uEltype}}
-    mpi_recv_buffers::Vector{Vector{uEltype}}
+    mpi_neighbor_interfaces::VecOfArrays{VecInt}
+    mpi_neighbor_mortars::VecOfArrays{VecInt}
+    mpi_send_buffers::VecOfArrays{BufferType}
+    mpi_recv_buffers::VecOfArrays{BufferType}
     mpi_send_requests::Vector{MPI.Request}
     mpi_recv_requests::Vector{MPI.Request}
     n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}}
@@ -25,25 +25,47 @@ function P4estMPICache(uEltype)
     end
 
     mpi_neighbor_ranks = Vector{Int}(undef, 0)
-    mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0)
-    mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0)
-    mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0)
-    mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0)
+    mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays
+    mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays
+    mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays
+    mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays
     mpi_send_requests = Vector{MPI.Request}(undef, 0)
     mpi_recv_requests = Vector{MPI.Request}(undef, 0)
     n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1)
     n_elements_global = 0
     first_element_global_id = 0
 
-    P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces,
-                           mpi_neighbor_mortars,
-                           mpi_send_buffers, mpi_recv_buffers,
-                           mpi_send_requests, mpi_recv_requests,
-                           n_elements_by_rank, n_elements_global,
-                           first_element_global_id)
+    P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks,
+                                                mpi_neighbor_interfaces,
+                                                mpi_neighbor_mortars,
+                                                mpi_send_buffers, mpi_recv_buffers,
+                                                mpi_send_requests, mpi_recv_requests,
+                                                n_elements_by_rank, n_elements_global,
+                                                first_element_global_id)
 end
 
-@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype
+@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType)
+
+function Adapt.adapt_structure(to, mpi_cache::P4estMPICache)
+    # Only the send/receive buffers are adapted; all other fields are passed on as is.
+    mpi_neighbor_interfaces = mpi_cache.mpi_neighbor_interfaces
+    mpi_send_buffers = Adapt.adapt_structure(to, mpi_cache.mpi_send_buffers)
+    mpi_recv_buffers = Adapt.adapt_structure(to, mpi_cache.mpi_recv_buffers)
+    @assert eltype(mpi_send_buffers) == eltype(mpi_recv_buffers)
+    # Both type parameters of `P4estMPICache{BufferType, VecInt}` are spelled out.
+    # NOTE(review): assumes `eltype` of a `VecOfArrays` is its array type - confirm.
+    BufferType = eltype(mpi_send_buffers)
+    VecInt = eltype(mpi_neighbor_interfaces)
+    return P4estMPICache{BufferType, VecInt}(mpi_cache.mpi_neighbor_ranks,
+                                             mpi_neighbor_interfaces,
+                                             mpi_cache.mpi_neighbor_mortars,
+                                             mpi_send_buffers, mpi_recv_buffers,
+                                             mpi_cache.mpi_send_requests,
+                                             mpi_cache.mpi_recv_requests,
+                                             mpi_cache.n_elements_by_rank,
+                                             mpi_cache.n_elements_global,
+                                             mpi_cache.first_element_global_id)
+end
 
 function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
     data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
@@ -253,16 +275,16 @@ end
 
 function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh,
                          mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype)
-    mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces,
-                                                                                                       mpi_mortars,
-                                                                                                       mesh)
+    mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces,
+                                                                                                         mpi_mortars,
+                                                                                                         mesh)
 
-    mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces,
-                                                                                                        mpi_neighbor_mortars,
-                                                                                                        ndims(mesh),
-                                                                                                        nvars,
-                                                                                                        n_nodes,
-                                                                                                        uEltype)
+    _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces,
+                                                                                                          _mpi_neighbor_mortars,
+                                                                                                          ndims(mesh),
+                                                                                                          nvars,
+                                                                                                          n_nodes,
+                                                                                                          uEltype)
 
     # Determine local and total number of elements
     n_elements_global = Int(mesh.p4est.global_num_quadrants[])
@@ -274,6 +296,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh,
     first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1
     @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements"
 
+    mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces)
+    mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars)
+    mpi_send_buffers = VecOfArrays(_mpi_send_buffers)
+    mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers)
+
     # TODO reuse existing structures
     @pack! mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces,
                        mpi_neighbor_mortars,
diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl
index ce602e178d8..77e5df9f3f8 100644
--- a/src/solvers/dgsem_unstructured/dg_2d.jl
+++ b/src/solvers/dgsem_unstructured/dg_2d.jl
@@ -330,7 +330,7 @@ end
 # Iterate over tuples of boundary condition types and associated indices
 # in a type-stable way using "lispy tuple programming".
 function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any},
-                                     BC_indices::NTuple{N, Vector{Int}},
+                                     BC_indices::NTuple{N, <:AbstractVector{<:Integer}},
                                      mesh::Union{UnstructuredMesh2D, P4estMesh,
                                                  T8codeMesh},
                                      equations, surface_integral, dg::DG) where {N}
diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl
index 2c2c6876d70..10bff36f34f 100644
--- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl
+++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl
@@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e
 during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions
 set by the user in the elixir file is also stored for printing.
 """
-mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}}
+mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any},
+                                               Vec <: AbstractVector{<:Integer}}
     boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet
-    boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices
+    boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices
     boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file
     boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier
 end
@@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache)
     boundary_symbol_indices = Dict{Symbol, Vector{Int}}()
 
     container = UnstructuredSortedBoundaryTypes{n_boundary_types,
-                                                typeof(boundary_condition_types)}(boundary_condition_types,
-                                                                                  boundary_indices,
-                                                                                  boundary_conditions,
-                                                                                  boundary_symbol_indices)
+                                                typeof(boundary_condition_types),
+                                                Vector{Int}}(boundary_condition_types,
+                                                             boundary_indices,
+                                                             boundary_conditions,
+                                                             boundary_symbol_indices)
 
     initialize!(container, cache)
 end
@@ -111,4 +113,15 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N
 
     return boundary_types_container
 end
+
+function Adapt.adapt_structure(to, bcs::UnstructuredSortedBoundaryTypes)
+    # Adapt only the index vectors; the condition types and both dictionaries
+    # are handed to the new container as they are.
+    bc_types = bcs.boundary_condition_types
+    adapted_indices = Adapt.adapt_structure(to, bcs.boundary_indices)
+    SortedBCs = UnstructuredSortedBoundaryTypes{length(bc_types), typeof(bc_types),
+                                                eltype(adapted_indices)}
+    return SortedBCs(bc_types, adapted_indices, bcs.boundary_dictionary,
+                     bcs.boundary_symbol_indices)
+end
 end # @muladd

From a686f430821c2dab9771d436e060120097280546 Mon Sep 17 00:00:00 2001
From: Lars Christmann <lars@l12n.eu>
Date: Tue, 2 Jul 2024 13:58:34 +0200
Subject: [PATCH 58/89] Implement basic 3D GPU kernels including MPI
 communication

Based on previously added support for Adapt.jl, it is now possible to
create an ODEProblem on a different backend by providing the optional
`adapt_to` keyword argument to `semidiscretize`. Under the hood
this calculates the initial condition as before and then calls
Adapt.jl on the semidiscretization and the initial solution.

The GPU kernels are mostly separated from existing code and are
reached via dispatching on the backend obtained from one of the
container types via the type system. The kernels are very basic
and do not include optimizations yet. The MPI communication works
but is very slow. In particular, a simulation with several GPUs may
be slower than a simulation with a single GPU.

The stepsize callback is explicitly GPU enabled while other callbacks
like the `AnalysisCallback` support the GPU by moving data to the CPU
and executing on the CPU afterwards.
---
 .../elixir_euler_source_terms_nonperiodic.jl  |   3 +-
 .../elixir_euler_taylor_green_vortex.jl       |  79 +++
 src/auxiliary/auxiliary.jl                    |  47 ++
 src/callbacks_step/analysis.jl                |  29 +-
 src/callbacks_step/stepsize_dg3d.jl           | 113 +++-
 src/semidiscretization/semidiscretization.jl  |  22 +-
 src/solvers/dg.jl                             |  14 +-
 .../dgsem_p4est/containers_parallel.jl        |   6 +-
 src/solvers/dgsem_p4est/dg.jl                 |   1 +
 src/solvers/dgsem_p4est/dg_3d.jl              |  60 ++
 src/solvers/dgsem_p4est/dg_3d_gpu.jl          | 591 ++++++++++++++++++
 src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl | 178 ++++++
 src/solvers/dgsem_p4est/dg_3d_parallel.jl     |  42 ++
 src/solvers/dgsem_p4est/dg_gpu_parallel.jl    |  76 +++
 src/solvers/dgsem_p4est/dg_parallel.jl        |  29 +-
 src/solvers/dgsem_structured/dg_3d.jl         |   8 +
 src/solvers/dgsem_tree/dg.jl                  |  11 +-
 src/solvers/dgsem_tree/dg_3d.jl               |  37 +-
 18 files changed, 1321 insertions(+), 25 deletions(-)
 create mode 100644 examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl
 create mode 100644 src/solvers/dgsem_p4est/dg_3d_gpu.jl
 create mode 100644 src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl
 create mode 100644 src/solvers/dgsem_p4est/dg_gpu_parallel.jl

diff --git a/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl b/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
index fc5e4da3ceb..56b5f6f11ff 100644
--- a/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
+++ b/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
@@ -1,6 +1,7 @@
 
 using OrdinaryDiffEq
 using Trixi
+using CUDA
 
 ###############################################################################
 # semidiscretization of the compressible Euler equations
@@ -37,7 +38,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
 # ODE solvers, callbacks etc.
 
 tspan = (0.0, 5.0)
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan; adapt_to=CuArray)
 
 summary_callback = SummaryCallback()
 
diff --git a/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl
new file mode 100644
index 00000000000..db9103564e8
--- /dev/null
+++ b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl
@@ -0,0 +1,79 @@
+using OrdinaryDiffEq
+using Trixi
+using CUDA
+CUDA.allowscalar(false)
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations3D(1.4)
+
+"""
+    initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D)
+
+The classical inviscid Taylor-Green vortex.
+"""
+function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D)
+  A  = 1.0 # magnitude of speed
+  Ms = 0.1 # maximum Mach number
+
+  rho = 1.0
+  v1  =  A * sin(x[1]) * cos(x[2]) * cos(x[3])
+  v2  = -A * cos(x[1]) * sin(x[2]) * cos(x[3])
+  v3  = 0.0
+  p   = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms
+  p   = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3]))
+
+  return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+initial_condition = initial_condition_taylor_green_vortex
+
+solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs,
+               volume_integral=VolumeIntegralFluxDifferencing(flux_lax_friedrichs))
+
+coordinates_min = (-1.0, -1.0, -1.0) .* pi
+coordinates_max = ( 1.0,  1.0,  1.0) .* pi
+
+# Create P4estMesh with 16 x 16 x 16 elements (note `initial_refinement_level=2`)
+trees_per_dimension = (4, 4, 4)
+mesh = P4estMesh(trees_per_dimension, polydeg=1,
+                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
+                 initial_refinement_level=2)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 1.0)#5.0)
+ode = semidiscretize(semi, tspan; adapt_to=CuArray)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=100,
+                                     save_initial_solution=true,
+                                     save_final_solution=true,
+                                     solution_variables=cons2prim)
+
+stepsize_callback = StepsizeCallback(cfl=0.9)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, 
+                        alive_callback,
+                        #save_solution,
+                        stepsize_callback)
+
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
+            dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep=false, callback=callbacks);
+summary_callback() # print the timer summary
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index 6259e936737..530014cf643 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -330,4 +330,51 @@ function download(src_url, file_path)
 
     return file_path
 end
+
+# Returns u[:, indices...] as an SVector. size(u, 1) should thus be
+# known at compile time in the caller and passed via Val()
+@inline function get_svector(u, ::Val{N}, indices...) where {N}
+    # There is a cut-off at `n == 10` inside of the method
+    # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
+    # in Julia `v1.5`, leading to type instabilities if
+    # more than ten variables are used. That's why we use
+    # `Val(...)` below.
+    # We use `@inline` to make sure that the `getindex` calls are
+    # really inlined, which might be the default choice of the Julia
+    # compiler for standard `Array`s but not necessarily for more
+    # advanced array types such as `PtrArray`s, cf.
+    # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55
+    SVector(ntuple(@inline(v->u[v, indices...]), Val(N)))
+end
+
+# Returns u[1, :, indices] and u[2, :, indices] as SVectors. size(u, 2)
+# should thus be known at compile time in the caller and passed via Val()
+@inline function get_svectors(u, ::Val{N}, indices...) where {N}
+    # There is a cut-off at `n == 10` inside of the method
+    # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
+    # in Julia `v1.5`, leading to type instabilities if
+    # more than ten variables are used. That's why we use
+    # `Val(...)` below.
+    u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(N)))
+    u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(N)))
+    return u_ll, u_rr
+end
+
+@inline function add_to_first_axis!(u, u_node::SVector{N}, indices...) where {N}
+    for v in Base.OneTo(N)
+        u[v, indices...] += u_node[v]
+    end
+    return nothing
+end
+
+# Use this function instead of `add_to_first_axis!` to speed up
+# multiply-and-add-to-node-vars operations
+# See https://github.com/trixi-framework/Trixi.jl/pull/643
+@inline function multiply_add_to_first_axis!(u, factor, u_node::SVector{N},
+                                             indices...) where {N}
+    for v in Base.OneTo(N)
+        u[v, indices...] = u[v, indices...] + factor * u_node[v]
+    end
+    return nothing
+end
 end # @muladd
diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 860e3fa21d3..24cf363dd95 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -144,7 +144,14 @@ function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t,
                      integrator) where {Condition, Affect! <: AnalysisCallback}
     semi = integrator.p
     du_ode = first(get_tmp_cache(integrator))
-    initialize!(cb, u_ode, du_ode, t, integrator, semi)
+    if semi isa SemidiscretizationHyperbolic && uses_ka(semi.cache.elements)
+        semi_cpu = Adapt.adapt(Array, semi)
+        du_ode_cpu = Adapt.adapt(Array, du_ode)
+        u_ode_cpu = Adapt.adapt(Array, u_ode)
+        initialize!(cb, u_ode_cpu, du_ode_cpu, t, integrator, semi_cpu)
+    else
+        initialize!(cb, u_ode, du_ode, t, integrator, semi)
+    end
 end
 
 # This is the actual initialization method
@@ -227,7 +234,14 @@ function (analysis_callback::AnalysisCallback)(integrator)
     semi = integrator.p
     du_ode = first(get_tmp_cache(integrator))
     u_ode = integrator.u
-    analysis_callback(u_ode, du_ode, integrator, semi)
+    if semi isa SemidiscretizationHyperbolic && uses_ka(semi.cache.elements)
+        semi_cpu = Adapt.adapt(Array, semi)
+        du_ode_cpu = Adapt.adapt(Array, du_ode)
+        u_ode_cpu = Adapt.adapt(Array, u_ode)
+        analysis_callback(u_ode_cpu, du_ode_cpu, integrator, semi_cpu)
+    else
+        analysis_callback(u_ode, du_ode, integrator, semi)
+    end
 end
 
 # This method gets called internally as the main entry point to the AnalysiCallback
@@ -578,8 +592,15 @@ function (cb::DiscreteCallback{Condition, Affect!})(sol) where {Condition,
     @unpack analyzer = analysis_callback
     cache_analysis = analysis_callback.cache
 
-    l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi,
-                                            cache_analysis)
+    if semi isa SemidiscretizationHyperbolic && uses_ka(semi.cache.elements)
+        semi_cpu = Adapt.adapt(Array, semi)
+        u_ode_cpu = Adapt.adapt(Array, sol.u[end])
+        l2_error, linf_error = calc_error_norms(u_ode_cpu, sol.t[end], analyzer, semi_cpu,
+                                                cache_analysis)
+    else
+        l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi,
+                                                cache_analysis)
+    end
     (; l2 = l2_error, linf = linf_error)
 end
 
diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl
index 664596f989e..6afa4ac2d35 100644
--- a/src/callbacks_step/stepsize_dg3d.jl
+++ b/src/callbacks_step/stepsize_dg3d.jl
@@ -45,7 +45,113 @@ function max_dt(u, t, mesh::TreeMesh{3},
 end
 
 function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
-                constant_speed::False, equations, dg::DG, cache)
+                constant_speed, equations, dg::DG, cache)
+    backend = backend_or_nothing(cache.elements)
+    _max_dt(backend, u, t, mesh, constant_speed, equations, dg, cache)
+end
+
+@inline function _max_dt(backend::Backend, u, t,
+                         mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
+                         constant_speed::False, equations, dg::DG, cache)
+    @unpack contravariant_vectors, inverse_jacobian = cache.elements
+    num_elements = nelements(dg, cache)
+    nodes = eachnode(dg)
+    kernel! = max_scaled_speed_kernel!(backend)
+
+    max_scaled_speeds = allocate(backend, eltype(t), num_elements)
+    kernel!(max_scaled_speeds, u, constant_speed, equations, nodes,
+            contravariant_vectors,
+            inverse_jacobian; ndrange = num_elements)
+
+    # to avoid a division by zero if the speed vanishes everywhere,
+    # e.g. for steady-state linear advection
+    max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds))
+
+    return 2 / (nnodes(dg) * max_scaled_speed)
+end
+
+@kernel function max_scaled_speed_kernel!(max_scaled_speeds, u,
+                                          constant_speed::False, equations, nodes,
+                                          contravariant_vectors, inverse_jacobian)
+    element = @index(Global, Linear)
+    NVARS = Val(nvariables(equations))
+
+    max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(max_scaled_speeds))
+    for k in nodes, j in nodes, i in nodes
+        u_node = get_svector(u, NVARS, i, j, k, element)
+        lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
+
+        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3)
+        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3)
+        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3)
+
+        inv_jacobian = abs(inverse_jacobian[i, j, k, element])
+
+        max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed)
+        max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed)
+        max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed)
+    end
+
+    max_scaled_speeds[element] = max_lambda1 + max_lambda2 + max_lambda3
+end
+
+# Time step estimate for constant wave speeds, evaluated with a kernel on `backend`.
+@inline function _max_dt(backend::Backend, u, t,
+                         mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
+                         constant_speed::True, equations, dg::DG, cache)
+    @unpack contravariant_vectors, inverse_jacobian = cache.elements
+    num_elements = nelements(dg, cache)
+    nodes = eachnode(dg)
+    kernel! = max_scaled_speed_kernel!(backend)
+    # The speeds do not depend on `u`, so they are queried once before the launch
+    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)
+    max_scaled_speeds = allocate(backend, eltype(t), num_elements)
+    kernel!(max_scaled_speeds, constant_speed, nodes, contravariant_vectors,
+            inverse_jacobian, max_lambda1, max_lambda2, max_lambda3;
+            ndrange = num_elements)
+
+    # to avoid a division by zero if the speed vanishes everywhere,
+    # e.g. for steady-state linear advection
+    max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds))
+    return 2 / (nnodes(dg) * max_scaled_speed)
+end
+
+# Kernel for constant wave speeds: for the element given by the global index, store
+# the maximum over its nodes of the Jacobian-scaled sum of the transformed speeds.
+@kernel function max_scaled_speed_kernel!(max_scaled_speeds, constant_speed::True,
+                                          nodes, contravariant_vectors, inverse_jacobian,
+                                          max_lambda1, max_lambda2, max_lambda3)
+    element = @index(Global, Linear)
+    # Reduce over the nodes in a local variable and write the result once at the end
+    max_scaled_speed = zero(eltype(max_scaled_speeds))
+    for k in nodes, j in nodes, i in nodes
+        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda1 = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 + Ja13 * max_lambda3)
+        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda2 = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 + Ja23 * max_lambda3)
+        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda3 = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + Ja33 * max_lambda3)
+
+        inv_jacobian = abs(inverse_jacobian[i, j, k, element])
+        max_scaled_speed = max(max_scaled_speed,
+                               inv_jacobian * (lambda1 + lambda2 + lambda3))
+    end
+
+    max_scaled_speeds[element] = max_scaled_speed
+end
+
+@inline function _max_dt(::Nothing, u, t,
+                         mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
+                         constant_speed::False, equations, dg::DG, cache)
     # to avoid a division by zero if the speed vanishes everywhere,
     # e.g. for steady-state linear advection
     max_scaled_speed = nextfloat(zero(t))
@@ -82,8 +188,9 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}
     return 2 / (nnodes(dg) * max_scaled_speed)
 end
 
-function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
-                constant_speed::True, equations, dg::DG, cache)
+@inline function _max_dt(::Nothing, u, t,
+                         mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
+                         constant_speed::True, equations, dg::DG, cache)
     # to avoid a division by zero if the speed vanishes everywhere,
     # e.g. for steady-state linear advection
     max_scaled_speed = nextfloat(zero(t))
diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl
index c6b82d5f37b..86401c788d7 100644
--- a/src/semidiscretization/semidiscretization.jl
+++ b/src/semidiscretization/semidiscretization.jl
@@ -78,12 +78,18 @@ function calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization,
 end
 
 """
-    semidiscretize(semi::AbstractSemidiscretization, tspan)
+    semidiscretize(semi::AbstractSemidiscretization, tspan; adapt_to = nothing)
 
 Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan`
 that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/).
+The optional keyword argument `adapt_to` controls whether `semi` is adapted via
+`Adapt.jl`. If it is not nothing, `semi` gets adapted to `adapt_to` before
+semidiscretizing it. If it is adapted, KernelAbstractions.jl will be used in
+the solver backend. The `adapt_to` keyword is only supported for
+`SemidiscretizationHyperbolic` objects that use a `P4estMesh` as their mesh.
 """
 function semidiscretize(semi::AbstractSemidiscretization, tspan;
+                        adapt_to = nothing,
                         reset_threads = true)
     # Optionally reset Polyester.jl threads. See
     # https://github.com/trixi-framework/Trixi.jl/issues/1583
@@ -98,7 +104,19 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan;
     #       See https://github.com/trixi-framework/Trixi.jl/issues/328
     iip = true # is-inplace, i.e., we modify a vector when calling rhs!
     specialize = SciMLBase.FullSpecialize # specialize on rhs! and parameters (semi)
-    return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi)
+
+    if !isnothing(adapt_to)
+        # Adaptation requires a hyperbolic semidiscretization *and* a `P4estMesh`.
+        # The short-circuiting `&&` only touches `semi.mesh` once the type check
+        # has passed, so other semidiscretizations get the `ArgumentError` below.
+        if !(semi isa SemidiscretizationHyperbolic && semi.mesh isa P4estMesh)
+            throw(ArgumentError("adapt_to keyword argument not supported for this semidiscretization"))
+        end
+        semi_adapted = Adapt.adapt(adapt_to, semi)
+        # Allocate the initial state on the backend of the adapted containers
+        # and copy the host-side initial condition into it
+        backend = get_backend(semi_adapted.cache.elements)
+        _u0_ode = allocate(backend, eltype(u0_ode), size(u0_ode))
+        KernelAbstractions.copyto!(backend, _u0_ode, u0_ode)
+        return ODEProblem{iip, specialize}(rhs!, _u0_ode, tspan, semi_adapted)
+    else
+        return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi)
+    end
 end
 
 """
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index 9423e385c20..5689a7e0dfc 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -649,7 +649,12 @@ end
         #  (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
     else
         # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`.
-        unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
+        if mesh isa P4estMesh
+            ArrayType = array_type(cache.elements)
+        else
+            ArrayType = Array
+        end
+        unsafe_wrap(ArrayType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
                     (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))...,
                      nelements(dg, cache)))
     end
@@ -693,7 +698,12 @@ end
         @assert length(u_ode) ==
                 nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
     end
-    unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
+    if mesh isa P4estMesh
+        ArrayType = array_type(cache.elements)
+    else
+        ArrayType = Array
+    end
+    unsafe_wrap(ArrayType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
                 (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))...,
                  nelements(dg, cache)))
 end
diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl
index 4007e2059ca..fdd9441c7fb 100644
--- a/src/solvers/dgsem_p4est/containers_parallel.jl
+++ b/src/solvers/dgsem_p4est/containers_parallel.jl
@@ -93,7 +93,7 @@ end
 
 # Required methods due to <: AbstractHeterogeneousContainer
 function KernelAbstractions.get_backend(mpi_interfaces::P4estMPIInterfaceContainer)
-    return KernelAbstractions.get_backend(interfaces.u)
+    # The backend of the container is the backend of its `u` field
+    return KernelAbstractions.get_backend(mpi_interfaces.u)
 end
 function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer)
     # Adapt Vectors and underlying storage
@@ -224,8 +224,8 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements
 end
 
 # Required methods due to <: AbstractHeterogeneousContainer
-function KernelAbstractions.get_backend(mpi_interfaces::P4estMPIMortarContainer)
-    return KernelAbstractions.get_backend(mortars.u)
+function KernelAbstractions.get_backend(mpi_mortars::P4estMPIMortarContainer)
+    # The backend of the container is the backend of its `u` field
+    return KernelAbstractions.get_backend(mpi_mortars.u)
 end
 function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer)
     # TODO: Vector of Vector type data structure does not work on GPUs,
diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl
index 10cc075089c..d667d4ca16f 100644
--- a/src/solvers/dgsem_p4est/dg.jl
+++ b/src/solvers/dgsem_p4est/dg.jl
@@ -51,6 +51,7 @@ include("dg_2d.jl")
 include("dg_2d_parabolic.jl")
 
 include("dg_3d.jl")
+include("dg_3d_gpu.jl")
 include("dg_3d_parabolic.jl")
 include("dg_parallel.jl")
 end # @muladd
diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl
index 5e9f21b0a0a..9a85a96b442 100644
--- a/src/solvers/dgsem_p4est/dg_3d.jl
+++ b/src/solvers/dgsem_p4est/dg_3d.jl
@@ -92,6 +92,13 @@ end
 function prolong2interfaces!(cache, u,
                              mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                              equations, surface_integral, dg::DG)
+    # Forward to `_prolong2interfaces!`, which dispatches on the value returned
+    # by `backend_or_nothing` for the interface container
+    # (presumably `nothing` selects the existing CPU code path -- TODO confirm).
+    backend = backend_or_nothing(cache.interfaces)
+    _prolong2interfaces!(backend, cache, u, mesh, equations, surface_integral, dg)
+end
+
+@inline function _prolong2interfaces!(backend::Nothing, cache, u,
+                                      mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                      equations, surface_integral, dg::DG)
     @unpack interfaces = cache
     index_range = eachnode(dg)
 
@@ -168,6 +175,15 @@ function calc_interface_flux!(surface_flux_values,
                               mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                               nonconservative_terms,
                               equations, surface_integral, dg::DG, cache)
+    backend = backend_or_nothing(cache.interfaces)
+    _calc_interface_flux!(backend, surface_flux_values, mesh, nonconservative_terms,
+                          equations, surface_integral, dg, cache)
+end
+
+@inline function _calc_interface_flux!(backend::Nothing, surface_flux_values,
+                                       mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                       nonconservative_terms,
+                                       equations, surface_integral, dg::DG, cache)
     @unpack neighbor_ids, node_indices = cache.interfaces
     @unpack contravariant_vectors = cache.elements
     index_range = eachnode(dg)
@@ -318,6 +334,13 @@ end
 function prolong2boundaries!(cache, u,
                              mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                              equations, surface_integral, dg::DG)
+    # Forward to `_prolong2boundaries!`, which dispatches on the value returned
+    # by `backend_or_nothing` for the boundary container
+    # (presumably `nothing` selects the existing CPU code path -- TODO confirm).
+    backend = backend_or_nothing(cache.boundaries)
+    _prolong2boundaries!(backend, cache, u, mesh, equations, surface_integral, dg)
+end
+
+@inline function _prolong2boundaries!(backend::Nothing, cache, u,
+                                      mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                      equations, surface_integral, dg::DG)
     @unpack boundaries = cache
     index_range = eachnode(dg)
 
@@ -359,6 +382,15 @@ end
 function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing,
                              mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                              equations, surface_integral, dg::DG)
+    # Forward to `_calc_boundary_flux!`, which dispatches on the value returned
+    # by `backend_or_nothing` for the boundary container
+    # (presumably `nothing` selects the existing CPU code path -- TODO confirm).
+    backend = backend_or_nothing(cache.boundaries)
+    _calc_boundary_flux!(backend, cache, t, boundary_condition, boundary_indexing, mesh,
+                         equations, surface_integral, dg)
+end
+
+@inline function _calc_boundary_flux!(backend::Nothing, cache, t,
+                                      boundary_condition, boundary_indexing,
+                                      mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                      equations, surface_integral, dg::DG)
     @unpack boundaries = cache
     @unpack surface_flux_values, node_coordinates, contravariant_vectors = cache.elements
     @unpack surface_flux = surface_integral
@@ -422,6 +454,14 @@ function prolong2mortars!(cache, u,
                           mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
                           mortar_l2::LobattoLegendreMortarL2,
                           surface_integral, dg::DGSEM)
+    backend = backend_or_nothing(cache.mortars)
+    _prolong2mortars!(backend, cache, u, mesh, equations, mortar_l2, surface_integral, dg)
+end
+
+@inline function _prolong2mortars!(backend::Nothing, cache, u,
+                                   mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
+                                   mortar_l2::LobattoLegendreMortarL2,
+                                   surface_integral, dg::DGSEM)
     @unpack fstar_tmp_threaded = cache
     @unpack neighbor_ids, node_indices = cache.mortars
     index_range = eachnode(dg)
@@ -527,6 +567,16 @@ function calc_mortar_flux!(surface_flux_values,
                            nonconservative_terms, equations,
                            mortar_l2::LobattoLegendreMortarL2,
                            surface_integral, dg::DG, cache)
+    backend = backend_or_nothing(cache.mortars)
+    _calc_mortar_flux!(backend, surface_flux_values, mesh, nonconservative_terms,
+                       equations, mortar_l2, surface_integral, dg, cache)
+end
+
+@inline function _calc_mortar_flux!(backend::Nothing, surface_flux_values,
+                                    mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                    nonconservative_terms, equations,
+                                    mortar_l2::LobattoLegendreMortarL2,
+                                    surface_integral, dg::DG, cache)
     @unpack neighbor_ids, node_indices = cache.mortars
     @unpack contravariant_vectors = cache.elements
     @unpack fstar_threaded, fstar_tmp_threaded = cache
@@ -734,6 +784,16 @@ function calc_surface_integral!(du, u,
                                 equations,
                                 surface_integral::SurfaceIntegralWeakForm,
                                 dg::DGSEM, cache)
+    backend = backend_or_nothing(cache.elements)
+    _calc_surface_integral!(backend, du, u, mesh, equations, surface_integral, dg, cache)
+
+end
+
+@inline function _calc_surface_integral!(backend::Nothing, du, u,
+                                         mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                         equations,
+                                         surface_integral::SurfaceIntegralWeakForm,
+                                         dg::DGSEM, cache)
     @unpack boundary_interpolation = dg.basis
     @unpack surface_flux_values = cache.elements
 
diff --git a/src/solvers/dgsem_p4est/dg_3d_gpu.jl b/src/solvers/dgsem_p4est/dg_3d_gpu.jl
new file mode 100644
index 00000000000..9e99292aa1b
--- /dev/null
+++ b/src/solvers/dgsem_p4est/dg_3d_gpu.jl
@@ -0,0 +1,591 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Weak-form volume integral on a KernelAbstractions.jl backend.
+# Launches `_weak_form_kernel!` with one work item per element.
+@inline function _calc_volume_integral!(backend::Backend, du, u, mesh::P4estMesh{3},
+                                        nonconservative_terms::False, equations,
+                                        volume_integral::VolumeIntegralWeakForm,
+                                        dg::DGSEM,
+                                        cache)
+    n_elements = nelements(dg, cache)
+    if n_elements == 0
+        # nothing to launch without elements
+        return nothing
+    end
+
+    launch! = _weak_form_kernel!(backend)
+    launch!(du, u, equations, eachnode(dg), dg.basis.derivative_dhat,
+            cache.elements.contravariant_vectors,
+            ndrange = n_elements)
+    return nothing
+end
+
+# Kernel for the weak-form volume integral. The global work-item index selects
+# the element; all nodes of that element are processed sequentially and the
+# contributions are accumulated into `du`.
+@kernel function _weak_form_kernel!(du, u, equations, nodes, derivative_dhat,
+                                    contravariant_vectors, alpha = true)
+    # true * [some floating point value] == [exactly the same floating point value]
+    # This can (hopefully) be optimized away due to constant propagation.
+    element = @index(Global)
+    NVARS = Val(nvariables(equations))
+
+    for k in nodes, j in nodes, i in nodes
+        u_node = get_svector(u, NVARS, i, j, k, element)
+
+        # Cartesian fluxes at the current node
+        f1 = flux(u_node, 1, equations)
+        f2 = flux(u_node, 2, equations)
+        f3 = flux(u_node, 3, equations)
+
+        # Contravariant vectors Ja^1, Ja^2, Ja^3 at the current node
+        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
+                                                    i, j, k, element)
+        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
+                                                    i, j, k, element)
+        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
+                                                    i, j, k, element)
+
+        # Each contravariant flux is the scalar product of one contravariant
+        # vector and the flux vector; it is distributed along the matching axis.
+
+        # first direction
+        cflux1 = Ja11 * f1 + Ja12 * f2 + Ja13 * f3
+        for ii in nodes
+            multiply_add_to_first_axis!(du, alpha * derivative_dhat[ii, i],
+                                        cflux1, ii, j, k, element)
+        end
+
+        # second direction
+        cflux2 = Ja21 * f1 + Ja22 * f2 + Ja23 * f3
+        for jj in nodes
+            multiply_add_to_first_axis!(du, alpha * derivative_dhat[jj, j],
+                                        cflux2, i, jj, k, element)
+        end
+
+        # third direction
+        cflux3 = Ja31 * f1 + Ja32 * f2 + Ja33 * f3
+        for kk in nodes
+            multiply_add_to_first_axis!(du, alpha * derivative_dhat[kk, k],
+                                        cflux3, i, j, kk, element)
+        end
+    end
+end
+
+# Flux-differencing volume integral on a KernelAbstractions.jl backend.
+# Launches `_flux_differencing_kernel!` with one work item per element and
+# passes `volume_integral.volume_flux` as the two-point flux.
+@inline function _calc_volume_integral!(backend::Backend, du, u,
+                                        mesh::P4estMesh{3},
+                                        nonconservative_terms::False, equations,
+                                        volume_integral::VolumeIntegralFluxDifferencing,
+                                        dg::DGSEM, cache)
+    # Do not launch a kernel over an empty range (same guard as the other
+    # backend launchers in this file)
+    nelements(dg, cache) == 0 && return nothing
+
+    @unpack derivative_split = dg.basis
+    @unpack contravariant_vectors = cache.elements
+    nodes = eachnode(dg)
+    kernel! = _flux_differencing_kernel!(backend)
+
+    kernel!(du, u, equations, volume_integral.volume_flux, nodes, derivative_split,
+            contravariant_vectors,
+            ndrange = nelements(dg, cache))
+    return nothing
+end
+
+# Kernel for the flux-differencing volume integral. The global work-item index
+# selects the element; all node pairs of that element are processed
+# sequentially and the contributions are accumulated into `du`.
+@kernel function _flux_differencing_kernel!(du, u, equations,
+                                            volume_flux, nodes, derivative_split,
+                                            contravariant_vectors, alpha = true)
+    # true * [some floating point value] == [exactly the same floating point value]
+    # This can (hopefully) be optimized away due to constant propagation.
+    element = @index(Global, Linear)
+    NVARS = Val(nvariables(equations))
+    num_nodes = length(nodes)
+
+    # Calculate volume integral in one element
+    for k in nodes, j in nodes, i in nodes
+        u_node = get_svector(u, NVARS, i, j, k, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element)
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
+        Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)
+
+        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+        # the computation of the diagonal terms. In addition, we use the symmetry
+        # of the `volume_flux` to save half of the possible two-point flux
+        # computations.
+
+        # The averaging factor is written as the `Float32` literal `0.5f0`: it is
+        # exactly representable, so `Float64` results are unchanged, while
+        # `Float32` data is not promoted to `Float64` inside the kernel.
+
+        # x direction
+        for ii in (i + 1):num_nodes
+            u_node_ii = get_svector(u, NVARS, ii, j, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors,
+                                                   ii, j, k, element)
+            Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
+            # compute the contravariant sharp flux in the direction of the
+            # averaged contravariant vector
+            fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations)
+            multiply_add_to_first_axis!(du, alpha * derivative_split[i, ii], fluxtilde1,
+                                        i, j, k, element)
+            multiply_add_to_first_axis!(du, alpha * derivative_split[ii, i], fluxtilde1,
+                                        ii, j, k, element)
+        end
+
+        # y direction
+        for jj in (j + 1):num_nodes
+            u_node_jj = get_svector(u, NVARS, i, jj, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors,
+                                                   i, jj, k, element)
+            Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
+            # compute the contravariant sharp flux in the direction of the
+            # averaged contravariant vector
+            fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations)
+            multiply_add_to_first_axis!(du, alpha * derivative_split[j, jj], fluxtilde2,
+                                        i, j, k, element)
+            multiply_add_to_first_axis!(du, alpha * derivative_split[jj, j], fluxtilde2,
+                                        i, jj, k, element)
+        end
+
+        # z direction
+        for kk in (k + 1):num_nodes
+            u_node_kk = get_svector(u, NVARS, i, j, kk, element)
+            # pull the contravariant vectors and compute the average
+            Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors,
+                                                   i, j, kk, element)
+            Ja3_avg = 0.5f0 * (Ja3_node + Ja3_node_kk)
+            # compute the contravariant sharp flux in the direction of the
+            # averaged contravariant vector
+            fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations)
+            multiply_add_to_first_axis!(du, alpha * derivative_split[k, kk], fluxtilde3,
+                                        i, j, k, element)
+            multiply_add_to_first_axis!(du, alpha * derivative_split[kk, k], fluxtilde3,
+                                        i, j, kk, element)
+        end
+    end
+end
+
+# Interface prolongation on a KernelAbstractions.jl backend.
+# Launches `prolong2interfaces_kernel!` with one work item per interface.
+@inline function _prolong2interfaces!(backend::Backend, cache, u,
+                                      mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                      equations, surface_integral, dg::DG)
+    interfaces = cache.interfaces
+    n_interfaces = ninterfaces(interfaces)
+    if n_interfaces == 0
+        # nothing to launch without interfaces
+        return nothing
+    end
+
+    launch! = prolong2interfaces_kernel!(backend)
+    launch!(interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, u,
+            Val(nvariables(equations)), eachnode(dg),
+            ndrange = n_interfaces)
+    return nothing
+end
+
+# Kernel copying the solution on both sides of an interface into `u_interfaces`.
+# The global work-item index selects the interface.
+@kernel function prolong2interfaces_kernel!(u_interfaces, neighbor_ids, node_indices, u,
+                                            ::Val{NVARS}, nodes) where {NVARS}
+    interface = @index(Global, Linear)
+
+    # Side 1 is the primary element, side 2 the secondary element. Both are
+    # copied using "delayed indexing" with a start value and two step sizes to
+    # get the correct face and orientation.
+    # Note that in the current implementation, the interface will be
+    # "aligned at the primary element", i.e., the indices of the primary side
+    # will always run forwards.
+    for side in 1:2
+        side_element = neighbor_ids[side, interface]
+        side_indices = node_indices[side, interface]
+
+        i_vol, i_step_i, i_step_j = index_to_start_step_3d(side_indices[1], nodes)
+        j_vol, j_step_i, j_step_j = index_to_start_step_3d(side_indices[2], nodes)
+        k_vol, k_step_i, k_step_j = index_to_start_step_3d(side_indices[3], nodes)
+
+        for j in nodes
+            for i in nodes
+                for v in 1:NVARS
+                    u_interfaces[side, v, i, j, interface] = u[v, i_vol, j_vol, k_vol,
+                                                               side_element]
+                end
+                # advance along the first surface index
+                i_vol += i_step_i
+                j_vol += j_step_i
+                k_vol += k_step_i
+            end
+            # advance along the second surface index
+            i_vol += i_step_j
+            j_vol += j_step_j
+            k_vol += k_step_j
+        end
+    end
+end
+
+# Interface fluxes on a KernelAbstractions.jl backend.
+# Launches `interface_flux_kernel!` with one work item per interface.
+@inline function _calc_interface_flux!(backend::Backend, surface_flux_values,
+                                       mesh::P4estMesh{3},
+                                       nonconservative_terms::False,
+                                       equations, surface_integral, dg::DG, cache)
+    interfaces = cache.interfaces
+    n_interfaces = ninterfaces(interfaces)
+    if n_interfaces == 0
+        # nothing to launch without interfaces
+        return nothing
+    end
+
+    launch! = interface_flux_kernel!(backend)
+    launch!(surface_flux_values, equations, surface_integral.surface_flux,
+            eachnode(dg),
+            interfaces.u, interfaces.neighbor_ids, interfaces.node_indices,
+            cache.elements.contravariant_vectors,
+            ndrange = n_interfaces)
+    return nothing
+end
+
+# Kernel computing the surface flux at all nodes of one interface and storing it
+# (with opposite sign on the secondary side) in `surface_flux_values`.
+# The global work-item index selects the interface.
+@kernel function interface_flux_kernel!(surface_flux_values, equations, surface_flux, nodes,
+                                        u_interfaces, neighbor_ids, node_indices,
+                                        contravariant_vectors)
+    interface = @index(Global, Linear)
+    NVARS = Val(nvariables(equations))
+
+    # Primary side: element, face direction, and running volume indices
+    # (`i1`, `j1`, `k1`) together with their step sizes
+    primary_element = neighbor_ids[1, interface]
+    primary_indices = node_indices[1, interface]
+    primary_direction = indices2direction(primary_indices)
+
+    i1, i1_step_i, i1_step_j = index_to_start_step_3d(primary_indices[1], nodes)
+    j1, j1_step_i, j1_step_j = index_to_start_step_3d(primary_indices[2], nodes)
+    k1, k1_step_i, k1_step_j = index_to_start_step_3d(primary_indices[3], nodes)
+
+    # Secondary side: element, face direction, and running surface indices
+    # (`i2`, `j2`) together with their step sizes
+    secondary_element = neighbor_ids[2, interface]
+    secondary_indices = node_indices[2, interface]
+    secondary_direction = indices2direction(secondary_indices)
+    secondary_surface_indices = surface_indices(secondary_indices)
+
+    # Note that the indices of the primary side will always run forward but
+    # the secondary indices might need to run backwards for flipped sides.
+    i2, i2_step_i, i2_step_j = index_to_start_step_3d(secondary_surface_indices[1],
+                                                      nodes)
+    j2, j2_step_i, j2_step_j = index_to_start_step_3d(secondary_surface_indices[2],
+                                                      nodes)
+
+    for j in nodes
+        for i in nodes
+            # Get the normal direction from the primary element.
+            # Note, contravariant vectors at interfaces in negative coordinate direction
+            # are pointing inwards. This is handled by `get_normal_direction`.
+            normal_direction = get_normal_direction(primary_direction,
+                                                    contravariant_vectors,
+                                                    i1, j1, k1, primary_element)
+            u_ll, u_rr = get_svectors(u_interfaces, NVARS, i, j, interface)
+
+            numerical_flux = surface_flux(u_ll, u_rr, normal_direction, equations)
+
+            for v in eachvariable(equations)
+                surface_flux_values[v, i, j,
+                                    primary_direction, primary_element] = numerical_flux[v]
+                surface_flux_values[v, i2, j2,
+                                    secondary_direction, secondary_element] = -numerical_flux[v]
+            end
+
+            # Advance primary volume indices and secondary surface indices
+            # along the first surface index
+            i1 += i1_step_i
+            j1 += j1_step_i
+            k1 += k1_step_i
+            i2 += i2_step_i
+            j2 += j2_step_i
+        end
+        # Advance primary volume indices and secondary surface indices
+        # along the second surface index
+        i1 += i1_step_j
+        j1 += j1_step_j
+        k1 += k1_step_j
+        i2 += i2_step_j
+        j2 += j2_step_j
+    end
+end
+
+# Boundary prolongation on a KernelAbstractions.jl backend.
+# Launches `prolong2boundaries_kernel!` with one work item per boundary.
+@inline function _prolong2boundaries!(backend::Backend, cache, u, mesh::P4estMesh{3},
+                                      equations, surface_integral, dg::DG)
+    boundaries = cache.boundaries
+    n_boundaries = nboundaries(boundaries)
+    if n_boundaries == 0
+        # nothing to launch without boundaries
+        return nothing
+    end
+
+    launch! = prolong2boundaries_kernel!(backend)
+    launch!(boundaries.u, boundaries.neighbor_ids, boundaries.node_indices, u,
+            Val(nvariables(equations)), eachnode(dg),
+            ndrange = n_boundaries)
+    return nothing
+end
+
+# Kernel copying the solution at one boundary face into `u_boundaries`.
+# The global work-item index selects the boundary.
+@kernel function prolong2boundaries_kernel!(u_boundaries, neighbor_ids, _node_indices, u,
+                                            ::Val{NVARS}, nodes) where {NVARS}
+    boundary = @index(Global, Linear)
+
+    # Copy solution data from the element using "delayed indexing" with
+    # a start value and two step sizes to get the correct face and orientation.
+    element = neighbor_ids[boundary]
+    face_indices = _node_indices[boundary]
+
+    # Running volume indices (`i_vol`, `j_vol`, `k_vol`) and their step sizes
+    i_vol, i_step_i, i_step_j = index_to_start_step_3d(face_indices[1], nodes)
+    j_vol, j_step_i, j_step_j = index_to_start_step_3d(face_indices[2], nodes)
+    k_vol, k_step_i, k_step_j = index_to_start_step_3d(face_indices[3], nodes)
+
+    for j in nodes
+        for i in nodes
+            for v in 1:NVARS
+                u_boundaries[v, i, j, boundary] = u[v, i_vol, j_vol, k_vol, element]
+            end
+            # advance along the first surface index
+            i_vol += i_step_i
+            j_vol += j_step_i
+            k_vol += k_step_i
+        end
+        # advance along the second surface index
+        i_vol += i_step_j
+        j_vol += j_step_j
+        k_vol += k_step_j
+    end
+end
+
+# Boundary fluxes on a KernelAbstractions.jl backend.
+# Launches `boundary_flux_kernel!` with one work item per entry of
+# `boundary_indexing`, i.e., per boundary handled by `boundary_condition`.
+@inline function _calc_boundary_flux!(backend::Backend, cache, t,
+                                      boundary_condition, boundary_indexing,
+                                      mesh::Union{P4estMesh{3}, T8codeMesh{3}},
+                                      equations, surface_integral, dg::DG)
+    @unpack boundaries, elements = cache
+
+    # The kernel reads `boundary_indexing[local_index]` for every work item, so
+    # the launch size must be the length of `boundary_indexing`. Launching over
+    # `nboundaries(boundaries)` reads out of bounds as soon as
+    # `boundary_indexing` covers only a subset of all boundaries.
+    n_local_boundaries = length(boundary_indexing)
+    (nboundaries(boundaries) == 0 || n_local_boundaries == 0) && return nothing
+
+    @unpack neighbor_ids, node_indices = boundaries
+    @unpack surface_flux_values, node_coordinates, contravariant_vectors = elements
+    nodes = eachnode(dg)
+    kernel! = boundary_flux_kernel!(backend)
+
+    kernel!(surface_flux_values, t, boundary_condition, boundary_indexing, equations,
+            surface_integral.surface_flux, nodes,
+            boundaries.u, neighbor_ids, node_indices,
+            node_coordinates, contravariant_vectors,
+            ndrange = n_local_boundaries)
+    return nothing
+end
+
+# Kernel evaluating `boundary_condition` at all nodes of one boundary face and
+# storing the result in `surface_flux_values`. The global work-item index is a
+# position in `boundary_indexing`, which yields the actual boundary index.
+@kernel function boundary_flux_kernel!(surface_flux_values, t,
+                                       boundary_condition, boundary_indexing, equations,
+                                       surface_flux, nodes,
+                                       u_boundaries, neighbor_ids, _node_indices,
+                                       node_coordinates, contravariant_vectors)
+    local_index = @index(Global, Linear)
+    NVARS = Val(nvariables(equations))
+    boundary = boundary_indexing[local_index]
+
+    # Adjacent element and face information
+    element = neighbor_ids[boundary]
+    face_indices = _node_indices[boundary]
+    direction = indices2direction(face_indices)
+
+    # Running volume indices (`i_vol`, `j_vol`, `k_vol`) and their step sizes
+    i_vol, i_step_i, i_step_j = index_to_start_step_3d(face_indices[1], nodes)
+    j_vol, j_step_i, j_step_j = index_to_start_step_3d(face_indices[2], nodes)
+    k_vol, k_step_i, k_step_j = index_to_start_step_3d(face_indices[3], nodes)
+
+    for j in nodes
+        for i in nodes
+            # Extract solution data from boundary container
+            u_inner = get_svector(u_boundaries, NVARS, i, j, boundary)
+
+            # Outward-pointing normal direction (not normalized)
+            normal_direction = get_normal_direction(direction, contravariant_vectors,
+                                                    i_vol, j_vol, k_vol, element)
+
+            # Coordinates at boundary node
+            x = get_svector(node_coordinates, Val(3), i_vol, j_vol, k_vol, element)
+
+            boundary_flux = boundary_condition(u_inner, normal_direction, x, t,
+                                               surface_flux, equations)
+
+            # Copy flux to element storage in the correct orientation
+            for v in eachvariable(equations)
+                surface_flux_values[v, i, j, direction, element] = boundary_flux[v]
+            end
+
+            # advance along the first surface index
+            i_vol += i_step_i
+            j_vol += j_step_i
+            k_vol += k_step_i
+        end
+        # advance along the second surface index
+        i_vol += i_step_j
+        j_vol += j_step_j
+        k_vol += k_step_j
+    end
+end
+
+# Mortars are not implemented for the KernelAbstractions.jl code path yet, so
+# raise an error as soon as the cache contains any.
+@inline function _prolong2mortars!(backend::Backend, cache, u, mesh::P4estMesh{3},
+                                   equations, mortar_l2::LobattoLegendreMortarL2,
+                                   surface_integral, dg::DGSEM)
+    nmortars(dg, cache) > 0 &&
+        error("mortars currently not supported by KA.jl P4estMesh solver")
+    return nothing
+end
+
+# No-op: mortar fluxes are not computed when running on a `Backend`.
+@inline function _calc_mortar_flux!(backend::Backend, surface_flux_values,
+                                    mesh::P4estMesh{3}, nonconservative_terms,
+                                    equations, mortar_l2::LobattoLegendreMortarL2,
+                                    surface_integral, dg::DG, cache)
+    return nothing
+end
+
+@inline function _calc_surface_integral!(backend::Backend, du, u,
+                                         mesh::P4estMesh{3},
+                                         equations,
+                                         surface_integral::SurfaceIntegralWeakForm,
+                                         dg::DGSEM, cache)
+    # Launch `surface_integral_kernel!` on `backend` with one work item per
+    # element of `cache.elements`.
+    boundary_interpolation = dg.basis.boundary_interpolation
+    surface_flux_values = cache.elements.surface_flux_values
+
+    # Note that all fluxes have been computed with outward-pointing normal vectors.
+    # Read the two interpolation factors once here and pass them on as scalars.
+    factor_lower = boundary_interpolation[1, 1]
+    factor_upper = boundary_interpolation[nnodes(dg), 2]
+
+    launch! = surface_integral_kernel!(backend)
+    launch!(du, u, Val(nvariables(equations)), factor_lower, factor_upper,
+            eachnode(dg), surface_flux_values; ndrange = nelements(cache.elements))
+    return nothing
+end
+
+# Add the surface flux contributions of all six faces to `du`; each work item
+# handles one element. Explicit assignments are used instead of `+=` to let
+# `@muladd` turn the updates into FMAs. The six updates of a node are kept in
+# a fixed order (-x, +x, -y, +y, -z, +z).
+@kernel function surface_integral_kernel!(du, u, ::Val{NVARS},
+                                          boundary_interp_factor_1, boundary_interp_factor_2,
+                                          nodes, surface_flux_values) where {NVARS}
+    element = @index(Global, Linear)
+    last_node = length(nodes)
+
+    for q in nodes, p in nodes, var in 1:NVARS
+        # face at -x (direction index 1)
+        du[var, 1, p, q, element] = (du[var, 1, p, q, element] +
+                                     surface_flux_values[var, p, q, 1, element] *
+                                     boundary_interp_factor_1)
+
+        # face at +x (direction index 2)
+        du[var, last_node, p, q, element] = (du[var, last_node, p, q, element] +
+                                             surface_flux_values[var, p, q, 2, element] *
+                                             boundary_interp_factor_2)
+
+        # face at -y (direction index 3)
+        du[var, p, 1, q, element] = (du[var, p, 1, q, element] +
+                                     surface_flux_values[var, p, q, 3, element] *
+                                     boundary_interp_factor_1)
+
+        # face at +y (direction index 4)
+        du[var, p, last_node, q, element] = (du[var, p, last_node, q, element] +
+                                             surface_flux_values[var, p, q, 4, element] *
+                                             boundary_interp_factor_2)
+
+        # face at -z (direction index 5)
+        du[var, p, q, 1, element] = (du[var, p, q, 1, element] +
+                                     surface_flux_values[var, p, q, 5, element] *
+                                     boundary_interp_factor_1)
+
+        # face at +z (direction index 6)
+        du[var, p, q, last_node, element] = (du[var, p, q, last_node, element] +
+                                             surface_flux_values[var, p, q, 6, element] *
+                                             boundary_interp_factor_2)
+    end
+end
+
+# Launch `_apply_jacobian_kernel!` on `backend` with one work item per element
+# of `cache.elements`.
+@inline function _apply_jacobian!(backend::Backend, du, mesh::P4estMesh{3},
+                                  equations, dg::DG, cache)
+    elements = cache.elements
+    launch! = _apply_jacobian_kernel!(backend)
+    launch!(du, elements.inverse_jacobian, Val(nvariables(equations)), eachnode(dg);
+            ndrange = nelements(elements))
+    return nothing
+end
+
+@kernel function _apply_jacobian_kernel!(du, inverse_jacobian,
+                                         ::Val{NVARS}, nodes) where {NVARS}
+    elem = @index(Global, Linear) # each work item scales `du` of one element
+    for kk in nodes, jj in nodes, ii in nodes
+        scale = -inverse_jacobian[ii, jj, kk, elem] # note the negative sign
+        for var in 1:NVARS
+            du[var, ii, jj, kk, elem] = du[var, ii, jj, kk, elem] * scale
+        end
+    end
+end
+
+# Launch `_calc_sources_kernel!` on `backend` with one work item per element
+# of `cache.elements`.
+@inline function _calc_sources!(backend::Backend, du, u, t, source_terms,
+                                equations::AbstractEquations{3}, dg::DG, cache)
+    elements = cache.elements
+    n_variables = Val(nvariables(equations))
+
+    launch! = _calc_sources_kernel!(backend)
+    launch!(du, u, t, source_terms, equations, n_variables, eachnode(dg),
+            elements.node_coordinates; ndrange = nelements(elements))
+    return nothing
+end
+
+@kernel function _calc_sources_kernel!(du, u, t, source_terms, equations, NVARS,
+                                       nodes, node_coordinates)
+    elem = @index(Global, Linear) # each work item adds the sources of one element
+    for kk in nodes, jj in nodes, ii in nodes
+        state = get_svector(u, NVARS, ii, jj, kk, elem)
+        coords = get_svector(node_coordinates, Val(3), ii, jj, kk, elem)
+        source = source_terms(state, coords, t, equations)
+        add_to_first_axis!(du, source, ii, jj, kk, elem)
+    end
+end
+end # @muladd
\ No newline at end of file
diff --git a/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl
new file mode 100644
index 00000000000..3ef977af499
--- /dev/null
+++ b/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl
@@ -0,0 +1,178 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Launch `prolong2mpiinterfaces_kernel!` with one work item per MPI interface;
+# return early without launching anything if there are no MPI interfaces.
+@inline function _prolong2mpiinterfaces!(backend::Backend, cache, u,
+                                         mesh::P4estMesh{3},
+                                         equations, surface_integral, dg::DG)
+    interfaces = cache.mpi_interfaces
+    n_interfaces = nmpiinterfaces(interfaces)
+    n_interfaces == 0 && return nothing
+
+    launch! = prolong2mpiinterfaces_kernel!(backend)
+    launch!(interfaces.u, interfaces.local_sides, interfaces.local_neighbor_ids,
+            interfaces.node_indices, u, Val(nvariables(equations)), eachnode(dg);
+            ndrange = n_interfaces)
+    return nothing
+end
+
+@kernel function prolong2mpiinterfaces_kernel!(u_mpi_interfaces, local_sides,
+                                               local_neighbor_ids,
+                                               node_indices, u, ::Val{NVARS},
+                                               nodes) where {NVARS}
+    interface = @index(Global, Linear)
+    # Each work item copies the solution data of one MPI interface from the local
+    # element, using "delayed indexing" with a start value and a step size to
+    # get the correct face and orientation. Note that in the current
+    # implementation, the interface will be "aligned at the primary element",
+    # i.e., the index of the primary side will always run forwards.
+    local_side = local_sides[interface]
+    local_element = local_neighbor_ids[interface]
+    local_indices = node_indices[interface]
+
+    i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1],
+                                                                                 nodes)
+    j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2],
+                                                                                 nodes)
+    k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3],
+                                                                                 nodes)
+    # Running volume node indices, advanced alongside the (i, j) face loops below
+    i_element = i_element_start
+    j_element = j_element_start
+    k_element = k_element_start
+    for j in nodes
+        for i in nodes
+            for v in 1:NVARS
+                u_mpi_interfaces[local_side, v, i, j, interface] = u[v, i_element,
+                                                                     j_element,
+                                                                     k_element,
+                                                                     local_element]
+            end
+            i_element += i_element_step_i
+            j_element += j_element_step_i
+            k_element += k_element_step_i
+        end
+        i_element += i_element_step_j
+        j_element += j_element_step_j
+        k_element += k_element_step_j
+    end
+end
+
+# Launch `mpi_interface_flux_kernel!` with one work item per MPI interface.
+# This method only accepts `nonconservative_terms::False`.
+@inline function _calc_mpi_interface_flux!(backend::Backend, surface_flux_values,
+                                           mesh::ParallelP4estMesh{3},
+                                           nonconservative_terms::False,
+                                           equations, surface_integral, dg::DG, cache)
+    interfaces = cache.mpi_interfaces
+    n_interfaces = nmpiinterfaces(interfaces)
+    n_interfaces == 0 && return nothing
+
+    launch! = mpi_interface_flux_kernel!(backend)
+    launch!(surface_flux_values, equations, surface_integral.surface_flux,
+            eachnode(dg), interfaces.u, interfaces.local_neighbor_ids,
+            interfaces.node_indices, interfaces.local_sides,
+            cache.elements.contravariant_vectors; ndrange = n_interfaces)
+    return nothing
+end
+
+@kernel function mpi_interface_flux_kernel!(surface_flux_values, equations,
+                                            surface_flux, nodes,
+                                            u_mpi_interfaces, local_neighbor_ids,
+                                            node_indices, local_sides,
+                                            contravariant_vectors)
+    interface = @index(Global, Linear) # each work item handles one MPI interface
+    NVARS = Val(nvariables(equations))
+
+    # Get element and side index information on the local element
+    local_element = local_neighbor_ids[interface]
+    local_indices = node_indices[interface]
+    local_direction = indices2direction(local_indices)
+    local_side = local_sides[interface]
+
+    # Create the local i,j,k indexing on the local element used to pull normal direction information
+    i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1],
+                                                                                 nodes)
+    j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2],
+                                                                                 nodes)
+    k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3],
+                                                                                 nodes)
+
+    i_element = i_element_start
+    j_element = j_element_start
+    k_element = k_element_start
+
+    # Initialize the node indices used in the surface loop below; the surface
+    # flux storage must be indexed in alignment with the local element indexing
+    local_surface_indices = surface_indices(local_indices)
+    i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1],
+                                                                                 nodes)
+    j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2],
+                                                                                 nodes)
+    i_surface = i_surface_start
+    j_surface = j_surface_start
+
+    for j in nodes
+        for i in nodes
+            # Get the normal direction on the local element
+            # Contravariant vectors at interfaces in negative coordinate direction
+            # are pointing inwards. This is handled by `get_normal_direction`.
+            normal_direction = get_normal_direction(local_direction,
+                                                    contravariant_vectors,
+                                                    i_element, j_element, k_element,
+                                                    local_element)
+            u_ll, u_rr = get_svectors(u_mpi_interfaces, NVARS, i, j, interface)
+            # On side 2, call the flux with the flipped normal and flip the result's sign
+            if local_side == 1
+                flux_ = surface_flux(u_ll, u_rr, normal_direction, equations)
+            else # local_side == 2
+                flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations)
+            end
+
+            for v in 1:nvariables(equations)
+                surface_flux_values[v, i_surface, j_surface,
+                local_direction, local_element] = flux_[v]
+            end
+
+            # Increment local element indices to pull the normal direction
+            i_element += i_element_step_i
+            j_element += j_element_step_i
+            k_element += k_element_step_i
+            # Increment the surface node indices along the local element
+            i_surface += i_surface_step_i
+            j_surface += j_surface_step_i
+        end
+        # Increment local element indices to pull the normal direction
+        i_element += i_element_step_j
+        j_element += j_element_step_j
+        k_element += k_element_step_j
+        # Increment the surface node indices along the local element
+        i_surface += i_surface_step_j
+        j_surface += j_surface_step_j
+    end
+end
+
+# MPI mortars are not implemented for the KernelAbstractions.jl code path yet,
+# so raise an error as soon as the cache contains any.
+@inline function _prolong2mpimortars!(backend::Backend, cache, u,
+                                      mesh::ParallelP4estMesh{3}, equations,
+                                      mortar_l2::LobattoLegendreMortarL2,
+                                      surface_integral, dg::DGSEM)
+    nmpimortars(dg, cache) > 0 &&
+        error("mortars currently not supported by KA.jl P4estMesh solver")
+    return nothing
+end
+
+# No-op: MPI mortar fluxes are not computed when running on a `Backend`.
+@inline function _calc_mpi_mortar_flux!(backend::Backend, surface_flux_values,
+                                        mesh::ParallelP4estMesh{3},
+                                        nonconservative_terms, equations,
+                                        mortar_l2::LobattoLegendreMortarL2,
+                                        surface_integral, dg::DG, cache)
+end
+end # @muladd
\ No newline at end of file
diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
index e504e06d2c4..190717e2ab3 100644
--- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
@@ -109,6 +109,10 @@ function rhs!(du, u, t,
     # Finish to send MPI data
     @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache)
 
+    if mesh isa P4estMesh && uses_ka(cache.elements)
+        synchronize(get_backend(cache.elements))
+    end
+
     return nothing
 end
 
@@ -116,6 +120,14 @@ function prolong2mpiinterfaces!(cache, u,
                                 mesh::Union{ParallelP4estMesh{3},
                                             ParallelT8codeMesh{3}},
                                 equations, surface_integral, dg::DG)
+    backend = backend_or_nothing(cache.mpi_interfaces)
+    _prolong2mpiinterfaces!(backend, cache, u, mesh, equations, surface_integral, dg)
+end
+
+@inline function _prolong2mpiinterfaces!(backend::Nothing, cache, u,
+                                         mesh::Union{ParallelP4estMesh{3},
+                                                     ParallelT8codeMesh{3}},
+                                         equations, surface_integral, dg::DG)
     @unpack mpi_interfaces = cache
     index_range = eachnode(dg)
 
@@ -165,6 +177,16 @@ function calc_mpi_interface_flux!(surface_flux_values,
                                               ParallelT8codeMesh{3}},
                                   nonconservative_terms,
                                   equations, surface_integral, dg::DG, cache)
+    backend = backend_or_nothing(cache.mpi_interfaces)
+    _calc_mpi_interface_flux!(backend, surface_flux_values, mesh, nonconservative_terms,
+                              equations, surface_integral, dg, cache)
+end
+
+@inline function _calc_mpi_interface_flux!(backend::Nothing, surface_flux_values,
+                                           mesh::Union{ParallelP4estMesh{3},
+                                                       ParallelT8codeMesh{3}},
+                                           nonconservative_terms,
+                                           equations, surface_integral, dg::DG, cache)
     @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces
     @unpack contravariant_vectors = cache.elements
     index_range = eachnode(dg)
@@ -272,6 +294,16 @@ function prolong2mpimortars!(cache, u,
                              equations,
                              mortar_l2::LobattoLegendreMortarL2,
                              surface_integral, dg::DGSEM)
+    backend = backend_or_nothing(cache.mpi_mortars)
+    _prolong2mpimortars!(backend, cache, u, mesh, equations,
+                         mortar_l2, surface_integral, dg)
+end
+
+@inline function _prolong2mpimortars!(backend::Nothing, cache, u,
+                                      mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}},
+                                      equations,
+                                      mortar_l2::LobattoLegendreMortarL2,
+                                      surface_integral, dg::DGSEM)
     @unpack node_indices = cache.mpi_mortars
     index_range = eachnode(dg)
 
@@ -382,6 +414,16 @@ function calc_mpi_mortar_flux!(surface_flux_values,
                                nonconservative_terms, equations,
                                mortar_l2::LobattoLegendreMortarL2,
                                surface_integral, dg::DG, cache)
+    backend = backend_or_nothing(cache.mpi_mortars)
+    _calc_mpi_mortar_flux!(backend, surface_flux_values, mesh, nonconservative_terms, equations,
+                           mortar_l2, surface_integral, dg, cache)
+end
+
+@inline function _calc_mpi_mortar_flux!(backend::Nothing, surface_flux_values,
+                                        mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}},
+                                        nonconservative_terms, equations,
+                                        mortar_l2::LobattoLegendreMortarL2,
+                                        surface_integral, dg::DG, cache)
     @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
     @unpack contravariant_vectors = cache.elements
     @unpack fstar_threaded, fstar_tmp_threaded = cache
diff --git a/src/solvers/dgsem_p4est/dg_gpu_parallel.jl b/src/solvers/dgsem_p4est/dg_gpu_parallel.jl
new file mode 100644
index 00000000000..a4041652523
--- /dev/null
+++ b/src/solvers/dgsem_p4est/dg_gpu_parallel.jl
@@ -0,0 +1,76 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Copy the MPI interface data for every neighbor rank into its send buffer on
+# `backend` and start the non-blocking sends. TODO: Support MPI mortars
+@inline function _start_mpi_send!(backend::Backend, mpi_cache::P4estMPICache,
+                                  mesh::ParallelP4estMesh{3}, equations, dg, cache)
+    interfaces = cache.mpi_interfaces
+    send_requests = mpi_cache.mpi_send_requests
+    copy_kernel! = copy_to_mpi_send!(backend)
+
+    for (index, rank) in enumerate(mpi_cache.mpi_neighbor_ranks)
+        buffer = mpi_cache.mpi_send_buffers[index]
+        interface_ids = mpi_cache.mpi_neighbor_interfaces[index]
+        copy_kernel!(buffer, interface_ids, interfaces.local_sides, interfaces.u,
+                     Val(nvariables(equations)), Val(ndims(mesh));
+                     ndrange = (nnodes(dg), nnodes(dg), length(interface_ids)))
+        # Wait for the copy kernel before handing the buffer to MPI
+        synchronize(backend)
+        send_requests[index] = MPI.Isend(buffer, rank, mpi_rank(), mpi_comm())
+    end
+    return nothing
+end
+
+# Copy the local-side data of one interface node per work item to `send_buffer`.
+@kernel function copy_to_mpi_send!(send_buffer, neighbor_interfaces, local_sides,
+                                   u_mpi_interfaces, ::Val{NVARS}, ::Val{3}) where {NVARS}
+    i, j, slot = @index(Global, NTuple)
+    linear = @index(Global, Linear)
+    offset = (linear - 1) * NVARS # NVARS consecutive buffer entries per work item
+    interface = neighbor_interfaces[slot]
+    side = local_sides[interface]
+    for v in 1:NVARS
+        send_buffer[offset + v] = u_mpi_interfaces[side, v, i, j, interface]
+    end
+end
+
+# Whenever a receive request completes, launch a kernel on `backend` that copies
+# the received buffer into `cache.mpi_interfaces.u`.
+@inline function _finish_mpi_receive!(backend::Backend, mpi_cache::P4estMPICache,
+                                      mesh, equations, dg, cache)
+    interfaces = cache.mpi_interfaces
+    recv_requests = mpi_cache.mpi_recv_requests
+    copy_kernel! = copy_from_mpi_recv!(backend)
+
+    # The loop ends once `MPI.Waitany` returns `nothing`
+    completed = MPI.Waitany(recv_requests)
+    while completed !== nothing
+        buffer = mpi_cache.mpi_recv_buffers[completed]
+        interface_ids = mpi_cache.mpi_neighbor_interfaces[completed]
+        copy_kernel!(buffer, interface_ids, interfaces.local_sides, interfaces.u,
+                     Val(nvariables(equations)), Val(ndims(mesh));
+                     ndrange = (nnodes(dg), nnodes(dg), length(interface_ids)))
+        completed = MPI.Waitany(recv_requests)
+    end
+    # Synchronize once after all copy kernels have been launched
+    return synchronize(backend)
+end
+
+# Copy one interface node per work item from `recv_buffer` to the non-local side.
+@kernel function copy_from_mpi_recv!(recv_buffer, neighbor_interfaces, local_sides,
+                                     u_mpi_interfaces, ::Val{NVARS}, ::Val{3}) where {NVARS}
+    i, j, slot = @index(Global, NTuple)
+    linear = @index(Global, Linear)
+    offset = (linear - 1) * NVARS # NVARS consecutive buffer entries per work item
+    interface = neighbor_interfaces[slot]
+    other_side = local_sides[interface] == 1 ? 2 : 1
+    for v in 1:NVARS
+        u_mpi_interfaces[other_side, v, i, j, interface] = recv_buffer[offset + v]
+    end
+end
+end # @muladd
\ No newline at end of file
diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl
index 8664e6a2bae..65bb14e775e 100644
--- a/src/solvers/dgsem_p4est/dg_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_parallel.jl
@@ -48,8 +48,8 @@ end
 
 function Adapt.adapt_structure(to, mpi_cache::P4estMPICache)
     mpi_neighbor_ranks = mpi_cache.mpi_neighbor_ranks
-    mpi_neighbor_interfaces = mpi_cache.mpi_neighbor_interfaces
-    mpi_neighbor_mortars = mpi_cache.mpi_neighbor_mortars
+    mpi_neighbor_interfaces = Adapt.adapt_structure(to, mpi_cache.mpi_neighbor_interfaces)
+    mpi_neighbor_mortars = Adapt.adapt_structure(to, mpi_cache.mpi_neighbor_mortars)
     mpi_send_buffers = Adapt.adapt_structure(to, mpi_cache.mpi_send_buffers)
     mpi_recv_buffers = Adapt.adapt_structure(to, mpi_cache.mpi_recv_buffers)
     mpi_send_requests = mpi_cache.mpi_send_requests
@@ -60,14 +60,21 @@ function Adapt.adapt_structure(to, mpi_cache::P4estMPICache)
 
     @assert eltype(mpi_send_buffers) == eltype(mpi_recv_buffers)
     BufferType = eltype(mpi_send_buffers)
-    return P4estMPICache{BufferType}(mpi_neighbor_ranks, mpi_neighbor_interfaces,
-                                     mpi_neighbor_mortars, mpi_send_buffers,
-                                     mpi_recv_buffers, mpi_send_requests,
-                                     mpi_recv_requests, n_elements_by_rank,
-                                     n_elements_global, first_element_global_id)
+    VecInt = eltype(mpi_neighbor_interfaces)
+    return P4estMPICache{BufferType, VecInt}(mpi_neighbor_ranks, mpi_neighbor_interfaces,
+                                             mpi_neighbor_mortars, mpi_send_buffers,
+                                             mpi_recv_buffers, mpi_send_requests,
+                                             mpi_recv_requests, n_elements_by_rank,
+                                             n_elements_global, first_element_global_id)
 end
 
 function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
+    backend = backend_or_nothing(cache.elements)
+    _start_mpi_send!(backend, mpi_cache, mesh, equations, dg, cache)
+end
+
+@inline function _start_mpi_send!(backend::Nothing, mpi_cache::P4estMPICache,
+                                  mesh, equations, dg, cache)
     data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
     n_small_elements = 2^(ndims(mesh) - 1)
 
@@ -135,6 +142,12 @@ function finish_mpi_send!(mpi_cache::P4estMPICache)
 end
 
 function finish_mpi_receive!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
+    backend = backend_or_nothing(cache.elements)
+    _finish_mpi_receive!(backend, mpi_cache, mesh, equations, dg, cache)
+end
+
+@inline function _finish_mpi_receive!(backend::Nothing, mpi_cache::P4estMPICache,
+                                      mesh, equations, dg, cache)
     data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
     n_small_elements = 2^(ndims(mesh) - 1)
     n_positions = n_small_elements + 1
@@ -607,4 +620,6 @@ end
 
 include("dg_2d_parallel.jl")
 include("dg_3d_parallel.jl")
+include("dg_gpu_parallel.jl")
+include("dg_3d_gpu_parallel.jl")
 end # muladd
diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl
index 1df9f408895..401f20cb8f6 100644
--- a/src/solvers/dgsem_structured/dg_3d.jl
+++ b/src/solvers/dgsem_structured/dg_3d.jl
@@ -790,6 +790,14 @@ end
 function apply_jacobian!(du,
                          mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                          equations, dg::DG, cache)
+    backend = backend_or_nothing(cache.elements)
+    _apply_jacobian!(backend, du, mesh, equations, dg, cache)
+    return nothing
+end
+
+@inline function _apply_jacobian!(::Nothing, du,
+                                  mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
+                                  equations, dg::DG, cache)
     @threaded for element in eachelement(dg, cache)
         for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
             factor = -cache.elements.inverse_jacobian[i, j, k, element]
diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl
index 0993b3c9b85..251fce8c958 100644
--- a/src/solvers/dgsem_tree/dg.jl
+++ b/src/solvers/dgsem_tree/dg.jl
@@ -8,11 +8,20 @@
 # du .= zero(eltype(du)) doesn't scale when using multiple threads.
 # See https://github.com/trixi-framework/Trixi.jl/pull/924 for a performance comparison.
 function reset_du!(du, dg, cache)
+    backend = backend_or_nothing(cache.elements)
+    _reset_du!(backend, du, dg, cache)
+    return du
+end
+
+@inline function _reset_du!(::Union{Nothing, CPU}, du, dg,
+                            cache)
     @threaded for element in eachelement(dg, cache)
         du[.., element] .= zero(eltype(du))
     end
+end
 
-    return du
+@inline function _reset_du!(::Backend, du, dg, cache)
+    fill!(du, zero(eltype(du)))
 end
 
 #     pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache)
diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl
index 02ff338e912..4ac8e8c832f 100644
--- a/src/solvers/dgsem_tree/dg_3d.jl
+++ b/src/solvers/dgsem_tree/dg_3d.jl
@@ -208,6 +208,10 @@ function rhs!(du, u, t,
         calc_sources!(du, u, t, source_terms, equations, dg, cache)
     end
 
+    if mesh isa P4estMesh && uses_ka(cache.elements)
+        synchronize(get_backend(cache.elements))
+    end
+
     return nothing
 end
 
@@ -217,6 +221,18 @@ function calc_volume_integral!(du, u,
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralWeakForm,
                                dg::DGSEM, cache)
+    backend = backend_or_nothing(cache.elements)
+    _calc_volume_integral!(backend, du, u, mesh, nonconservative_terms, equations,
+                           volume_integral, dg, cache)
+    return nothing
+end
+
+@inline function _calc_volume_integral!(backend::Nothing, du, u,
+                                        mesh::Union{TreeMesh{3}, StructuredMesh{3},
+                                                    P4estMesh{3}, T8codeMesh{3}},
+                                        nonconservative_terms, equations,
+                                        volume_integral::VolumeIntegralWeakForm,
+                                        dg::DGSEM, cache)
     @threaded for element in eachelement(dg, cache)
         weak_form_kernel!(du, u, element, mesh,
                           nonconservative_terms, equations,
@@ -267,11 +283,22 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
 end
 
 function calc_volume_integral!(du, u,
-                               mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3},
-                                           T8codeMesh{3}},
+                               mesh::Union{TreeMesh{3}, StructuredMesh{3},
+                                           P4estMesh{3}, T8codeMesh{3}},
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralFluxDifferencing,
                                dg::DGSEM, cache)
+    backend = backend_or_nothing(cache.elements)
+    _calc_volume_integral!(backend, du, u, mesh, nonconservative_terms, equations,
+                           volume_integral, dg, cache)
+end
+
+@inline function _calc_volume_integral!(backend::Nothing, du, u,
+                                        mesh::Union{TreeMesh{3}, StructuredMesh{3},
+                                                    P4estMesh{3}, T8codeMesh{3}},
+                                        nonconservative_terms, equations,
+                                        volume_integral::VolumeIntegralFluxDifferencing,
+                                        dg::DGSEM, cache)
     @threaded for element in eachelement(dg, cache)
         flux_differencing_kernel!(du, u, element, mesh,
                                   nonconservative_terms, equations,
@@ -1385,6 +1412,12 @@ end
 
 function calc_sources!(du, u, t, source_terms,
                        equations::AbstractEquations{3}, dg::DG, cache)
+    backend = backend_or_nothing(cache.elements)
+    _calc_sources!(backend, du, u, t, source_terms, equations, dg, cache)
+end
+
+@inline function _calc_sources!(::Nothing, du, u, t, source_terms,
+                                equations::AbstractEquations{3}, dg::DG, cache)
     @unpack node_coordinates = cache.elements
 
     @threaded for element in eachelement(dg, cache)

From 3bbd3a15538240977dcdef1b5c6127b24a1c5650 Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Tue, 2 Jul 2024 17:07:08 +0200
Subject: [PATCH 59/89] Update
 examples/t8code_2d_dgsem/elixir_advection_restart.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 examples/t8code_2d_dgsem/elixir_advection_restart.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart.jl b/examples/t8code_2d_dgsem/elixir_advection_restart.jl
index 0f573714c1f..6fb14c60038 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_restart.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart.jl
@@ -6,7 +6,7 @@ using Trixi
 # create a restart file
 
 elixir_file = "elixir_advection_extended.jl"
-restart_file = "restart_000021.h5"
+restart_file = "restart_000000021.h5"
 
 trixi_include(@__MODULE__, joinpath(@__DIR__, elixir_file))
 

From ab9c33f020e8160b8fb136216a01a6fd95e96214 Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Tue, 2 Jul 2024 17:07:16 +0200
Subject: [PATCH 60/89] Update
 examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
index 68fa726227b..2c0f14c8d10 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
@@ -6,7 +6,7 @@ using Trixi
 # create a restart file
 
 elixir_file = "elixir_advection_extended.jl"
-restart_file = "restart_000021.h5"
+restart_file = "restart_000000021.h5"
 
 trixi_include(@__MODULE__, joinpath(@__DIR__, elixir_file))
 

From 9e49219a248f131178b051097acd5fb085e0a909 Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Tue, 2 Jul 2024 17:07:27 +0200
Subject: [PATCH 61/89] Update
 examples/t8code_3d_dgsem/elixir_advection_restart.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 examples/t8code_3d_dgsem/elixir_advection_restart.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/t8code_3d_dgsem/elixir_advection_restart.jl b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
index b3dead42399..9d19d81cf47 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_restart.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
@@ -14,7 +14,7 @@ trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_basic.jl"),
 # Note: If you get a restart file from somewhere else, you need to provide
 # appropriate setups in the elixir loading a restart file
 
-restart_filename = joinpath("out", "restart_000010.h5")
+restart_filename = joinpath("out", "restart_000000010.h5")
 mesh = load_mesh(restart_filename)
 
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,

From 677236f1970be823380fb3d47bf7f99a4af244d5 Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Tue, 2 Jul 2024 17:07:38 +0200
Subject: [PATCH 62/89] Update src/meshes/mesh_io.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 src/meshes/mesh_io.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl
index 684d8c34943..a93401b624f 100644
--- a/src/meshes/mesh_io.jl
+++ b/src/meshes/mesh_io.jl
@@ -239,7 +239,7 @@ function save_mesh_file(mesh::T8codeMesh, output_directory, timestep,
 
     # Determine file name based on existence of meaningful time step.
     if timestep > 0
-        filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep))
+        filename = joinpath(output_directory, @sprintf("mesh_%09d.h5", timestep))
     else
         filename = joinpath(output_directory, "mesh.h5")
     end

From 404fc85d5716a45c8aaca3d907d92cfd4df0954a Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Thu, 4 Jul 2024 10:51:33 +0200
Subject: [PATCH 63/89] Update
 examples/t8code_3d_dgsem/elixir_advection_restart.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 examples/t8code_3d_dgsem/elixir_advection_restart.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/t8code_3d_dgsem/elixir_advection_restart.jl b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
index 9d19d81cf47..dfce8d2558a 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_restart.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
@@ -1,4 +1,3 @@
-
 using OrdinaryDiffEq
 using Trixi
 

From 611a5761efbdce07a3c20cbc444e603a511c4f2e Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Thu, 4 Jul 2024 10:51:57 +0200
Subject: [PATCH 64/89] Update src/meshes/t8code_mesh.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 src/meshes/t8code_mesh.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 19ee671f6ce..95f1d2ccba4 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -107,7 +107,7 @@ end
 """
     T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
                boundary_names, treeIDs, neighIDs, faces, duals,
-               orientations, levels, num_elements_per_tree))
+               orientations, levels, num_elements_per_tree)
 
 Constructor for the `T8codeMesh`. Typically called by the `load_mesh` routine. 
 

From 7161d1b7b57604c67e9ef2fd707286dfe82df618 Mon Sep 17 00:00:00 2001
From: Johannes Markert <10619309+jmark@users.noreply.github.com>
Date: Thu, 4 Jul 2024 11:23:24 +0200
Subject: [PATCH 65/89] Update src/meshes/t8code_mesh.jl

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 src/meshes/t8code_mesh.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 95f1d2ccba4..eda5c376434 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -1446,7 +1446,7 @@ function get_levels(mesh::T8codeMesh)
 end
 
 function get_cmesh_info(mesh::T8codeMesh)
-    @assert t8_forest_is_committed(mesh.forest) != 0
+    @assert t8_forest_is_committed(mesh.forest) == 1
     cmesh = t8_forest_get_cmesh(mesh.forest)
     return get_cmesh_info(cmesh, ndims(mesh))
 end

From 15cdc0b9bf975ed58c57088fa39eb9111723dc5a Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Fri, 5 Jul 2024 10:24:55 +0200
Subject: [PATCH 66/89] Removing last test in t8code 2D MPI to investigate
 problems in GitHub CI.

---
 test/test_mpi_t8code_2d.jl | 50 +++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/test/test_mpi_t8code_2d.jl b/test/test_mpi_t8code_2d.jl
index db8e3025846..58dfd3c6fe3 100644
--- a/test/test_mpi_t8code_2d.jl
+++ b/test/test_mpi_t8code_2d.jl
@@ -129,31 +129,31 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "t8code_2d_dgsem")
         end
     end
 
-    @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
-        @test_trixi_include(joinpath(EXAMPLES_DIR,
-                                     "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
-                            l2=[
-                                0.0034516244508588046,
-                                0.0023420334036925493,
-                                0.0024261923964557187,
-                                0.004731710454271893,
-                            ],
-                            linf=[
-                                0.04155789011775046,
-                                0.024772109862748914,
-                                0.03759938693042297,
-                                0.08039824959535657,
-                            ])
-
-        # Ensure that we do not have excessive memory allocations
-        # (e.g., from type instabilities)
-        let
-            t = sol.t[end]
-            u_ode = sol.u[end]
-            du_ode = similar(u_ode)
-            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
-        end
-    end
+    # @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
+    #     @test_trixi_include(joinpath(EXAMPLES_DIR,
+    #                                  "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
+    #                         l2=[
+    #                             0.0034516244508588046,
+    #                             0.0023420334036925493,
+    #                             0.0024261923964557187,
+    #                             0.004731710454271893,
+    #                         ],
+    #                         linf=[
+    #                             0.04155789011775046,
+    #                             0.024772109862748914,
+    #                             0.03759938693042297,
+    #                             0.08039824959535657,
+    #                         ])
+
+    #     # Ensure that we do not have excessive memory allocations
+    #     # (e.g., from type instabilities)
+    #     let
+    #         t = sol.t[end]
+    #         u_ode = sol.u[end]
+    #         du_ode = similar(u_ode)
+    #         @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    #     end
+    # end
 end
 end # T8codeMesh MPI
 

From 971d2cf08abf934722573934ebeba63aaa1c506f Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Jul 2024 11:22:50 +0200
Subject: [PATCH 67/89] Refactored a bit.

---
 src/meshes/t8code_mesh.jl | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index eda5c376434..eb16122b660 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -26,11 +26,15 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
     nmpiinterfaces :: Int
     nmpimortars    :: Int
 
-    unsaved_changes::Bool
+    unsaved_changes :: Bool
+
+    # Keeps a reference to the geometry handler in order to avoid gargabe
+    # collection if necessary.
+    geometry :: Any
 
     function T8codeMesh{NDIMS}(forest::Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
-                               current_filename) where {NDIMS}
+                               current_filename; geometry = nothing) where {NDIMS}
         is_parallel = mpi_isparallel() ? True() : False()
 
         mesh = new{NDIMS, Float64, typeof(is_parallel), NDIMS + 2, length(nodes)}(forest,
@@ -41,6 +45,7 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
         mesh.current_filename = current_filename
         mesh.tree_node_coordinates = tree_node_coordinates
         mesh.unsaved_changes = true
+        mesh.geometry = geometry
 
         finalizer(mesh) do mesh
             # When finalizing `mesh.forest`, `mesh.scheme` and `mesh.cmesh` are
@@ -131,18 +136,19 @@ Returns a `T8codeMesh` object with a forest reconstructed by the input arguments
 function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
                     boundary_names, treeIDs, neighIDs, faces, duals,
                     orientations, levels, num_elements_per_tree)
-    Trixi.cmesh_ref = Ref(t8_cmesh_t())
-    t8_cmesh_init(Trixi.cmesh_ref)
-    cmesh = Trixi.cmesh_ref[]
+    # Initialize the `cmesh` object.
+    cmesh_ref = Ref(t8_cmesh_t())
+    t8_cmesh_init(cmesh_ref)
+    cmesh = cmesh_ref[]
 
     # Use linear geometry for now. There is no real Lagrange geometry
     # implementation (volume nodes) yet in t8code. Moreover, we need to store
-    # the pointer variables in the `Trixi` package in order to avoid garbage
+    # the pointer variables in the `mesh` object in order to avoid garbage
     # collection. Otherwise t8code segfaults. This is an un-feature of t8code
     # (recently introduced) and will be fixed in the near future.
-    Trixi.linear_geom = Trixi.t8_geometry_linear_new(ndims)
-    Trixi.linear_geom_ptr = pointer_from_objref(Ref(Trixi.linear_geom))
-    t8_cmesh_register_geometry(cmesh, Trixi.linear_geom_ptr)
+    linear_geom = t8_geometry_linear_new(ndims)
+    linear_geom_ptr = pointer_from_objref(Ref(linear_geom))
+    t8_cmesh_register_geometry(cmesh, linear_geom_ptr)
 
     # Determine element class.
     eclass = ndims > 2 ? T8_ECLASS_HEX : T8_ECLASS_QUAD
@@ -261,7 +267,7 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
         forest = partition(forest)
     end
 
-    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")
+    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, ""; geometry = linear_geom_ptr)
 end
 
 """

From a3c169c34e30235ce4f7ba12aead27431499baa7 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Jul 2024 11:27:03 +0200
Subject: [PATCH 68/89] Applied formatter.

---
 src/meshes/t8code_mesh.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index eb16122b660..ad226c8db6b 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -26,11 +26,11 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
     nmpiinterfaces :: Int
     nmpimortars    :: Int
 
-    unsaved_changes :: Bool
+    unsaved_changes::Bool
 
     # Keeps a reference to the geometry handler in order to avoid gargabe
     # collection if necessary.
-    geometry :: Any
+    geometry::Any
 
     function T8codeMesh{NDIMS}(forest::Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
@@ -267,7 +267,8 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
         forest = partition(forest)
     end
 
-    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, ""; geometry = linear_geom_ptr)
+    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "";
+                             geometry = linear_geom_ptr)
 end
 
 """

From 63835207e4c372ebadf7157c49152ed2b4344485 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 8 Jul 2024 11:27:48 +0200
Subject: [PATCH 69/89] Removed commented code.

---
 test/test_mpi_t8code_2d.jl | 50 +++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/test/test_mpi_t8code_2d.jl b/test/test_mpi_t8code_2d.jl
index 58dfd3c6fe3..db8e3025846 100644
--- a/test/test_mpi_t8code_2d.jl
+++ b/test/test_mpi_t8code_2d.jl
@@ -129,31 +129,31 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "t8code_2d_dgsem")
         end
     end
 
-    # @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
-    #     @test_trixi_include(joinpath(EXAMPLES_DIR,
-    #                                  "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
-    #                         l2=[
-    #                             0.0034516244508588046,
-    #                             0.0023420334036925493,
-    #                             0.0024261923964557187,
-    #                             0.004731710454271893,
-    #                         ],
-    #                         linf=[
-    #                             0.04155789011775046,
-    #                             0.024772109862748914,
-    #                             0.03759938693042297,
-    #                             0.08039824959535657,
-    #                         ])
-
-    #     # Ensure that we do not have excessive memory allocations
-    #     # (e.g., from type instabilities)
-    #     let
-    #         t = sol.t[end]
-    #         u_ode = sol.u[end]
-    #         du_ode = similar(u_ode)
-    #         @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
-    #     end
-    # end
+    @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                     "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
+                            l2=[
+                                0.0034516244508588046,
+                                0.0023420334036925493,
+                                0.0024261923964557187,
+                                0.004731710454271893,
+                            ],
+                            linf=[
+                                0.04155789011775046,
+                                0.024772109862748914,
+                                0.03759938693042297,
+                                0.08039824959535657,
+                            ])
+
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
 end
 end # T8codeMesh MPI
 

From 41ebc0762130c0bbe58f4bf9f572c1f986775150 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 9 Jul 2024 16:46:38 +0200
Subject: [PATCH 70/89] Added LOG_LEVEL variable.

---
 src/auxiliary/t8code.jl | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index d7703b8bbb4..17b90c92ad1 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -14,19 +14,25 @@ function init_t8code()
             return nothing
         end
 
-        # Initialize the sc library, has to happen before we initialize t8code.
-        let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
-            T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
-                                 T8code.Libt8.SC_LP_ERROR)
+        # Initialize `libsc`, `p4est`, and `t8code` with log level
+        # `SC_LP_ERROR` to prevent a lot of output in AMR simulations
+        # For development, log level `SC_LP_DEBUG` is recommended.
+        LOG_LEVEL = T8code.Libt8.SC_LP_ERROR
+
+        if T8code.Libt8.sc_is_initialized() == 0
+            # Initialize the sc library, has to happen before we initialize t8code.
+            let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
+                T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
+                                     LOG_LEVEL)
+            end
         end
 
         if T8code.Libt8.p4est_is_initialized() == 0
-            # Initialize `p4est` with log level ERROR to prevent a lot of output in AMR simulations
-            T8code.Libt8.p4est_init(C_NULL, T8code.Libt8.SC_LP_ERROR)
+            T8code.Libt8.p4est_init(C_NULL, LOG_LEVEL)
         end
 
-        # Initialize t8code with log level ERROR to prevent a lot of output in AMR simulations.
-        t8_init(T8code.Libt8.SC_LP_ERROR)
+        # Initialize t8code.
+        t8_init(LOG_LEVEL)
 
         if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE")
             # Normally, `sc_finalize` should always be called during shutdown of an

From e268b33b920d6fac946d0ede046e412f5d48f9b6 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 9 Jul 2024 17:26:57 +0200
Subject: [PATCH 71/89] Added t8code interface simplification and stitched
 memory leak.

---
 Project.toml              |  2 +-
 src/meshes/t8code_mesh.jl | 43 ++++++++++++++++-----------------------
 2 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/Project.toml b/Project.toml
index 954b22ccaf1..995d46eba4d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -99,7 +99,7 @@ StaticArrays = "1.5"
 StrideArrays = "0.1.26"
 StructArrays = "0.6.11"
 SummationByPartsOperators = "0.5.41"
-T8code = "0.5"
+T8code = "0.6"
 TimerOutputs = "0.5.7"
 Triangulate = "2.2"
 TriplotBase = "0.1"
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index ad226c8db6b..5089a33bba8 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -28,13 +28,9 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
 
     unsaved_changes::Bool
 
-    # Keeps a reference to the geometry handler in order to avoid gargabe
-    # collection if necessary.
-    geometry::Any
-
     function T8codeMesh{NDIMS}(forest::Ptr{t8_forest}, tree_node_coordinates, nodes,
                                boundary_names,
-                               current_filename; geometry = nothing) where {NDIMS}
+                               current_filename) where {NDIMS}
         is_parallel = mpi_isparallel() ? True() : False()
 
         mesh = new{NDIMS, Float64, typeof(is_parallel), NDIMS + 2, length(nodes)}(forest,
@@ -45,7 +41,6 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
         mesh.current_filename = current_filename
         mesh.tree_node_coordinates = tree_node_coordinates
         mesh.unsaved_changes = true
-        mesh.geometry = geometry
 
         finalizer(mesh) do mesh
             # When finalizing `mesh.forest`, `mesh.scheme` and `mesh.cmesh` are
@@ -60,6 +55,7 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
             # objects during long-running sessions.
             if !MPI.Finalized()
                 t8_forest_unref(Ref(mesh.forest))
+                mesh.forest = C_NULL
             end
         end
 
@@ -136,19 +132,13 @@ Returns a `T8codeMesh` object with a forest reconstructed by the input arguments
 function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
                     boundary_names, treeIDs, neighIDs, faces, duals,
                     orientations, levels, num_elements_per_tree)
-    # Initialize the `cmesh` object.
-    cmesh_ref = Ref(t8_cmesh_t())
-    t8_cmesh_init(cmesh_ref)
-    cmesh = cmesh_ref[]
+    # Allocate new cmesh object.
+    cmesh = t8_cmesh_new()
 
     # Use linear geometry for now. There is no real Lagrange geometry
-    # implementation (volume nodes) yet in t8code. Moreover, we need to store
-    # the pointer variables in the `mesh` object in order to avoid garbage
-    # collection. Otherwise t8code segfaults. This is an un-feature of t8code
-    # (recently introduced) and will be fixed in the near future.
+    # implementation (volume nodes) yet in t8code.
     linear_geom = t8_geometry_linear_new(ndims)
-    linear_geom_ptr = pointer_from_objref(Ref(linear_geom))
-    t8_cmesh_register_geometry(cmesh, linear_geom_ptr)
+    t8_cmesh_register_geometry(cmesh, linear_geom)
 
     # Determine element class.
     eclass = ndims > 2 ? T8_ECLASS_HEX : T8_ECLASS_QUAD
@@ -267,8 +257,7 @@ function T8codeMesh(ndims, ntrees, nelements, tree_node_coordinates, nodes,
         forest = partition(forest)
     end
 
-    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "";
-                             geometry = linear_geom_ptr)
+    return T8codeMesh{ndims}(forest, tree_node_coordinates, nodes, boundary_names, "")
 end
 
 """
@@ -1090,11 +1079,12 @@ function count_interfaces(forest::Ptr{t8_forest}, ndims)
                             end
                         end
                     end
-                end
 
-                t8_free(dual_faces_ref[])
-                t8_free(pneighbor_leaves_ref[])
-                t8_free(pelement_indices_ref[])
+                    t8_element_destroy(neighbor_scheme, num_neighbors, neighbor_leaves)
+                    t8_free(dual_faces_ref[])
+                    t8_free(pneighbor_leaves_ref[])
+                    t8_free(pelement_indices_ref[])
+                end
             end # for
 
             current_index += 1
@@ -1434,11 +1424,12 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                             end
                         end
                     end
-                end
 
-                t8_free(dual_faces_ref[])
-                t8_free(pneighbor_leaves_ref[])
-                t8_free(pelement_indices_ref[])
+                    t8_element_destroy(neighbor_scheme, num_neighbors, neighbor_leaves)
+                    t8_free(dual_faces_ref[])
+                    t8_free(pneighbor_leaves_ref[])
+                    t8_free(pelement_indices_ref[])
+                end # num_neighbors
             end # for iface
 
             current_index += 1

From 1e00cb6d9190b2546703638cbca57e4ff8a4d3ca Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Tue, 9 Jul 2024 17:34:56 +0200
Subject: [PATCH 72/89] Applied formatter.

---
 src/auxiliary/t8code.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 17b90c92ad1..86af91b5b51 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -22,7 +22,8 @@ function init_t8code()
         if T8code.Libt8.sc_is_initialized() == 0
             # Initialize the sc library, has to happen before we initialize t8code.
             let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL
-                T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler,
+                T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace,
+                                     log_handler,
                                      LOG_LEVEL)
             end
         end

From 8493c8ed1899457a34c9f320a57b8efd22c0e41b Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 22 Jul 2024 16:22:38 +0200
Subject: [PATCH 73/89] Simplifying finalize behavior for T8codeMesh.

---
 src/auxiliary/t8code.jl   | 18 +++++++-----------
 src/meshes/t8code_mesh.jl | 27 +++++----------------------
 2 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 86af91b5b51..71b05f6ccdb 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -32,19 +32,15 @@ function init_t8code()
             T8code.Libt8.p4est_init(C_NULL, LOG_LEVEL)
         end
 
+        MPI.add_finalize_hook!() do
+            status = T8code.Libt8.sc_finalize_noabort()
+            if status != 0
+              @warn("Inconsistent state detected after finalizing t8code. Have you finalized all `T8codeMesh` objects and/or properly freed/un-referenced all t8code related objects?")
+            end
+        end
+
         # Initialize t8code.
         t8_init(LOG_LEVEL)
-
-        if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE")
-            # Normally, `sc_finalize` should always be called during shutdown of an
-            # application. It checks whether there is still un-freed memory by t8code
-            # and/or T8code.jl and throws an exception if this is the case. For
-            # production runs this is not mandatory, but is helpful during
-            # development. Hence, this option is only activated when environment
-            # variable TRIXI_T8CODE_SC_FINALIZE exists.
-            @info "T8code.jl: `sc_finalize` will be called during shutdown of Trixi.jl."
-            MPI.add_finalize_hook!(T8code.Libt8.sc_finalize)
-        end
     else
         @warn "Preferences for T8code.jl are not set correctly. Until fixed, using `T8codeMesh` will result in a crash. " *
               "See also https://trixi-framework.github.io/Trixi.jl/stable/parallelization/#parallel_system_MPI"
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 5089a33bba8..f0afdb8b0b9 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -43,31 +43,14 @@ mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <:
         mesh.unsaved_changes = true
 
         finalizer(mesh) do mesh
-            # When finalizing `mesh.forest`, `mesh.scheme` and `mesh.cmesh` are
+            # When finalizing, `forest`, `scheme`, `cmesh`, and `geometry` are
             # also cleaned up from within `t8code`. The cleanup code for
             # `cmesh` does some MPI calls for deallocating shared memory
             # arrays. Due to garbage collection in Julia the order of shutdown
-            # is not deterministic. The following code might happen after MPI
-            # is already in finalized state.
-            # If the environment variable `TRIXI_T8CODE_SC_FINALIZE` is set the
-            # `finalize_hook` of the MPI module takes care of the cleanup. See
-            # further down. However, this might cause a pile-up of `mesh`
-            # objects during long-running sessions.
-            if !MPI.Finalized()
-                t8_forest_unref(Ref(mesh.forest))
-                mesh.forest = C_NULL
-            end
-        end
-
-        # This finalizer call is only recommended during development and not for
-        # production runs, especially long-running sessions since a reference to
-        # the `mesh` object will be kept throughout the lifetime of the session.
-        # See comments in `init_t8code()` in file `src/auxiliary/t8code.jl` for
-        # more information.
-        if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE")
-            MPI.add_finalize_hook!() do
-                t8_forest_unref(Ref(mesh.forest))
-            end
+            # is not deterministic. Hence, "manual" finalization might be
+            # necessary in order to avoid MPI-related error output when closing
+            # the Julia program/session.
+            t8_forest_unref(Ref(mesh.forest))
         end
 
         return mesh

From 2d4bfb36d3eeba6f1fd981d01333a006de04b492 Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 22 Jul 2024 16:25:37 +0200
Subject: [PATCH 74/89] Adding finalize call to T8codeMesh examples.

---
 .../elixir_advection_amr_solution_independent.jl              | 4 ++++
 .../t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl | 4 ++++
 examples/t8code_2d_dgsem/elixir_advection_basic.jl            | 4 ++++
 examples/t8code_2d_dgsem/elixir_advection_extended.jl         | 4 ++++
 .../t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl    | 4 ++++
 examples/t8code_2d_dgsem/elixir_advection_restart.jl          | 4 ++++
 examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl      | 4 ++++
 .../t8code_2d_dgsem/elixir_advection_unstructured_flag.jl     | 4 ++++
 examples/t8code_2d_dgsem/elixir_euler_free_stream.jl          | 4 ++++
 examples/t8code_2d_dgsem/elixir_euler_sedov.jl                | 4 ++++
 examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl    | 4 ++++
 ...ixir_euler_source_terms_nonconforming_unstructured_flag.jl | 4 ++++
 examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl   | 4 ++++
 examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl            | 4 ++++
 examples/t8code_2d_dgsem/elixir_mhd_rotor.jl                  | 4 ++++
 examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl  | 4 ++++
 examples/t8code_3d_dgsem/elixir_advection_amr.jl              | 4 ++++
 .../elixir_advection_amr_unstructured_curved.jl               | 4 ++++
 examples/t8code_3d_dgsem/elixir_advection_basic.jl            | 4 ++++
 examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl    | 4 ++++
 examples/t8code_3d_dgsem/elixir_advection_restart.jl          | 4 ++++
 .../t8code_3d_dgsem/elixir_advection_unstructured_curved.jl   | 4 ++++
 examples/t8code_3d_dgsem/elixir_euler_ec.jl                   | 4 ++++
 examples/t8code_3d_dgsem/elixir_euler_free_stream.jl          | 4 ++++
 examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl | 4 ++++
 examples/t8code_3d_dgsem/elixir_euler_sedov.jl                | 4 ++++
 ...ir_euler_source_terms_nonconforming_unstructured_curved.jl | 4 ++++
 .../t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl  | 4 ++++
 28 files changed, 112 insertions(+)

diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
index cfa0a34bb34..f7e5cc39506 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl
@@ -152,3 +152,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
index bd82809e016..b6d92d99c2a 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl
@@ -93,3 +93,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             save_everystep = false, callback = callbacks);
 
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_basic.jl b/examples/t8code_2d_dgsem/elixir_advection_basic.jl
index 26ced0970fe..2c19ace1dea 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_basic.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_basic.jl
@@ -55,3 +55,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_extended.jl b/examples/t8code_2d_dgsem/elixir_advection_extended.jl
index f7e06dd517e..034197ce9d8 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_extended.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_extended.jl
@@ -83,3 +83,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
index 7230c8c0b9e..cb14a7c23ae 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
@@ -90,3 +90,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart.jl b/examples/t8code_2d_dgsem/elixir_advection_restart.jl
index 6fb14c60038..9438eb4c38f 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_restart.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart.jl
@@ -41,3 +41,7 @@ load_timestep!(integrator, restart_filename)
 
 sol = solve!(integrator)
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
index 2c0f14c8d10..50b04a78fd7 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_restart_amr.jl
@@ -60,3 +60,7 @@ load_timestep!(integrator, restart_filename)
 
 sol = solve!(integrator)
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
index aed6c755d5c..f13f0fae05a 100644
--- a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
@@ -76,3 +76,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary.
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
index b2d49e3ccfe..349678f1637 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
@@ -91,3 +91,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_euler_sedov.jl b/examples/t8code_2d_dgsem/elixir_euler_sedov.jl
index fae7d818ad8..e8ec3a7a9e6 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_sedov.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_sedov.jl
@@ -98,3 +98,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl b/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl
index 2a0c4a6ee20..a7d10006593 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl
@@ -70,3 +70,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
index 19502f1ce0e..9f9ad16dc9b 100644
--- a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
+++ b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
@@ -94,3 +94,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl b/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl
index e63c1297882..1476dfa05c7 100644
--- a/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl
+++ b/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl
@@ -81,3 +81,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
 println("Number of gravity subcycles: ", semi.gravity_counter.ncalls_since_readout)
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl b/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl
index e184cb3fd05..04c36dd8642 100644
--- a/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl
+++ b/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl
@@ -57,3 +57,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
index 88a2888de62..d171d13578e 100644
--- a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
+++ b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl
@@ -137,3 +137,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl b/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
index 688ddb2dbb5..0a937664493 100644
--- a/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
+++ b/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
@@ -59,3 +59,7 @@ callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, sav
 sol = solve(ode, RDPK3SpFSAL49(); abstol = 1.0e-8, reltol = 1.0e-8,
             ode_default_options()..., callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr.jl b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
index d2a0e3fb666..6b9e41a17b4 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr.jl
@@ -77,3 +77,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
index 56b2574849f..74d0fa15c47 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_amr_unstructured_curved.jl
@@ -112,3 +112,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_basic.jl b/examples/t8code_3d_dgsem/elixir_advection_basic.jl
index ae97a73d182..415a9e78ba2 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_basic.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_basic.jl
@@ -65,3 +65,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl b/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl
index 0755a76ef45..f12c760bb1b 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_nonconforming.jl
@@ -87,3 +87,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_restart.jl b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
index dfce8d2558a..0c158d72741 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_restart.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_restart.jl
@@ -38,3 +38,7 @@ load_timestep!(integrator, restart_filename)
 
 sol = solve!(integrator)
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
index f49137e054d..7c332e5b77a 100644
--- a/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_advection_unstructured_curved.jl
@@ -96,3 +96,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
 
 # Print the timer summary
 summary_callback()
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_euler_ec.jl b/examples/t8code_3d_dgsem/elixir_euler_ec.jl
index e75b0f69636..2e13f7edd57 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_ec.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_ec.jl
@@ -87,3 +87,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
index d45de658cc0..daf431a86ef 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_free_stream.jl
@@ -114,3 +114,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl b/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
index d24512a4cdd..19cdea7ee3c 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_free_stream_extruded.jl
@@ -102,3 +102,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), #maxiters=1
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_euler_sedov.jl b/examples/t8code_3d_dgsem/elixir_euler_sedov.jl
index f897249ed2e..9f42ad24ac7 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_sedov.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_sedov.jl
@@ -100,3 +100,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
index 4b87b646df9..1346f603608 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_curved.jl
@@ -116,3 +116,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)
diff --git a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
index ce1662c8e50..612b1640239 100644
--- a/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
+++ b/examples/t8code_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl
@@ -66,3 +66,7 @@ sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
             dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
             save_everystep = false, callback = callbacks);
 summary_callback() # print the timer summary
+
+# Finalize `T8codeMesh` to make sure MPI related objects in t8code are
+# released before `MPI` finalizes.
+!isinteractive() && finalize(mesh)

From 573133a2eb439a002af23d9b0b5c53449adcae3a Mon Sep 17 00:00:00 2001
From: Johannes Markert <johannes.markert@dlr.de>
Date: Mon, 22 Jul 2024 16:31:47 +0200
Subject: [PATCH 75/89] Applied formatter.

---
 src/auxiliary/t8code.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl
index 71b05f6ccdb..ddc0a2f1399 100644
--- a/src/auxiliary/t8code.jl
+++ b/src/auxiliary/t8code.jl
@@ -35,7 +35,7 @@ function init_t8code()
         MPI.add_finalize_hook!() do
             status = T8code.Libt8.sc_finalize_noabort()
             if status != 0
-              @warn("Inconsistent state detected after finalizing t8code. Have you finalized all `T8codeMesh` objects and/or properly freed/un-referenced all t8code related objects?")
+                @warn("Inconsistent state detected after finalizing t8code. Have you finalized all `T8codeMesh` objects and/or properly freed/un-referenced all t8code related objects?")
             end
         end
 

From 2393f83be3cf6672375b9b8781ad9c1295c90641 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 22 Aug 2024 09:56:35 +0200
Subject: [PATCH 76/89] use Base.min/max in MPI.Allreduce

MPI.jl's reduce currently does not work for custom operators (such as Trixi's
min/max) on ARM
---
 src/callbacks_step/analysis.jl               |  2 +-
 src/callbacks_step/analysis_dg2d_parallel.jl |  2 +-
 src/callbacks_step/analysis_dg3d_parallel.jl |  2 +-
 src/callbacks_step/stepsize_dg2d.jl          | 12 ++++++------
 src/callbacks_step/stepsize_dg3d.jl          |  8 ++++----
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 24cf363dd95..7f63d031461 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -448,7 +448,7 @@ function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)
             res = maximum(abs, view(du, v, ..))
             if mpi_isparallel()
                 # TODO: Debugging, here is a type instability
-                global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm())
+                global_res = MPI.Reduce!(Ref(res), Base.max, mpi_root(), mpi_comm())
                 if mpi_isroot()
                     res::eltype(du) = global_res[]
                 end
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 000daa015dc..fdf7d2ea6c0 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -131,7 +131,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index de777be406d..4f9b9ccd27f 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -49,7 +49,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl
index 41251506a0d..08e57671ec8 100644
--- a/src/callbacks_step/stepsize_dg2d.jl
+++ b/src/callbacks_step/stepsize_dg2d.jl
@@ -54,7 +54,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -70,7 +70,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -154,7 +154,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -170,7 +170,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -186,7 +186,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -202,7 +202,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl
index 6afa4ac2d35..f1c70f5c2e9 100644
--- a/src/callbacks_step/stepsize_dg3d.jl
+++ b/src/callbacks_step/stepsize_dg3d.jl
@@ -237,7 +237,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -253,7 +253,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -269,7 +269,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -285,7 +285,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end

From 6607023a875a5c9e408630e6d71bd7f17d9fb6cd Mon Sep 17 00:00:00 2001
From: s6nistam <nico.stamos@alumni.fh-aachen.de>
Date: Mon, 23 Sep 2024 09:43:14 +0200
Subject: [PATCH 77/89] ParaviewCatalyst callback working except PlotData3D

---
 .../elixir_advection_basic_catalyst.jl        |  62 +++++
 ...dvection_basic_catalyst.jl:Zone.Identifier |   0
 .../elixir_advection_basic_catalyst.jl        |  62 +++++
 ...dvection_basic_catalyst.jl:Zone.Identifier |   0
 .../elixir_advection_basic_catalyst.jl        |  62 +++++
 ...dvection_basic_catalyst.jl:Zone.Identifier |   0
 src/callbacks_step/paraview_catalyst.jl       | 229 ++++++++++++++++++
 .../paraview_catalyst.jl:Zone.Identifier      |   0
 8 files changed, 415 insertions(+)
 create mode 100644 examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl
 create mode 100644 examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
 create mode 100644 examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl
 create mode 100644 examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
 create mode 100644 examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl
 create mode 100644 examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
 create mode 100644 src/callbacks_step/paraview_catalyst.jl
 create mode 100644 src/callbacks_step/paraview_catalyst.jl:Zone.Identifier

diff --git a/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl b/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl
new file mode 100644
index 00000000000..b6d6cd905c3
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl
@@ -0,0 +1,62 @@
+
+using OrdinaryDiffEq
+using Trixi
+using ParaviewCatalyst
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = 1.0
+equations = LinearScalarAdvectionEquation1D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+coordinates_min = -1.0 # minimum coordinate
+coordinates_max = 1.0 # maximum coordinate
+
+# Create a uniformly refined mesh with periodic boundaries
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 4,
+                n_cells_max = 30_000) # set maximum capacity of tree data structure
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+ode = semidiscretize(semi, (0.0, 1.0));
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback analyses the solution at regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The SaveSolutionCallback saves the solution to a file at regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.6)
+
+catalyst_callback = ParaviewCatalystCallback(interval=10)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+                        stepsize_callback, catalyst_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier b/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl b/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl
new file mode 100644
index 00000000000..b4f5cfebe63
--- /dev/null
+++ b/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl
@@ -0,0 +1,62 @@
+
+using OrdinaryDiffEq
+using Trixi
+using ParaviewCatalyst
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7)
+equations = LinearScalarAdvectionEquation2D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y))
+coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y))
+
+# Create a uniformly refined mesh with periodic boundaries
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 4,
+                n_cells_max = 30_000) # set maximum capacity of tree data structure
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+ode = semidiscretize(semi, (0.0, 1.0));
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.6)
+
+catalyst_callback = ParaviewCatalystCallback(interval=10)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+                        stepsize_callback, catalyst_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier b/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl b/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl
new file mode 100644
index 00000000000..ba6152dee43
--- /dev/null
+++ b/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl
@@ -0,0 +1,62 @@
+
+using OrdinaryDiffEq
+using Trixi
+using ParaviewCatalyst
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7, 0.5)
+equations = LinearScalarAdvectionEquation3D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z))
+coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z))
+
+# Create a uniformly refined mesh with periodic boundaries
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 3,
+                n_cells_max = 30_000) # set maximum capacity of tree data structure
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+ode = semidiscretize(semi, (0.0, 1.0));
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval = 100,
+                                     solution_variables = cons2prim)
+#
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.2)
+
+catalyst_callback = ParaviewCatalystCallback(interval=10)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
+                        stepsize_callback, catalyst_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier b/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/callbacks_step/paraview_catalyst.jl b/src/callbacks_step/paraview_catalyst.jl
new file mode 100644
index 00000000000..e0e40d01935
--- /dev/null
+++ b/src/callbacks_step/paraview_catalyst.jl
@@ -0,0 +1,229 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+mutable struct ParaviewCatalystCallback
+    interval::Int
+end
+
+function Base.show(io::IO,
+                   cb::DiscreteCallback{Condition, Affect!}) where {Condition,
+                                                                    Affect! <:
+                                                                    ParaviewCatalystCallback
+                                                                    }
+    visualization_callback = cb.affect!
+    @unpack interval = visualization_callback
+    print(io, "ParaviewCatalystCallback(",
+          "interval=", interval,")")
+end
+
+function Base.show(io::IO, ::MIME"text/plain",
+                   cb::DiscreteCallback{Condition, Affect!}) where {Condition,
+                                                                    Affect! <:
+                                                                    ParaviewCatalystCallback
+                                                                    }
+    if get(io, :compact, false)
+        show(io, cb)
+    else
+        visualization_callback = cb.affect!
+
+        setup = [
+            "interval" => visualization_callback.interval,
+            
+        ]
+        summary_box(io, "ParaviewCatalystCallback", setup)
+    end
+end
+
+"""
+    ParaviewCatalystCallback(; interval=0,
+                            )
+
+Create a callback that visualizes results during a simulation, also known as *in-situ
+visualization*.
+
+!!! warning "Experimental implementation"
+    This is an experimental feature and may change in any future releases.
+"""
+function ParaviewCatalystCallback(; interval = 0,
+                               )
+    mpi_isparallel() && error("this callback does not work in parallel yet")
+
+    ParaviewCatalyst.catalyst_initialize(libpath="/home/nico/Paraview/ParaView-5.13.0-MPI-Linux-Python3.10-x86_64/lib/catalyst")
+
+    visualization_callback = ParaviewCatalystCallback(interval)
+
+    # Warn users if they create a ParaviewCatalystCallback without having loaded the ParaviewCatalyst package
+    if !(:ParaviewCatalyst in nameof.(Base.loaded_modules |> values))
+        @warn "Package `ParaviewCatalyst` not loaded but required by `ParaviewCatalystCallback` to visualize results"
+    end
+
+    DiscreteCallback(visualization_callback, visualization_callback, # the first one is the condition, the second the affect!
+                     save_positions = (false, false),
+                     initialize = initialize!)
+end
+
+function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t,
+                     integrator) where {Condition, Affect! <: ParaviewCatalystCallback}
+    visualization_callback = cb.affect!
+
+    visualization_callback(integrator)
+
+    return nothing
+end
+
+# this method is called to determine whether the callback should be activated
+function (visualization_callback::ParaviewCatalystCallback)(u, t, integrator)
+    @unpack interval = visualization_callback
+
+    # With error-based step size control, some steps can be rejected. Thus,
+    #   `integrator.iter >= integrator.stats.naccept`
+    #    (total #steps)       (#accepted steps)
+    # We need to check the number of accepted steps since callbacks are not
+    # activated after a rejected step.
+    return interval > 0 && (integrator.stats.naccept % interval == 0 ||
+            isfinished(integrator))
+end
+
+# this method is called when the callback is activated
+function (visualization_callback::ParaviewCatalystCallback)(integrator)
+    u_ode = integrator.u
+    mesh, equations, solver, cache = mesh_equations_solver_cache(integrator.p)
+    time = integrator.t
+    timestep = integrator.stats.naccept
+
+    leaf_cell_ids = leaf_cells(mesh.tree)                     # Indices der "echten" Zellen
+    coordinates = mesh.tree.coordinates[:, leaf_cell_ids]     # Koordinaten der Mittelpunkte der Zellen
+    cell_levels = mesh.tree.levels[leaf_cell_ids]                           # Level der Zellen im Baum (pro Level werden die Kanten halbiert)
+    mesh.tree.center_level_0                                  # Mittelpunkt auf Level 0, d.h. Mittelpunkt des gesamten Gitters
+    length_level_0 = mesh.tree.length_level_0                                  # Kantenlänge auf Level 0
+    cell_length = [2.0^-cell_level * length_level_0 for cell_level in cell_levels] 
+    min_cell_length = min(cell_length...)
+
+    println()
+    println(length(mesh.tree.coordinates[1, leaf_cell_ids]))
+    println("*** Catalyst Callback activated")
+    println("*** Time ", time)
+    println("*** Step ", timestep)
+    println("*** u[1] ", u_ode[1])
+    println("*** coord[1] ", coordinates[1])
+    println()
+
+    # avoid re-evaluating possible FSAL stages
+    u_modified!(integrator, false)
+
+    ParaviewCatalyst.ConduitNode() do node
+        node["catalyst/state/timestep"] = timestep
+        node["catalyst/state/time"] = timestep
+        node["catalyst/channels/input/type"] = "mesh"
+        node["catalyst/channels/input/data/coordsets/coords/type"] = "uniform"
+
+        pd = nothing
+        c_i = 0
+        c_j = 0
+        c_k = 0
+        x0 = 0
+        y0 = 0
+        z0 = 0
+        dx = 0
+        dy = 0
+        dz = 0
+        if ndims(mesh) == 1
+            pd = PlotData1D(integrator.u, integrator.p)
+            c_i = length(pd.x)
+            x0 = min(pd.x...)
+            dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
+        elseif ndims(mesh) == 2
+            pd = PlotData2D(integrator.u, integrator.p)
+            
+            c_i = length(pd.x)
+            x0 = min(pd.x...)
+            dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
+
+            c_j = length(pd.y)
+            y0 = min(pd.y...)
+            dy = min([pd.y[i + 1] - pd.y[i] for i in 1:(c_j - 1)]...)
+        elseif ndims(mesh) == 3
+            z_coords = mesh.tree.coordinates[3, leaf_cell_ids]
+            z_h1 = [[z_coords[i] - 0.5 * cell_length[i] z_coords[i] + 0.5 * cell_length[i]] for i in 1:length(z_coords)]
+            z_h1 = unique!(z_h1)
+            z_h = [z_h1[j][k] for j in 1:length(z_h1) for k in 1:2]
+            pd_z = [PlotData2D(integrator.u, integrator.p, slice=:xy, point=(0,0,z)) for z in z_h]
+            pd = pd_z[1]
+
+            c_i = length(pd.x)
+            x0 = min(pd.x...)
+            dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
+
+            c_j = length(pd.y)
+            y0 = min(pd.y...)
+            dy = min([pd.y[i + 1] - pd.y[i] for i in 1:(c_j - 1)]...)
+
+            c_k = length(z_h)
+            z0 = min(z_h...)
+            dz = min([z_h[i + 1] - z_h[i] for i in 1:(c_k - 1)]...)
+
+            #TODO sobald PlotData3D implementiert ersetzen mit 
+            # pd = PlotData3D(integrator.u, integrator.p)
+            # c_i = length(pd.x)
+            # x0 = min(pd.x...)
+            # dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
+
+            # c_j = length(pd.y)
+            # y0 = min(pd.y...)
+            # dy = min([pd.y[i + 1] - pd.y[i] for i in 1:(c_j - 1)]...)
+
+            # c_k = length(pd.z)
+            # z0 = min(pd.z...)
+            # dz = min([pd.z[i + 1] - pd.z[i] for i in 1:(c_k - 1)]...)
+        end
+
+        node["catalyst/channels/input/data/coordsets/coords/dims/i"] = c_i
+        node["catalyst/channels/input/data/coordsets/coords/origin/x"] = x0
+        node["catalyst/channels/input/data/coordsets/coords/spacing/dx"] = dx
+        if ndims(mesh) > 1
+            node["catalyst/channels/input/data/coordsets/coords/dims/j"] = c_j
+            node["catalyst/channels/input/data/coordsets/coords/origin/y"] = y0
+            node["catalyst/channels/input/data/coordsets/coords/spacing/dy"] = dy
+            if ndims(mesh) > 2
+                node["catalyst/channels/input/data/coordsets/coords/dims/k"] = c_k
+                node["catalyst/channels/input/data/coordsets/coords/origin/z"] = z0
+                node["catalyst/channels/input/data/coordsets/coords/spacing/dz"] = (c_i/c_k) * dx #TODO sobald PlotData3D implementiert auf dz setzen
+            end
+        end
+
+        node["catalyst/channels/input/data/topologies/mesh/type"] = "uniform"
+        node["catalyst/channels/input/data/topologies/mesh/coordset"] = "coords"
+
+        node["catalyst/channels/input/data/fields/solution/association"] = "vertex"
+        node["catalyst/channels/input/data/fields/solution/topology"] = "mesh"
+        node["catalyst/channels/input/data/fields/solution/volume_dependent"] = "false"
+        if ndims(mesh) == 1
+            node["catalyst/channels/input/data/fields/solution/values"] = pd.data[1]
+        elseif ndims(mesh) == 2
+            solution = [pd.data[1][i,j] for j in 1:c_j for i in 1:c_i]
+            node["catalyst/channels/input/data/fields/solution/values"] = solution
+        elseif ndims(mesh) == 3
+            solution_h = [[pd_z[k].data[1][i,j] for j in 1:c_j for i in 1:c_i] for k in 1:c_k]
+            solution = [solution_h[i][j] for i in 1:c_k for j in 1:(c_i * c_j)]
+
+            #TODO sobald PlotData3D implementiert, ersetzen durch
+            # solution = [pd.data[1][i,j,k] for k in 1:c_k for j in 1:c_j for i in 1:c_i]
+
+            node["catalyst/channels/input/data/fields/solution/values"] = solution
+        end
+        
+
+        # Conduit.node_info(node) do info_node
+        #    Conduit.node_print(info_node, detailed = true)
+        # end
+        ParaviewCatalyst.catalyst_execute(node)
+    end
+
+    return nothing
+end
+
+end # @muladd
diff --git a/src/callbacks_step/paraview_catalyst.jl:Zone.Identifier b/src/callbacks_step/paraview_catalyst.jl:Zone.Identifier
new file mode 100644
index 00000000000..e69de29bb2d

From 1b9a2b17d9225d53d7287a827e3cfea255674f78 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 18 Oct 2024 15:24:10 +0200
Subject: [PATCH 78/89] relax compat bounds to enable current CUDA

---
 Project.toml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/Project.toml b/Project.toml
index b09ecb57be9..b04d44f8478 100644
--- a/Project.toml
+++ b/Project.toml
@@ -32,7 +32,6 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
-RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
@@ -70,8 +69,8 @@ ConstructionBase = "1.3"
 Convex = "0.16"
 DataStructures = "0.18.15"
 DelimitedFiles = "1"
-DiffEqBase = "6 - 6.143"
-DiffEqCallbacks = "2.25"
+DiffEqBase = "6"
+DiffEqCallbacks = "2.25, 3, 4"
 Downloads = "1.6"
 ECOS = "1.1.2"
 EllipsisNotation = "1.0"
@@ -94,14 +93,13 @@ PrecompileTools = "1.1"
 Preferences = "1.3"
 Printf = "1"
 RecipesBase = "1.1"
-RecursiveArrayTools = "2.38.10"
 Reexport = "1.0"
 Requires = "1.1"
 SciMLBase = "1.90, 2"
 SimpleUnPack = "1.1"
 SparseArrays = "1"
 StartUpDG = "0.17.7, 1.1.5"
-Static = "0.8.7"
+Static = "0.8.7, 1"
 StaticArrayInterface = "1.4"
 StaticArrays = "1.5"
 StrideArrays = "0.1.26"

From c8bba9bb467994ca65a72de58ff7daae3ef3c562 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 18 Oct 2024 16:02:00 +0200
Subject: [PATCH 79/89] skip failing precompile commands

---
 src/auxiliary/precompile.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl
index 4d5399b5ba3..0de23c6348d 100644
--- a/src/auxiliary/precompile.jl
+++ b/src/auxiliary/precompile.jl
@@ -380,10 +380,10 @@ function _precompile_manual_()
     # end
     # end
     @assert Base.precompile(Tuple{typeof(SummaryCallback)})
-    @assert Base.precompile(Tuple{DiscreteCallback{typeof(Trixi.summary_callback),
-                                                   typeof(Trixi.summary_callback),
-                                                   typeof(Trixi.initialize_summary_callback),
-                                                   typeof(SciMLBase.FINALIZE_DEFAULT)}})
+    #@assert Base.precompile(Tuple{DiscreteCallback{typeof(Trixi.summary_callback),
+    #                                               typeof(Trixi.summary_callback),
+    #                                               typeof(Trixi.initialize_summary_callback),
+    #                                               typeof(SciMLBase.FINALIZE_DEFAULT)}})
     @assert Base.precompile(Tuple{typeof(summary_box), Base.TTY, String,
                                   Vector{Pair{String, Any}}})
     # TODO: AMRCallback, ControllerThreeLevel, indicators
@@ -521,9 +521,9 @@ function _precompile_manual_()
                                                  typeof(Trixi.initialize_summary_callback),
                                                  typeof(SciMLBase.FINALIZE_DEFAULT)}
         @assert Base.precompile(Tuple{typeof(show), Base.TTY, summary_callback_type})
-        @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain",
-                                      summary_callback_type})
-        @assert Base.precompile(Tuple{summary_callback_type, Base.TTY})
+        #@assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain",
+        #                              summary_callback_type})
+        #Base.precompile(Tuple{summary_callback_type, Base.TTY})
 
         # TODO: SteadyStateCallback, AnalysisCallback
 

From 33903d9abd06839684de6e95c518970bd14e52c7 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Mon, 18 Nov 2024 16:06:30 +0100
Subject: [PATCH 80/89] Revert "ParaviewCatalyst callback working except
 PlotData3D"

This reverts commit 6607023a875a5c9e408630e6d71bd7f17d9fb6cd.
---
 .../elixir_advection_basic_catalyst.jl        |  62 -----
 ...dvection_basic_catalyst.jl:Zone.Identifier |   0
 .../elixir_advection_basic_catalyst.jl        |  62 -----
 ...dvection_basic_catalyst.jl:Zone.Identifier |   0
 .../elixir_advection_basic_catalyst.jl        |  62 -----
 ...dvection_basic_catalyst.jl:Zone.Identifier |   0
 src/callbacks_step/paraview_catalyst.jl       | 229 ------------------
 .../paraview_catalyst.jl:Zone.Identifier      |   0
 8 files changed, 415 deletions(-)
 delete mode 100644 examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl
 delete mode 100644 examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
 delete mode 100644 examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl
 delete mode 100644 examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
 delete mode 100644 examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl
 delete mode 100644 examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
 delete mode 100644 src/callbacks_step/paraview_catalyst.jl
 delete mode 100644 src/callbacks_step/paraview_catalyst.jl:Zone.Identifier

diff --git a/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl b/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl
deleted file mode 100644
index b6d6cd905c3..00000000000
--- a/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-
-using OrdinaryDiffEq
-using Trixi
-using ParaviewCatalyst
-
-###############################################################################
-# semidiscretization of the linear advection equation
-
-advection_velocity = 1.0
-equations = LinearScalarAdvectionEquation1D(advection_velocity)
-
-# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
-
-coordinates_min = -1.0 # minimum coordinate
-coordinates_max = 1.0 # maximum coordinate
-
-# Create a uniformly refined mesh with periodic boundaries
-mesh = TreeMesh(coordinates_min, coordinates_max,
-                initial_refinement_level = 4,
-                n_cells_max = 30_000) # set maximum capacity of tree data structure
-
-# A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
-                                    solver)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-# Create ODE problem with time span from 0.0 to 1.0
-ode = semidiscretize(semi, (0.0, 1.0));
-
-# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
-# and resets the timers
-summary_callback = SummaryCallback()
-
-# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
-analysis_callback = AnalysisCallback(semi, interval = 100)
-
-# The SaveSolutionCallback allows to save the solution to a file in regular intervals
-save_solution = SaveSolutionCallback(interval = 100,
-                                     solution_variables = cons2prim)
-
-# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
-stepsize_callback = StepsizeCallback(cfl = 1.6)
-
-catalyst_callback = ParaviewCatalystCallback(interval=10)
-
-# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
-                        stepsize_callback, catalyst_callback)
-
-###############################################################################
-# run the simulation
-
-# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
-sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
-            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
-            save_everystep = false, callback = callbacks);
-
-# Print the timer summary
-summary_callback()
diff --git a/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier b/examples/tree_1d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl b/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl
deleted file mode 100644
index b4f5cfebe63..00000000000
--- a/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-
-using OrdinaryDiffEq
-using Trixi
-using ParaviewCatalyst
-
-###############################################################################
-# semidiscretization of the linear advection equation
-
-advection_velocity = (0.2, -0.7)
-equations = LinearScalarAdvectionEquation2D(advection_velocity)
-
-# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
-
-coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y))
-coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y))
-
-# Create a uniformly refined mesh with periodic boundaries
-mesh = TreeMesh(coordinates_min, coordinates_max,
-                initial_refinement_level = 4,
-                n_cells_max = 30_000) # set maximum capacity of tree data structure
-
-# A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
-                                    solver)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-# Create ODE problem with time span from 0.0 to 1.0
-ode = semidiscretize(semi, (0.0, 1.0));
-
-# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
-# and resets the timers
-summary_callback = SummaryCallback()
-
-# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
-analysis_callback = AnalysisCallback(semi, interval = 100)
-
-# The SaveSolutionCallback allows to save the solution to a file in regular intervals
-save_solution = SaveSolutionCallback(interval = 100,
-                                     solution_variables = cons2prim)
-
-# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
-stepsize_callback = StepsizeCallback(cfl = 1.6)
-
-catalyst_callback = ParaviewCatalystCallback(interval=10)
-
-# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
-                        stepsize_callback, catalyst_callback)
-
-###############################################################################
-# run the simulation
-
-# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
-sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
-            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
-            save_everystep = false, callback = callbacks);
-
-# Print the timer summary
-summary_callback()
diff --git a/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier b/examples/tree_2d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl b/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl
deleted file mode 100644
index ba6152dee43..00000000000
--- a/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-
-using OrdinaryDiffEq
-using Trixi
-using ParaviewCatalyst
-
-###############################################################################
-# semidiscretization of the linear advection equation
-
-advection_velocity = (0.2, -0.7, 0.5)
-equations = LinearScalarAdvectionEquation3D(advection_velocity)
-
-# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
-
-coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z))
-coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z))
-
-# Create a uniformly refined mesh with periodic boundaries
-mesh = TreeMesh(coordinates_min, coordinates_max,
-                initial_refinement_level = 3,
-                n_cells_max = 30_000) # set maximum capacity of tree data structure
-
-# A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
-                                    solver)
-
-###############################################################################
-# ODE solvers, callbacks etc.
-
-# Create ODE problem with time span from 0.0 to 1.0
-ode = semidiscretize(semi, (0.0, 1.0));
-
-# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
-# and resets the timers
-summary_callback = SummaryCallback()
-
-# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
-analysis_callback = AnalysisCallback(semi, interval = 100)
-
-# The SaveSolutionCallback allows to save the solution to a file in regular intervals
-save_solution = SaveSolutionCallback(interval = 100,
-                                     solution_variables = cons2prim)
-#
-# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
-stepsize_callback = StepsizeCallback(cfl = 1.2)
-
-catalyst_callback = ParaviewCatalystCallback(interval=10)
-
-# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
-callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
-                        stepsize_callback, catalyst_callback)
-
-###############################################################################
-# run the simulation
-
-# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
-sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
-            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
-            save_everystep = false, callback = callbacks);
-
-# Print the timer summary
-summary_callback()
diff --git a/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier b/examples/tree_3d_dgsem/elixir_advection_basic_catalyst.jl:Zone.Identifier
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/callbacks_step/paraview_catalyst.jl b/src/callbacks_step/paraview_catalyst.jl
deleted file mode 100644
index e0e40d01935..00000000000
--- a/src/callbacks_step/paraview_catalyst.jl
+++ /dev/null
@@ -1,229 +0,0 @@
-# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
-# Since these FMAs can increase the performance of many numerical algorithms,
-# we need to opt-in explicitly.
-# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
-@muladd begin
-#! format: noindent
-
-mutable struct ParaviewCatalystCallback
-    interval::Int
-end
-
-function Base.show(io::IO,
-                   cb::DiscreteCallback{Condition, Affect!}) where {Condition,
-                                                                    Affect! <:
-                                                                    ParaviewCatalystCallback
-                                                                    }
-    visualization_callback = cb.affect!
-    @unpack interval = visualization_callback
-    print(io, "ParaviewCatalystCallback(",
-          "interval=", interval,")")
-end
-
-function Base.show(io::IO, ::MIME"text/plain",
-                   cb::DiscreteCallback{Condition, Affect!}) where {Condition,
-                                                                    Affect! <:
-                                                                    ParaviewCatalystCallback
-                                                                    }
-    if get(io, :compact, false)
-        show(io, cb)
-    else
-        visualization_callback = cb.affect!
-
-        setup = [
-            "interval" => visualization_callback.interval,
-            
-        ]
-        summary_box(io, "ParaviewCatalystCallback", setup)
-    end
-end
-
-"""
-    ParaviewCatalystCallback(; interval=0,
-                            )
-
-Create a callback that visualizes results during a simulation, also known as *in-situ
-visualization*.
-
-!!! warning "Experimental implementation"
-    This is an experimental feature and may change in any future releases.
-"""
-function ParaviewCatalystCallback(; interval = 0,
-                               )
-    mpi_isparallel() && error("this callback does not work in parallel yet")
-
-    ParaviewCatalyst.catalyst_initialize(libpath="/home/nico/Paraview/ParaView-5.13.0-MPI-Linux-Python3.10-x86_64/lib/catalyst")
-
-    visualization_callback = ParaviewCatalystCallback(interval)
-
-    # Warn users if they create a ParaviewCatalystCallback without having loaded the ParaviewCatalyst package
-    if !(:ParaviewCatalyst in nameof.(Base.loaded_modules |> values))
-        @warn "Package `ParaviewCatalyst` not loaded but required by `ParaviewCatalystCallback` to visualize results"
-    end
-
-    DiscreteCallback(visualization_callback, visualization_callback, # the first one is the condition, the second the affect!
-                     save_positions = (false, false),
-                     initialize = initialize!)
-end
-
-function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t,
-                     integrator) where {Condition, Affect! <: ParaviewCatalystCallback}
-    visualization_callback = cb.affect!
-
-    visualization_callback(integrator)
-
-    return nothing
-end
-
-# this method is called to determine whether the callback should be activated
-function (visualization_callback::ParaviewCatalystCallback)(u, t, integrator)
-    @unpack interval = visualization_callback
-
-    # With error-based step size control, some steps can be rejected. Thus,
-    #   `integrator.iter >= integrator.stats.naccept`
-    #    (total #steps)       (#accepted steps)
-    # We need to check the number of accepted steps since callbacks are not
-    # activated after a rejected step.
-    return interval > 0 && (integrator.stats.naccept % interval == 0 ||
-            isfinished(integrator))
-end
-
-# this method is called when the callback is activated
-function (visualization_callback::ParaviewCatalystCallback)(integrator)
-    u_ode = integrator.u
-    mesh, equations, solver, cache = mesh_equations_solver_cache(integrator.p)
-    time = integrator.t
-    timestep = integrator.stats.naccept
-
-    leaf_cell_ids = leaf_cells(mesh.tree)                     # Indices der "echten" Zellen
-    coordinates = mesh.tree.coordinates[:, leaf_cell_ids]     # Koordinaten der Mittelpunkte der Zellen
-    cell_levels = mesh.tree.levels[leaf_cell_ids]                           # Level der Zellen im Baum (pro Level werden die Kanten halbiert)
-    mesh.tree.center_level_0                                  # Mittelpunkt auf Level 0, d.h. Mittelpunkt des gesamten Gitters
-    length_level_0 = mesh.tree.length_level_0                                  # Kantenlänge auf Level 0
-    cell_length = [2.0^-cell_level * length_level_0 for cell_level in cell_levels] 
-    min_cell_length = min(cell_length...)
-
-    println()
-    println(length(mesh.tree.coordinates[1, leaf_cell_ids]))
-    println("*** Catalyst Callback activated")
-    println("*** Time ", time)
-    println("*** Step ", timestep)
-    println("*** u[1] ", u_ode[1])
-    println("*** coord[1] ", coordinates[1])
-    println()
-
-    # avoid re-evaluating possible FSAL stages
-    u_modified!(integrator, false)
-
-    ParaviewCatalyst.ConduitNode() do node
-        node["catalyst/state/timestep"] = timestep
-        node["catalyst/state/time"] = timestep
-        node["catalyst/channels/input/type"] = "mesh"
-        node["catalyst/channels/input/data/coordsets/coords/type"] = "uniform"
-
-        pd = nothing
-        c_i = 0
-        c_j = 0
-        c_k = 0
-        x0 = 0
-        y0 = 0
-        z0 = 0
-        dx = 0
-        dy = 0
-        dz = 0
-        if ndims(mesh) == 1
-            pd = PlotData1D(integrator.u, integrator.p)
-            c_i = length(pd.x)
-            x0 = min(pd.x...)
-            dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
-        elseif ndims(mesh) == 2
-            pd = PlotData2D(integrator.u, integrator.p)
-            
-            c_i = length(pd.x)
-            x0 = min(pd.x...)
-            dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
-
-            c_j = length(pd.y)
-            y0 = min(pd.y...)
-            dy = min([pd.y[i + 1] - pd.y[i] for i in 1:(c_j - 1)]...)
-        elseif ndims(mesh) == 3
-            z_coords = mesh.tree.coordinates[3, leaf_cell_ids]
-            z_h1 = [[z_coords[i] - 0.5 * cell_length[i] z_coords[i] + 0.5 * cell_length[i]] for i in 1:length(z_coords)]
-            z_h1 = unique!(z_h1)
-            z_h = [z_h1[j][k] for j in 1:length(z_h1) for k in 1:2]
-            pd_z = [PlotData2D(integrator.u, integrator.p, slice=:xy, point=(0,0,z)) for z in z_h]
-            pd = pd_z[1]
-
-            c_i = length(pd.x)
-            x0 = min(pd.x...)
-            dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
-
-            c_j = length(pd.y)
-            y0 = min(pd.y...)
-            dy = min([pd.y[i + 1] - pd.y[i] for i in 1:(c_j - 1)]...)
-
-            c_k = length(z_h)
-            z0 = min(z_h...)
-            dz = min([z_h[i + 1] - z_h[i] for i in 1:(c_k - 1)]...)
-
-            #TODO sobald PlotData3D implementiert ersetzen mit 
-            # pd = PlotData3D(integrator.u, integrator.p)
-            # c_i = length(pd.x)
-            # x0 = min(pd.x...)
-            # dx = min([pd.x[i + 1] - pd.x[i] for i in 1:(c_i - 1)]...)
-
-            # c_j = length(pd.y)
-            # y0 = min(pd.y...)
-            # dy = min([pd.y[i + 1] - pd.y[i] for i in 1:(c_j - 1)]...)
-
-            # c_k = length(pd.z)
-            # z0 = min(pd.z...)
-            # dz = min([pd.z[i + 1] - pd.z[i] for i in 1:(c_k - 1)]...)
-        end
-
-        node["catalyst/channels/input/data/coordsets/coords/dims/i"] = c_i
-        node["catalyst/channels/input/data/coordsets/coords/origin/x"] = x0
-        node["catalyst/channels/input/data/coordsets/coords/spacing/dx"] = dx
-        if ndims(mesh) > 1
-            node["catalyst/channels/input/data/coordsets/coords/dims/j"] = c_j
-            node["catalyst/channels/input/data/coordsets/coords/origin/y"] = y0
-            node["catalyst/channels/input/data/coordsets/coords/spacing/dy"] = dy
-            if ndims(mesh) > 2
-                node["catalyst/channels/input/data/coordsets/coords/dims/k"] = c_k
-                node["catalyst/channels/input/data/coordsets/coords/origin/z"] = z0
-                node["catalyst/channels/input/data/coordsets/coords/spacing/dz"] = (c_i/c_k) * dx #TODO sobald PlotData3D implementiert auf dz setzen
-            end
-        end
-
-        node["catalyst/channels/input/data/topologies/mesh/type"] = "uniform"
-        node["catalyst/channels/input/data/topologies/mesh/coordset"] = "coords"
-
-        node["catalyst/channels/input/data/fields/solution/association"] = "vertex"
-        node["catalyst/channels/input/data/fields/solution/topology"] = "mesh"
-        node["catalyst/channels/input/data/fields/solution/volume_dependent"] = "false"
-        if ndims(mesh) == 1
-            node["catalyst/channels/input/data/fields/solution/values"] = pd.data[1]
-        elseif ndims(mesh) == 2
-            solution = [pd.data[1][i,j] for j in 1:c_j for i in 1:c_i]
-            node["catalyst/channels/input/data/fields/solution/values"] = solution
-        elseif ndims(mesh) == 3
-            solution_h = [[pd_z[k].data[1][i,j] for j in 1:c_j for i in 1:c_i] for k in 1:c_k]
-            solution = [solution_h[i][j] for i in 1:c_k for j in 1:(c_i * c_j)]
-
-            #TODO sobald PlotData3D implementiert, ersetzen durch
-            # solution = [pd.data[1][i,j,k] for k in 1:c_k for j in 1:c_j for i in 1:c_i]
-
-            node["catalyst/channels/input/data/fields/solution/values"] = solution
-        end
-        
-
-        # Conduit.node_info(node) do info_node
-        #    Conduit.node_print(info_node, detailed = true)
-        # end
-        ParaviewCatalyst.catalyst_execute(node)
-    end
-
-    return nothing
-end
-
-end # @muladd
diff --git a/src/callbacks_step/paraview_catalyst.jl:Zone.Identifier b/src/callbacks_step/paraview_catalyst.jl:Zone.Identifier
deleted file mode 100644
index e69de29bb2d..00000000000

From 9c857e476498afadae611053f965c16e2ed19a04 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Tue, 17 Dec 2024 12:59:31 +0100
Subject: [PATCH 81/89] edit folder for esiwace project

---
 esiwace/README.md                             | 116 ++++++++++++++++++
 .../elixir_euler_taylor_green_vortex.jl       |  74 +++++++++++
 esiwace/jobscripts/single_node.sh             |  16 +++
 esiwace/run.jl                                |  66 ++++++++++
 4 files changed, 272 insertions(+)
 create mode 100644 esiwace/README.md
 create mode 100644 esiwace/elixirs/elixir_euler_taylor_green_vortex.jl
 create mode 100644 esiwace/jobscripts/single_node.sh
 create mode 100644 esiwace/run.jl

diff --git a/esiwace/README.md b/esiwace/README.md
new file mode 100644
index 00000000000..32265cf8b4e
--- /dev/null
+++ b/esiwace/README.md
@@ -0,0 +1,116 @@
+# ESiWACE3 Trixi.jl service
+
+## Instructions for terrabyte cluster
+
+You need to get an account at https://docs.terrabyte.lrz.de/services/identity/get-account/
+and set up two-factor authentication.
+
+### Login
+```shell
+ssh login.terrabyte.lrz.de
+```
+
+### Set up t8code
+**TODO: change to project directory, then this step can be skipped**
+1. Load modules
+   ```shell
+   module load gcc/11.2.0
+   module load openmpi/4.1.2-gcc11
+   module load hdf5/1.10.7-gcc11
+   ```
+2. Change to scratch folder
+   ```shell
+   cd $SCRATCH
+   ```
+3. Clone the repository
+   ```shell
+   git clone --branch 'v3.0.1' --depth 1 https://github.com/DLR-AMR/t8code.git
+   cd t8code
+   git submodule init
+   git submodule update
+   ```
+4. Build using cmake:
+   ```shell
+   module add cmake
+   mkdir build
+   cd build
+   cmake \
+     -DCMAKE_C_COMPILER=mpicc \
+     -DCMAKE_CXX_COMPILER=mpicxx \
+     -DCMAKE_BUILD_TYPE=Release \
+     -DCMAKE_INSTALL_PREFIX="$SCRATCH/install/t8code" \
+     -DT8CODE_BUILD_TESTS=OFF \
+     -DT8CODE_BUILD_TUTORIALS=OFF \
+     -DT8CODE_BUILD_EXAMPLES=OFF \
+     -DT8CODE_BUILD_BENCHMARKS=OFF \
+     -DT8CODE_ENABLE_MPI=ON \
+     ..
+   nice make -j8
+   nice make install -j8
+   ```
+
+## Set up Julia
+Julia is not available on the cluster. We need to install it manually.
+1. If there is no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one
+   ```
+   touch $HOME/.bashrc
+   ```
+2. Use the official Julia installer:
+   ```shell 
+   curl -fsSL https://install.julialang.org | sh
+   ```
+   Accept the defaults. Once finished you will be told to source your `.bashrc` or re-login.
+3. Julia should now be available
+   ```shell
+   julia --version
+   ```
+4. Install the 1.11 branch
+   ```shell
+   juliaup add 1.11
+   ```
+
+## Set up Trixi.jl
+1. Clone the repository
+   ```shell
+   git clone https://github.com/benegee/Trixi.jl.git
+   cd Trixi.jl && git switch lc/gpu-develop
+   ```
+2. Go to the `esiwace` directory. We collect the necessary environment settings in
+   `profile`. Edit this file as necessary and source it:
+   ```shell
+   . profile
+   ```
+3. The Julia project is configured by several files: `Project.toml` lists dependencies,
+   `Manifest.toml` lists exact version numbers for all required packages,
+   `LocalPreferences.toml` contains advanced configuration options.
+   It should only be necessary to adapt `LocalPreferences.toml` to reflect the t8code
+   installation path.
+4. Open Julia via the `$JL` command and instantiate the project:
+   ```shell
+   $JL --project -e 'using Pkg; Pkg.instantiate()'
+   ```
+
+
+## Precompile Trixi.jl
+1. Make sure that everything is precompiled by running the following:
+   ```shell
+   $JL --project -e 'using OrdinaryDiffEq, Trixi'
+   ```
+2. To test CUDA, first log in to a GPU node:
+   ```shell
+   salloc --cluster=hpda2 --partition=hpda2_compute_gpu --nodes=1 --ntasks-per-node=1 --gres=gpu:4 --time=00:30:00
+   ```
+   Then start Julia:
+   ```shell
+   $JL --project -e 'using CUDA; CUDA.versioninfo()'
+   ```
+
+
+## Launch
+1. SLURM jobscripts are found in `jobscripts`. Edit as necessary. At least, you have to
+   specify your mail address.
+2. The actual simulation is configured in `run.jl` and based on the Trixi.jl elixir in `elixirs`.
+3. Send job to queue:
+   ```shell
+   sbatch jobscripts/single_node.sh
+   ```
diff --git a/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl b/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl
new file mode 100644
index 00000000000..1d204b43680
--- /dev/null
+++ b/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl
@@ -0,0 +1,74 @@
+using OrdinaryDiffEq
+using Trixi
+using CUDA
+CUDA.allowscalar(false)
+
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations3D(1.4)
+
+function initial_condition_taylor_green_vortex(x, t,
+                                               equations::CompressibleEulerEquations3D)
+    A  = 1.0 # velocity amplitude (magnitude of speed)
+    Ms = 0.1 # maximum Mach number
+    # Primitive state at point `x`; `t` is not used, so the state is time-independent.
+    rho = 1.0
+    v1  =  A * sin(x[1]) * cos(x[2]) * cos(x[3])
+    v2  = -A * cos(x[1]) * sin(x[2]) * cos(x[3])
+    v3  = 0.0
+    p   = (A / Ms)^2 * rho / equations.gamma # background pressure such that A / sqrt(gamma * p / rho) = Ms
+    p   = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) +
+          2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3]))
+    # Convert the primitive variables (rho, v1, v2, v3, p) to conserved variables.
+    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+initial_condition = initial_condition_taylor_green_vortex
+
+# volume_flux = flux_ranocha  # alternative flux, currently disabled
+volume_flux = flux_lax_friedrichs # used below as both surface flux and volume flux
+solver = DGSEM(polydeg=5, surface_flux=volume_flux,
+               volume_integral=VolumeIntegralFluxDifferencing(volume_flux))
+
+coordinates_min = (-1.0, -1.0, -1.0) .* pi # domain is the cube [-pi, pi]^3
+coordinates_max = ( 1.0,  1.0,  1.0) .* pi
+
+initial_refinement_level = 1
+trees_per_dimension = (4, 4, 4)
+
+mesh = P4estMesh(trees_per_dimension, polydeg=1,
+                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
+                 periodicity=true, initial_refinement_level=initial_refinement_level)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 1000.0) # `run.jl` replaces this with a tiny span for its warm-up run
+ode = semidiscretize(semi, tspan; adapt_to=CuArray) # NOTE(review): `adapt_to=CuArray` presumably moves the data to the GPU - confirm
+
+summary_callback = SummaryCallback()
+
+stepsize_callback = StepsizeCallback(cfl=0.1)
+
+callbacks = CallbackSet(summary_callback, stepsize_callback)
+
+
+###############################################################################
+# run the simulation
+
+maxiters=200 # step limit; `run.jl` overrides this value through `trixi_include`
+
+# `verbose=false` disables the warning when `maxiters` is reached
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
+            dt=1.0, # placeholder, expected to be overwritten by the stepsize_callback
+            save_everystep=false, callback=callbacks,
+            maxiters=maxiters, verbose=false);
+
+# print the timer summary
+summary_callback()
+
+finalize(mesh)
diff --git a/esiwace/jobscripts/single_node.sh b/esiwace/jobscripts/single_node.sh
new file mode 100644
index 00000000000..a0cf8b6742f
--- /dev/null
+++ b/esiwace/jobscripts/single_node.sh
@@ -0,0 +1,16 @@
+#!/bin/bash -x
+#SBATCH --cluster=hpda2
+#SBATCH --partition=hpda2_compute_gpu
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --gres=gpu:4
+#SBATCH --mail-user=<mail_addr>
+#SBATCH --mail-type=all
+#SBATCH --export=NONE
+#SBATCH --output=stdout.%j
+#SBATCH --error=stderr.%j
+#SBATCH --time=00:30:00
+# `profile` and `run.jl` are relative paths; `profile` is expected to set `$JL` (NOTE(review): confirm).
+source profile
+
+srun $JL --threads=1 --project=. run.jl
diff --git a/esiwace/run.jl b/esiwace/run.jl
new file mode 100644
index 00000000000..21b5e6d5975
--- /dev/null
+++ b/esiwace/run.jl
@@ -0,0 +1,66 @@
+using Trixi
+using MPI
+using TimerOutputs
+using CUDA
+
+function main(elixir_path)
+    # Run the elixir twice (warm-up, then timed) and write per-metric minima over all ranks to `metrics.out`.
+    comm = MPI.COMM_WORLD
+    rank = MPI.Comm_rank(comm)
+    isroot = rank == 0
+
+    # TODO: pin rank to device? (disabled; `machine` is not defined in this file)
+    #if machine == "jedi"
+    #    CUDA.device!(rank % 4)
+    #end
+    print("Rank $rank has device: $(CUDA.device())\n")
+
+    # step limit for the timed run; passed to `trixi_include` to replace the elixir's `maxiters`
+    maxiters = 400
+
+    if isroot
+        println("Warming up...")
+    end
+
+    # warm-up: run the elixir with a tiny final time so that compilation is not part of the timed run
+    duration_precompile = @elapsed trixi_include(elixir_path,
+        tspan=(0.0, 1e-14))
+
+    if isroot
+        println("Finished warm-up in $duration_precompile seconds\n")
+        println("Starting simulation...")
+    end
+
+    # timed run: the elixir's own `tspan`, with `maxiters` replaced by the value above
+    duration_elixir = @elapsed trixi_include(elixir_path, maxiters=maxiters)
+
+    # store metrics (on every rank!)
+    metrics = Dict{String, Float64}("elapsed time" => duration_elixir)
+
+    # read TimerOutputs timings (scaled by 1.0e-9, presumably ns -> s)
+    timer = Trixi.timer()
+    metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer)
+    metrics["rhs! time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"])
+
+    # compute performance index; `semi` is not a local here, it is expected to be the global defined by the included elixir
+    nrhscalls = Trixi.ncalls(semi.performance_counter) # read before `take!` below
+    walltime = 1.0e-9 * take!(semi.performance_counter) # NOTE(review): confirm `take!` returns total runtime, not time per call; otherwise dividing by `nrhscalls` counts calls twice
+    metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls)
+
+    # gather metrics from all ranks
+    gathered_metrics = MPI.gather(metrics, comm)
+
+    if isroot
+        # for each metric, write the minimum over all ranks
+        open("metrics.out", "w") do io
+            for (key, _) in gathered_metrics[1]
+                println(io, key, ": ", mapreduce(x->x[key], min, gathered_metrics))
+            end
+        end
+    end
+end
+
+# hardcoded elixir
+elixir_path = joinpath(@__DIR__(), "elixirs/elixir_euler_taylor_green_vortex.jl")
+
+main(elixir_path)

From fd5831e75c409f30264b1a459ae0c97e4bf75013 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <di38qiw@dlr-login.cos.lrz.de>
Date: Tue, 17 Dec 2024 13:07:21 +0100
Subject: [PATCH 82/89] current julia project

---
 esiwace/LocalPreferences.toml |   30 +
 esiwace/Manifest.toml         | 2236 +++++++++++++++++++++++++++++++++
 esiwace/Project.toml          |   11 +
 esiwace/profile               |   11 +
 4 files changed, 2288 insertions(+)
 create mode 100644 esiwace/LocalPreferences.toml
 create mode 100644 esiwace/Manifest.toml
 create mode 100644 esiwace/Project.toml
 create mode 100644 esiwace/profile

diff --git a/esiwace/LocalPreferences.toml b/esiwace/LocalPreferences.toml
new file mode 100644
index 00000000000..76b2b9b6cd0
--- /dev/null
+++ b/esiwace/LocalPreferences.toml
@@ -0,0 +1,30 @@
+[CUDA_Runtime_jll]
+local = "true"
+version = "12.6"
+
+[HDF5]
+libhdf5 = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5.so"
+libhdf5_hl = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5_hl.so"
+
+[HDF5_jll]
+libhdf5_hl_path = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5_hl.so"
+libhdf5_path = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5.so"
+
+[MPIPreferences]
+__clear__ = ["preloads_env_switch"]
+_format = "1.0"
+abi = "OpenMPI"
+binary = "system"
+cclibs = []
+libmpi = "libmpi"
+mpiexec = "srun"
+preloads = []
+
+[P4est]
+libp4est = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libp4est.so"
+libsc = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libsc.so"
+
+[T8code]
+libp4est = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libp4est.so"
+libsc = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libsc.so"
+libt8 = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libt8.so"
diff --git a/esiwace/Manifest.toml b/esiwace/Manifest.toml
new file mode 100644
index 00000000000..32ff93ba07e
--- /dev/null
+++ b/esiwace/Manifest.toml
@@ -0,0 +1,2236 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.11.2"
+manifest_format = "2.0"
+project_hash = "51af5189a097c771e1ad670a00f207ba8c9fed4b"
+
+[[deps.ADTypes]]
+git-tree-sha1 = "72af59f5b8f09faee36b4ec48e014a79210f2f4f"
+uuid = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+version = "1.11.0"
+weakdeps = ["ChainRulesCore", "ConstructionBase", "EnzymeCore"]
+
+    [deps.ADTypes.extensions]
+    ADTypesChainRulesCoreExt = "ChainRulesCore"
+    ADTypesConstructionBaseExt = "ConstructionBase"
+    ADTypesEnzymeCoreExt = "EnzymeCore"
+
+[[deps.AbstractFFTs]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef"
+uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+version = "1.5.0"
+weakdeps = ["ChainRulesCore", "Test"]
+
+    [deps.AbstractFFTs.extensions]
+    AbstractFFTsChainRulesCoreExt = "ChainRulesCore"
+    AbstractFFTsTestExt = "Test"
+
+[[deps.Accessors]]
+deps = ["CompositionsBase", "ConstructionBase", "InverseFunctions", "LinearAlgebra", "MacroTools", "Markdown"]
+git-tree-sha1 = "96bed9b1b57cf750cca50c311a197e306816a1cc"
+uuid = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
+version = "0.1.39"
+
+    [deps.Accessors.extensions]
+    AccessorsAxisKeysExt = "AxisKeys"
+    AccessorsDatesExt = "Dates"
+    AccessorsIntervalSetsExt = "IntervalSets"
+    AccessorsStaticArraysExt = "StaticArrays"
+    AccessorsStructArraysExt = "StructArrays"
+    AccessorsTestExt = "Test"
+    AccessorsUnitfulExt = "Unitful"
+
+    [deps.Accessors.weakdeps]
+    AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
+    Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+    IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
+    Requires = "ae029012-a4dd-5104-9daa-d747884805df"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+    StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+    Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+    Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
+
+[[deps.Adapt]]
+deps = ["LinearAlgebra", "Requires"]
+git-tree-sha1 = "50c3c56a52972d78e8be9fd135bfb91c9574c140"
+uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+version = "4.1.1"
+weakdeps = ["StaticArrays"]
+
+    [deps.Adapt.extensions]
+    AdaptStaticArraysExt = "StaticArrays"
+
+[[deps.AliasTables]]
+deps = ["PtrArrays", "Random"]
+git-tree-sha1 = "9876e1e164b144ca45e9e3198d0b689cadfed9ff"
+uuid = "66dad0bd-aa9a-41b7-9441-69ab47430ed8"
+version = "1.1.3"
+
+[[deps.ArgCheck]]
+git-tree-sha1 = "680b3b8759bd4c54052ada14e52355ab69e07876"
+uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197"
+version = "2.4.0"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.2"
+
+[[deps.ArnoldiMethod]]
+deps = ["LinearAlgebra", "Random", "StaticArrays"]
+git-tree-sha1 = "d57bd3762d308bded22c3b82d033bff85f6195c6"
+uuid = "ec485272-7323-5ecc-a04f-4719b315124d"
+version = "0.4.0"
+
+[[deps.ArrayInterface]]
+deps = ["Adapt", "LinearAlgebra"]
+git-tree-sha1 = "017fcb757f8e921fb44ee063a7aafe5f89b86dd1"
+uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
+version = "7.18.0"
+
+    [deps.ArrayInterface.extensions]
+    ArrayInterfaceBandedMatricesExt = "BandedMatrices"
+    ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices"
+    ArrayInterfaceCUDAExt = "CUDA"
+    ArrayInterfaceCUDSSExt = "CUDSS"
+    ArrayInterfaceChainRulesCoreExt = "ChainRulesCore"
+    ArrayInterfaceChainRulesExt = "ChainRules"
+    ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore"
+    ArrayInterfaceReverseDiffExt = "ReverseDiff"
+    ArrayInterfaceSparseArraysExt = "SparseArrays"
+    ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore"
+    ArrayInterfaceTrackerExt = "Tracker"
+
+    [deps.ArrayInterface.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
+    ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+
+[[deps.ArrayLayouts]]
+deps = ["FillArrays", "LinearAlgebra"]
+git-tree-sha1 = "2bf6e01f453284cb61c312836b4680331ddfc44b"
+uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
+version = "1.11.0"
+weakdeps = ["SparseArrays"]
+
+    [deps.ArrayLayouts.extensions]
+    ArrayLayoutsSparseArraysExt = "SparseArrays"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.Atomix]]
+deps = ["UnsafeAtomics"]
+git-tree-sha1 = "c3b238aa28c1bebd4b5ea4988bebf27e9a01b72b"
+uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
+version = "1.0.1"
+
+    [deps.Atomix.extensions]
+    AtomixCUDAExt = "CUDA"
+    AtomixMetalExt = "Metal"
+    AtomixoneAPIExt = "oneAPI"
+
+    [deps.Atomix.weakdeps]
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+    oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
+
+[[deps.AutoHashEquals]]
+git-tree-sha1 = "4ec6b48702dacc5994a835c1189831755e4e76ef"
+uuid = "15f4f7f2-30c1-5605-9d31-71845cf9641f"
+version = "2.2.0"
+
+[[deps.BFloat16s]]
+deps = ["LinearAlgebra", "Printf", "Random", "Test"]
+git-tree-sha1 = "2c7cc21e8678eff479978a0a2ef5ce2f51b63dff"
+uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
+version = "0.5.0"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.BitTwiddlingConvenienceFunctions]]
+deps = ["Static"]
+git-tree-sha1 = "f21cfd4950cb9f0587d5067e69405ad2acd27b87"
+uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b"
+version = "0.1.6"
+
+[[deps.BracketingNonlinearSolve]]
+deps = ["CommonSolve", "ConcreteStructs", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase"]
+git-tree-sha1 = "95cb19c37ea427617e9795655667712f03058d98"
+uuid = "70df07ce-3d50-431d-a3e7-ca6ddb60ac1e"
+version = "1.1.0"
+weakdeps = ["ForwardDiff"]
+
+    [deps.BracketingNonlinearSolve.extensions]
+    BracketingNonlinearSolveForwardDiffExt = "ForwardDiff"
+
+[[deps.CEnum]]
+git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc"
+uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
+version = "0.5.0"
+
+[[deps.CPUSummary]]
+deps = ["CpuId", "IfElse", "PrecompileTools", "Static"]
+git-tree-sha1 = "5a97e67919535d6841172016c9530fd69494e5ec"
+uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
+version = "0.2.6"
+
+[[deps.CUDA]]
+deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "StaticArrays", "Statistics", "demumble_jll"]
+git-tree-sha1 = "e0725a467822697171af4dae15cec10b4fc19053"
+uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
+version = "5.5.2"
+weakdeps = ["ChainRulesCore", "EnzymeCore", "SpecialFunctions"]
+
+    [deps.CUDA.extensions]
+    ChainRulesCoreExt = "ChainRulesCore"
+    EnzymeCoreExt = "EnzymeCore"
+    SpecialFunctionsExt = "SpecialFunctions"
+
+[[deps.CUDA_Driver_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "14996d716a2eaaeccfc8d7bc854dd87fde720ac1"
+uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc"
+version = "0.10.4+0"
+
+[[deps.CUDA_Runtime_Discovery]]
+deps = ["Libdl"]
+git-tree-sha1 = "33576c7c1b2500f8e7e6baa082e04563203b3a45"
+uuid = "1af6417a-86b4-443c-805f-a4643ffb695f"
+version = "0.3.5"
+
+[[deps.CUDA_Runtime_jll]]
+deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
+git-tree-sha1 = "17f1536c600133f7c4113bae0a2d98dbf27c7ebc"
+uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
+version = "0.15.5+0"
+
+[[deps.ChainRulesCore]]
+deps = ["Compat", "LinearAlgebra"]
+git-tree-sha1 = "3e4b134270b372f2ed4d4d0e936aabaefc1802bc"
+uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+version = "1.25.0"
+weakdeps = ["SparseArrays"]
+
+    [deps.ChainRulesCore.extensions]
+    ChainRulesCoreSparseArraysExt = "SparseArrays"
+
+[[deps.CloseOpenIntervals]]
+deps = ["Static", "StaticArrayInterface"]
+git-tree-sha1 = "05ba0d07cd4fd8b7a39541e31a7b0254704ea581"
+uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9"
+version = "0.1.13"
+
+[[deps.CodeTracking]]
+deps = ["InteractiveUtils", "UUIDs"]
+git-tree-sha1 = "7eee164f122511d3e4e1ebadb7956939ea7e1c77"
+uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
+version = "1.3.6"
+
+[[deps.CodecZlib]]
+deps = ["TranscodingStreams", "Zlib_jll"]
+git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759"
+uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
+version = "0.7.6"
+
+[[deps.ColorTypes]]
+deps = ["FixedPointNumbers", "Random"]
+git-tree-sha1 = "c7acce7a7e1078a20a285211dd73cd3941a871d6"
+uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
+version = "0.12.0"
+
+    [deps.ColorTypes.extensions]
+    StyledStringsExt = "StyledStrings"
+
+    [deps.ColorTypes.weakdeps]
+    StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+
+[[deps.Colors]]
+deps = ["ColorTypes", "FixedPointNumbers", "Reexport"]
+git-tree-sha1 = "64e15186f0aa277e174aa81798f7eb8598e0157e"
+uuid = "5ae59095-9a9b-59fe-a467-6f913c188581"
+version = "0.13.0"
+
+[[deps.CommonSolve]]
+git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c"
+uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
+version = "0.2.4"
+
+[[deps.CommonSubexpressions]]
+deps = ["MacroTools"]
+git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5"
+uuid = "bbf7d656-a473-5ed7-a52c-81e309532950"
+version = "0.3.1"
+
+[[deps.CommonWorldInvalidations]]
+git-tree-sha1 = "ae52d1c52048455e85a387fbee9be553ec2b68d0"
+uuid = "f70d9fcc-98c5-4d4a-abd7-e4cdeebd8ca8"
+version = "1.0.0"
+
+[[deps.Compat]]
+deps = ["TOML", "UUIDs"]
+git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "4.16.0"
+weakdeps = ["Dates", "LinearAlgebra"]
+
+    [deps.Compat.extensions]
+    CompatLinearAlgebraExt = "LinearAlgebra"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.1.1+0"
+
+[[deps.CompositionsBase]]
+git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad"
+uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b"
+version = "0.1.2"
+weakdeps = ["InverseFunctions"]
+
+    [deps.CompositionsBase.extensions]
+    CompositionsBaseInverseFunctionsExt = "InverseFunctions"
+
+[[deps.ConcreteStructs]]
+git-tree-sha1 = "f749037478283d372048690eb3b5f92a79432b34"
+uuid = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
+version = "0.2.3"
+
+[[deps.ConstructionBase]]
+git-tree-sha1 = "76219f1ed5771adbb096743bff43fb5fdd4c1157"
+uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
+version = "1.5.8"
+
+    [deps.ConstructionBase.extensions]
+    ConstructionBaseIntervalSetsExt = "IntervalSets"
+    ConstructionBaseLinearAlgebraExt = "LinearAlgebra"
+    ConstructionBaseStaticArraysExt = "StaticArrays"
+
+    [deps.ConstructionBase.weakdeps]
+    IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
+    LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[[deps.CpuId]]
+deps = ["Markdown"]
+git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406"
+uuid = "adafc99b-e345-5852-983c-f28acb93d879"
+version = "0.3.1"
+
+[[deps.Crayons]]
+git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
+uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
+version = "4.1.1"
+
+[[deps.DataAPI]]
+git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe"
+uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
+version = "1.16.0"
+
+[[deps.DataFrames]]
+deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "fb61b4812c49343d7ef0b533ba982c46021938a6"
+uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+version = "1.7.0"
+
+[[deps.DataStructures]]
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.18.20"
+
+[[deps.DataValueInterfaces]]
+git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
+uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
+version = "1.0.0"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+version = "1.11.0"
+
+[[deps.DelimitedFiles]]
+deps = ["Mmap"]
+git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae"
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+version = "1.9.1"
+
+[[deps.DiffEqBase]]
+deps = ["ArrayInterface", "ConcreteStructs", "DataStructures", "DocStringExtensions", "EnumX", "EnzymeCore", "FastBroadcast", "FastClosures", "FastPower", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "Markdown", "MuladdMacro", "Parameters", "PreallocationTools", "PrecompileTools", "Printf", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SciMLStructures", "Setfield", "Static", "StaticArraysCore", "Statistics", "TruncatedStacktraces"]
+git-tree-sha1 = "b1e23a7fe7371934d9d538114a7e7166c1d09e05"
+uuid = "2b5f629d-d688-5b77-993f-72d75c75574e"
+version = "6.161.0"
+
+    [deps.DiffEqBase.extensions]
+    DiffEqBaseCUDAExt = "CUDA"
+    DiffEqBaseChainRulesCoreExt = "ChainRulesCore"
+    DiffEqBaseDistributionsExt = "Distributions"
+    DiffEqBaseEnzymeExt = ["ChainRulesCore", "Enzyme"]
+    DiffEqBaseGeneralizedGeneratedExt = "GeneralizedGenerated"
+    DiffEqBaseMPIExt = "MPI"
+    DiffEqBaseMeasurementsExt = "Measurements"
+    DiffEqBaseMonteCarloMeasurementsExt = "MonteCarloMeasurements"
+    DiffEqBaseReverseDiffExt = "ReverseDiff"
+    DiffEqBaseSparseArraysExt = "SparseArrays"
+    DiffEqBaseTrackerExt = "Tracker"
+    DiffEqBaseUnitfulExt = "Unitful"
+
+    [deps.DiffEqBase.weakdeps]
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+    Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+    GeneralizedGenerated = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb"
+    MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+    Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
+    MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+    Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
+
+[[deps.DiffEqCallbacks]]
+deps = ["ConcreteStructs", "DataStructures", "DiffEqBase", "DifferentiationInterface", "Functors", "LinearAlgebra", "Markdown", "RecipesBase", "RecursiveArrayTools", "SciMLBase", "StaticArraysCore"]
+git-tree-sha1 = "f6bc598f21c7bf2f7885cff9b3c9078e606ab075"
+uuid = "459566f4-90b8-5000-8ac3-15dfb0a30def"
+version = "4.2.2"
+
+[[deps.DiffResults]]
+deps = ["StaticArraysCore"]
+git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621"
+uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+version = "1.1.0"
+
+[[deps.DiffRules]]
+deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"]
+git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272"
+uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
+version = "1.15.1"
+
+[[deps.DifferentiationInterface]]
+deps = ["ADTypes", "LinearAlgebra"]
+git-tree-sha1 = "7ffe68edc2a4ad0ff2eeb5a32cc99ab45746fe3d"
+uuid = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
+version = "0.6.27"
+
+    [deps.DifferentiationInterface.extensions]
+    DifferentiationInterfaceChainRulesCoreExt = "ChainRulesCore"
+    DifferentiationInterfaceDiffractorExt = "Diffractor"
+    DifferentiationInterfaceEnzymeExt = ["EnzymeCore", "Enzyme"]
+    DifferentiationInterfaceFastDifferentiationExt = "FastDifferentiation"
+    DifferentiationInterfaceFiniteDiffExt = "FiniteDiff"
+    DifferentiationInterfaceFiniteDifferencesExt = "FiniteDifferences"
+    DifferentiationInterfaceForwardDiffExt = ["ForwardDiff", "DiffResults"]
+    DifferentiationInterfaceMooncakeExt = "Mooncake"
+    DifferentiationInterfacePolyesterForwardDiffExt = "PolyesterForwardDiff"
+    DifferentiationInterfaceReverseDiffExt = ["ReverseDiff", "DiffResults"]
+    DifferentiationInterfaceSparseArraysExt = "SparseArrays"
+    DifferentiationInterfaceSparseMatrixColoringsExt = "SparseMatrixColorings"
+    DifferentiationInterfaceStaticArraysExt = "StaticArrays"
+    DifferentiationInterfaceSymbolicsExt = "Symbolics"
+    DifferentiationInterfaceTrackerExt = "Tracker"
+    DifferentiationInterfaceZygoteExt = ["Zygote", "ForwardDiff"]
+
+    [deps.DifferentiationInterface.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+    Diffractor = "9f5e2b26-1114-432f-b630-d3fe2085c51c"
+    Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+    EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
+    FastDifferentiation = "eb9bf01b-bf85-4b60-bf87-ee5de06c00be"
+    FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
+    FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
+    PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+    Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+    Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[[deps.Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+version = "1.11.0"
+
+[[deps.DocStringExtensions]]
+deps = ["LibGit2"]
+git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
+uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+version = "0.9.3"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.6.0"
+
+[[deps.EllipsisNotation]]
+deps = ["StaticArrayInterface"]
+git-tree-sha1 = "3507300d4343e8e4ad080ad24e335274c2e297a9"
+uuid = "da5c29d0-fa7d-589e-88eb-ea29b0a81949"
+version = "1.8.0"
+
+[[deps.EnumX]]
+git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237"
+uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
+version = "1.0.4"
+
+[[deps.EnzymeCore]]
+git-tree-sha1 = "0cdb7af5c39e92d78a0ee8d0a447d32f7593137e"
+uuid = "f151be2c-9106-41f4-ab19-57ee4f262869"
+version = "0.8.8"
+weakdeps = ["Adapt"]
+
+    [deps.EnzymeCore.extensions]
+    AdaptExt = "Adapt"
+
+[[deps.ExponentialUtilities]]
+deps = ["Adapt", "ArrayInterface", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "PrecompileTools", "Printf", "SparseArrays", "libblastrampoline_jll"]
+git-tree-sha1 = "cae251c76f353e32d32d76fae2fea655eab652af"
+uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18"
+version = "1.27.0"
+weakdeps = ["StaticArrays"]
+
+    [deps.ExponentialUtilities.extensions]
+    ExponentialUtilitiesStaticArraysExt = "StaticArrays"
+
+[[deps.ExprTools]]
+git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec"
+uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
+version = "0.1.10"
+
+[[deps.Expronicon]]
+deps = ["MLStyle", "Pkg", "TOML"]
+git-tree-sha1 = "fc3951d4d398b5515f91d7fe5d45fc31dccb3c9b"
+uuid = "6b7a57c9-7cc1-4fdf-b7f5-e857abae3636"
+version = "0.8.5"
+
+[[deps.FFTW]]
+deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"]
+git-tree-sha1 = "4820348781ae578893311153d69049a93d05f39d"
+uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
+version = "1.8.0"
+
+[[deps.FFTW_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "4d81ed14783ec49ce9f2e168208a12ce1815aa25"
+uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a"
+version = "3.3.10+1"
+
+[[deps.FastBroadcast]]
+deps = ["ArrayInterface", "LinearAlgebra", "Polyester", "Static", "StaticArrayInterface", "StrideArraysCore"]
+git-tree-sha1 = "ab1b34570bcdf272899062e1a56285a53ecaae08"
+uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898"
+version = "0.3.5"
+
+[[deps.FastClosures]]
+git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef"
+uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
+version = "0.3.2"
+
+[[deps.FastGaussQuadrature]]
+deps = ["LinearAlgebra", "SpecialFunctions", "StaticArrays"]
+git-tree-sha1 = "fd923962364b645f3719855c88f7074413a6ad92"
+uuid = "442a2c76-b920-505d-bb47-c5924d526838"
+version = "1.0.2"
+
+[[deps.FastLapackInterface]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "cbf5edddb61a43669710cbc2241bc08b36d9e660"
+uuid = "29a986be-02c6-4525-aec4-84b980013641"
+version = "2.0.4"
+
+[[deps.FastPower]]
+git-tree-sha1 = "58c3431137131577a7c379d00fea00be524338fb"
+uuid = "a4df4552-cc26-4903-aec0-212e50a0e84b"
+version = "1.1.1"
+
+    [deps.FastPower.extensions]
+    FastPowerEnzymeExt = "Enzyme"
+    FastPowerForwardDiffExt = "ForwardDiff"
+    FastPowerMeasurementsExt = "Measurements"
+    FastPowerMonteCarloMeasurementsExt = "MonteCarloMeasurements"
+    FastPowerReverseDiffExt = "ReverseDiff"
+    FastPowerTrackerExt = "Tracker"
+
+    [deps.FastPower.weakdeps]
+    Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
+    MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+version = "1.11.0"
+
+[[deps.FillArrays]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "6a70198746448456524cb442b8af316927ff3e1a"
+uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
+version = "1.13.0"
+
+    [deps.FillArrays.extensions]
+    FillArraysPDMatsExt = "PDMats"
+    FillArraysSparseArraysExt = "SparseArrays"
+    FillArraysStatisticsExt = "Statistics"
+
+    [deps.FillArrays.weakdeps]
+    PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[[deps.FiniteDiff]]
+deps = ["ArrayInterface", "LinearAlgebra", "Setfield"]
+git-tree-sha1 = "84e3a47db33be7248daa6274b287507dd6ff84e8"
+uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
+version = "2.26.2"
+
+    [deps.FiniteDiff.extensions]
+    FiniteDiffBandedMatricesExt = "BandedMatrices"
+    FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices"
+    FiniteDiffSparseArraysExt = "SparseArrays"
+    FiniteDiffStaticArraysExt = "StaticArrays"
+
+    [deps.FiniteDiff.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[[deps.FixedPointNumbers]]
+deps = ["Statistics"]
+git-tree-sha1 = "05882d6995ae5c12bb5f36dd2ed3f61c98cbb172"
+uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
+version = "0.8.5"
+
+[[deps.ForwardDiff]]
+deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"]
+git-tree-sha1 = "a2df1b776752e3f344e5116c06d75a10436ab853"
+uuid = "f6369f11-7733-5829-9624-2563aa707210"
+version = "0.10.38"
+weakdeps = ["StaticArrays"]
+
+    [deps.ForwardDiff.extensions]
+    ForwardDiffStaticArraysExt = "StaticArrays"
+
+[[deps.FunctionWrappers]]
+git-tree-sha1 = "d62485945ce5ae9c0c48f124a84998d755bae00e"
+uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
+version = "1.1.3"
+
+[[deps.FunctionWrappersWrappers]]
+deps = ["FunctionWrappers"]
+git-tree-sha1 = "b104d487b34566608f8b4e1c39fb0b10aa279ff8"
+uuid = "77dc65aa-8811-40c2-897b-53d922fa7daf"
+version = "0.1.3"
+
+[[deps.Functors]]
+deps = ["Compat", "ConstructionBase", "LinearAlgebra", "Random"]
+git-tree-sha1 = "60a0339f28a233601cb74468032b5c302d5067de"
+uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
+version = "0.5.2"
+
+[[deps.Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+version = "1.11.0"
+
+[[deps.GPUArrays]]
+deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
+git-tree-sha1 = "62ee71528cca49be797076a76bdc654a170a523e"
+uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
+version = "10.3.1"
+
+[[deps.GPUArraysCore]]
+deps = ["Adapt"]
+git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950"
+uuid = "46192b85-c4d5-4398-a991-12ede77f4527"
+version = "0.1.6"
+
+[[deps.GPUCompiler]]
+deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "PrecompileTools", "Preferences", "Scratch", "Serialization", "TOML", "TimerOutputs", "UUIDs"]
+git-tree-sha1 = "1d6f290a5eb1201cd63574fbc4440c788d5cb38f"
+uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
+version = "0.27.8"
+
+[[deps.GaussQuadrature]]
+deps = ["SpecialFunctions"]
+git-tree-sha1 = "eb6f1f48aa994f3018cbd029a17863c6535a266d"
+uuid = "d54b0c1a-921d-58e0-8e36-89d8069c0969"
+version = "0.5.8"
+
+[[deps.GenericSchur]]
+deps = ["LinearAlgebra", "Printf"]
+git-tree-sha1 = "af49a0851f8113fcfae2ef5027c6d49d0acec39b"
+uuid = "c145ed77-6b09-5dd9-b285-bf645a82121e"
+version = "0.5.4"
+
+[[deps.Graphs]]
+deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"]
+git-tree-sha1 = "1dc470db8b1131cfc7fb4c115de89fe391b9e780"
+uuid = "86223c79-3864-5bf0-83f7-82e725a168b6"
+version = "1.12.0"
+
+[[deps.HDF5]]
+deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"]
+git-tree-sha1 = "e856eef26cf5bf2b0f95f8f4fc37553c72c8641c"
+uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+version = "0.17.2"
+weakdeps = ["MPI"]
+
+    [deps.HDF5.extensions]
+    MPIExt = "MPI"
+
+[[deps.HDF5_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"]
+git-tree-sha1 = "38c8874692d48d5440d5752d6c74b0c6b0b60739"
+uuid = "0234f1f7-429e-5d53-9886-15a909be8d59"
+version = "1.14.2+1"
+
+[[deps.HostCPUFeatures]]
+deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"]
+git-tree-sha1 = "8e070b599339d622e9a081d17230d74a5c473293"
+uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
+version = "0.1.17"
+
+[[deps.Hwloc_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "50aedf345a709ab75872f80a2779568dc0bb461b"
+uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8"
+version = "2.11.2+1"
+
+[[deps.IfElse]]
+git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1"
+uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
+version = "0.1.1"
+
+[[deps.Inflate]]
+git-tree-sha1 = "d1b1b796e47d94588b3757fe84fbf65a5ec4a80d"
+uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
+version = "0.1.5"
+
+[[deps.InlineStrings]]
+git-tree-sha1 = "45521d31238e87ee9f9732561bfee12d4eebd52d"
+uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
+version = "1.4.2"
+
+    [deps.InlineStrings.extensions]
+    ArrowTypesExt = "ArrowTypes"
+    ParsersExt = "Parsers"
+
+    [deps.InlineStrings.weakdeps]
+    ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
+    Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+
+[[deps.IntelOpenMP_jll]]
+deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl"]
+git-tree-sha1 = "10bd689145d2c3b2a9844005d01087cc1194e79e"
+uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0"
+version = "2024.2.1+0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.InverseFunctions]]
+git-tree-sha1 = "a779299d77cd080bf77b97535acecd73e1c5e5cb"
+uuid = "3587e190-3f89-42d0-90ee-14403ec27112"
+version = "0.1.17"
+weakdeps = ["Dates", "Test"]
+
+    [deps.InverseFunctions.extensions]
+    InverseFunctionsDatesExt = "Dates"
+    InverseFunctionsTestExt = "Test"
+
+[[deps.InvertedIndices]]
+git-tree-sha1 = "6da3c4316095de0f5ee2ebd875df8721e7e0bdbe"
+uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
+version = "1.3.1"
+
+[[deps.IrrationalConstants]]
+git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2"
+uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
+version = "0.2.2"
+
+[[deps.IteratorInterfaceExtensions]]
+git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
+uuid = "82899510-4779-5014-852e-03e436cf321d"
+version = "1.0.0"
+
+[[deps.JLLWrappers]]
+deps = ["Artifacts", "Preferences"]
+git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b"
+uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
+version = "1.6.1"
+
+[[deps.JuliaNVTXCallbacks_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "af433a10f3942e882d3c671aacb203e006a5808f"
+uuid = "9c1d0b0a-7046-5b2e-a33f-ea22f176ac7e"
+version = "0.2.1+0"
+
+[[deps.KLU]]
+deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"]
+git-tree-sha1 = "07649c499349dad9f08dde4243a4c597064663e9"
+uuid = "ef3ab10e-7fda-4108-b977-705223b18434"
+version = "0.6.0"
+
+[[deps.KernelAbstractions]]
+deps = ["Adapt", "Atomix", "InteractiveUtils", "MacroTools", "PrecompileTools", "Requires", "StaticArrays", "UUIDs"]
+git-tree-sha1 = "b9a838cd3028785ac23822cded5126b3da394d1a"
+uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+version = "0.9.31"
+weakdeps = ["EnzymeCore", "LinearAlgebra", "SparseArrays"]
+
+    [deps.KernelAbstractions.extensions]
+    EnzymeExt = "EnzymeCore"
+    LinearAlgebraExt = "LinearAlgebra"
+    SparseArraysExt = "SparseArrays"
+
+[[deps.Kronecker]]
+deps = ["LinearAlgebra", "NamedDims", "SparseArrays", "StatsBase"]
+git-tree-sha1 = "9253429e28cceae6e823bec9ffde12460d79bb38"
+uuid = "2c470bb0-bcc8-11e8-3dad-c9649493f05e"
+version = "0.5.5"
+
+[[deps.Krylov]]
+deps = ["LinearAlgebra", "Printf", "SparseArrays"]
+git-tree-sha1 = "4f20a2df85a9e5d55c9e84634bbf808ed038cabd"
+uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
+version = "0.9.8"
+
+[[deps.LLVM]]
+deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Unicode"]
+git-tree-sha1 = "d422dfd9707bec6617335dc2ea3c5172a87d5908"
+uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
+version = "9.1.3"
+weakdeps = ["BFloat16s"]
+
+    [deps.LLVM.extensions]
+    BFloat16sExt = "BFloat16s"
+
+[[deps.LLVMExtra_jll]]
+deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
+git-tree-sha1 = "05a8bd5a42309a9ec82f700876903abce1017dd3"
+uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
+version = "0.0.34+0"
+
+[[deps.LLVMLoopInfo]]
+git-tree-sha1 = "2e5c102cfc41f48ae4740c7eca7743cc7e7b75ea"
+uuid = "8b046642-f1f6-4319-8d3c-209ddc03c586"
+version = "1.0.0"
+
+[[deps.LLVMOpenMP_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "78211fb6cbc872f77cad3fc0b6cf647d923f4929"
+uuid = "1d63c593-3942-5779-bab2-d838dc0a180e"
+version = "18.1.7+0"
+
+[[deps.LaTeXStrings]]
+git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"
+uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+version = "1.4.0"
+
+[[deps.LayoutPointers]]
+deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"]
+git-tree-sha1 = "a9eaadb366f5493a5654e843864c13d8b107548c"
+uuid = "10f19ff3-798f-405d-979b-55457f8fc047"
+version = "0.1.17"
+
+[[deps.LazyArrays]]
+deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra", "MacroTools", "SparseArrays"]
+git-tree-sha1 = "f289bee714e11708df257c57514585863aa02b33"
+uuid = "5078a376-72f3-5289-bfd5-ec5146d43c02"
+version = "2.3.1"
+
+    [deps.LazyArrays.extensions]
+    LazyArraysBandedMatricesExt = "BandedMatrices"
+    LazyArraysBlockArraysExt = "BlockArrays"
+    LazyArraysBlockBandedMatricesExt = "BlockBandedMatrices"
+    LazyArraysStaticArraysExt = "StaticArrays"
+
+    [deps.LazyArrays.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
+    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[[deps.LazyArtifacts]]
+deps = ["Artifacts", "Pkg"]
+uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+version = "1.11.0"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "0.6.4"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "8.6.0+0"
+
+[[deps.LibGit2]]
+deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.7.2+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.0+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.Libiconv_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "61dfdba58e585066d8bce214c5a51eaa0539f269"
+uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
+version = "1.17.0+1"
+
+[[deps.LightXML]]
+deps = ["Libdl", "XML2_jll"]
+git-tree-sha1 = "3a994404d3f6709610701c7dabfc03fed87a81f8"
+uuid = "9c8b4983-aa76-5018-a973-4c85ecc9e179"
+version = "0.9.1"
+
+[[deps.LineSearch]]
+deps = ["ADTypes", "CommonSolve", "ConcreteStructs", "FastClosures", "LinearAlgebra", "MaybeInplace", "SciMLBase", "SciMLJacobianOperators", "StaticArraysCore"]
+git-tree-sha1 = "97d502765cc5cf3a722120f50da03c2474efce04"
+uuid = "87fe0de2-c867-4266-b59a-2f0a94fc965b"
+version = "0.1.4"
+weakdeps = ["LineSearches"]
+
+    [deps.LineSearch.extensions]
+    LineSearchLineSearchesExt = "LineSearches"
+
+[[deps.LineSearches]]
+deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"]
+git-tree-sha1 = "e4c3be53733db1051cc15ecf573b1042b3a712a1"
+uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
+version = "7.3.0"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+version = "1.11.0"
+
+[[deps.LinearMaps]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "ee79c3208e55786de58f8dcccca098ced79f743f"
+uuid = "7a12625a-238d-50fd-b39a-03d52299707e"
+version = "3.11.3"
+weakdeps = ["ChainRulesCore", "SparseArrays", "Statistics"]
+
+    [deps.LinearMaps.extensions]
+    LinearMapsChainRulesCoreExt = "ChainRulesCore"
+    LinearMapsSparseArraysExt = "SparseArrays"
+    LinearMapsStatisticsExt = "Statistics"
+
+[[deps.LinearSolve]]
+deps = ["ArrayInterface", "ChainRulesCore", "ConcreteStructs", "DocStringExtensions", "EnumX", "FastLapackInterface", "GPUArraysCore", "InteractiveUtils", "KLU", "Krylov", "LazyArrays", "Libdl", "LinearAlgebra", "MKL_jll", "Markdown", "PrecompileTools", "Preferences", "RecursiveFactorization", "Reexport", "SciMLBase", "SciMLOperators", "Setfield", "SparseArrays", "Sparspak", "StaticArraysCore", "UnPack"]
+git-tree-sha1 = "9d5872d134bd33dd3e120767004f760770958863"
+uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+version = "2.38.0"
+
+    [deps.LinearSolve.extensions]
+    LinearSolveBandedMatricesExt = "BandedMatrices"
+    LinearSolveBlockDiagonalsExt = "BlockDiagonals"
+    LinearSolveCUDAExt = "CUDA"
+    LinearSolveCUDSSExt = "CUDSS"
+    LinearSolveEnzymeExt = "EnzymeCore"
+    LinearSolveFastAlmostBandedMatricesExt = "FastAlmostBandedMatrices"
+    LinearSolveHYPREExt = "HYPRE"
+    LinearSolveIterativeSolversExt = "IterativeSolvers"
+    LinearSolveKernelAbstractionsExt = "KernelAbstractions"
+    LinearSolveKrylovKitExt = "KrylovKit"
+    LinearSolveMetalExt = "Metal"
+    LinearSolvePardisoExt = "Pardiso"
+    LinearSolveRecursiveArrayToolsExt = "RecursiveArrayTools"
+
+    [deps.LinearSolve.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
+    EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
+    FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e"
+    HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771"
+    IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
+    KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+    KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
+    Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+    Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"
+    RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
+
+[[deps.LogExpFunctions]]
+deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
+git-tree-sha1 = "13ca9e2586b89836fd20cccf56e57e2b9ae7f38f"
+uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+version = "0.3.29"
+
+    [deps.LogExpFunctions.extensions]
+    LogExpFunctionsChainRulesCoreExt = "ChainRulesCore"
+    LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables"
+    LogExpFunctionsInverseFunctionsExt = "InverseFunctions"
+
+    [deps.LogExpFunctions.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
+    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
+
+[[deps.LoopVectorization]]
+deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"]
+git-tree-sha1 = "8084c25a250e00ae427a379a5b607e7aed96a2dd"
+uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
+version = "0.12.171"
+weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"]
+
+    [deps.LoopVectorization.extensions]
+    ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"]
+    SpecialFunctionsExt = "SpecialFunctions"
+
+[[deps.MKL_jll]]
+deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "oneTBB_jll"]
+git-tree-sha1 = "f046ccd0c6db2832a9f639e2c669c6fe867e5f4f"
+uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
+version = "2024.2.0+0"
+
+[[deps.MLStyle]]
+git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8"
+uuid = "d8e11817-5142-5d16-987a-aa16d5891078"
+version = "0.4.17"
+
+[[deps.MPI]]
+deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "PkgVersion", "PrecompileTools", "Requires", "Serialization", "Sockets"]
+git-tree-sha1 = "892676019c58f34e38743bc989b0eca5bce5edc5"
+uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+version = "0.20.22"
+
+    [deps.MPI.extensions]
+    AMDGPUExt = "AMDGPU"
+    CUDAExt = "CUDA"
+
+    [deps.MPI.weakdeps]
+    AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+
+[[deps.MPICH_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
+git-tree-sha1 = "7715e65c47ba3941c502bffb7f266a41a7f54423"
+uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4"
+version = "4.2.3+0"
+
+[[deps.MPIPreferences]]
+deps = ["Libdl", "Preferences"]
+git-tree-sha1 = "c105fe467859e7f6e9a852cb15cb4301126fac07"
+uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
+version = "0.1.11"
+
+[[deps.MPItrampoline_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
+git-tree-sha1 = "70e830dab5d0775183c99fc75e4c24c614ed7142"
+uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748"
+version = "5.5.1+0"
+
+[[deps.MacroTools]]
+deps = ["Markdown", "Random"]
+git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df"
+uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
+version = "0.5.13"
+
+[[deps.ManualMemory]]
+git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd"
+uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667"
+version = "0.1.8"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.MaybeInplace]]
+deps = ["ArrayInterface", "LinearAlgebra", "MacroTools"]
+git-tree-sha1 = "54e2fdc38130c05b42be423e90da3bade29b74bd"
+uuid = "bb5d69b7-63fc-4a16-80bd-7e42200c7bdb"
+version = "0.1.4"
+weakdeps = ["SparseArrays"]
+
+    [deps.MaybeInplace.extensions]
+    MaybeInplaceSparseArraysExt = "SparseArrays"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.6+0"
+
+[[deps.MicrosoftMPI_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "bc95bf4149bf535c09602e3acdf950d9b4376227"
+uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf"
+version = "10.1.4+3"
+
+[[deps.Missings]]
+deps = ["DataAPI"]
+git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d"
+uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+version = "1.2.0"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+version = "1.11.0"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2023.12.12"
+
+[[deps.MuladdMacro]]
+git-tree-sha1 = "cac9cc5499c25554cba55cd3c30543cff5ca4fab"
+uuid = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
+version = "0.2.4"
+
+[[deps.NLSolversBase]]
+deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"]
+git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c"
+uuid = "d41bc354-129a-5804-8e4c-c37616107c6c"
+version = "7.8.3"
+
+[[deps.NVTX]]
+deps = ["Colors", "JuliaNVTXCallbacks_jll", "Libdl", "NVTX_jll"]
+git-tree-sha1 = "6a6f8bfaa91bb2e40ff562ab9f30dc827741daef"
+uuid = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
+version = "0.3.5"
+
+[[deps.NVTX_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "ce3269ed42816bf18d500c9f63418d4b0d9f5a3b"
+uuid = "e98f9f5b-d649-5603-91fd-7774390e6439"
+version = "3.1.0+2"
+
+[[deps.NaNMath]]
+deps = ["OpenLibm_jll"]
+git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4"
+uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
+version = "1.0.2"
+
+[[deps.NamedDims]]
+deps = ["LinearAlgebra", "Pkg", "Statistics"]
+git-tree-sha1 = "90178dc801073728b8b2d0d8677d10909feb94d8"
+uuid = "356022a1-0364-5f58-8944-0da4b18d706f"
+version = "1.2.2"
+
+    [deps.NamedDims.extensions]
+    AbstractFFTsExt = "AbstractFFTs"
+    ChainRulesCoreExt = "ChainRulesCore"
+    CovarianceEstimationExt = "CovarianceEstimation"
+    TrackerExt = "Tracker"
+
+    [deps.NamedDims.weakdeps]
+    AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    CovarianceEstimation = "587fd27a-f159-11e8-2dae-1979310e6154"
+    Requires = "ae029012-a4dd-5104-9daa-d747884805df"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.NodesAndModes]]
+deps = ["DelimitedFiles", "LinearAlgebra", "SpecialFunctions", "StaticArrays"]
+git-tree-sha1 = "ee6719b4ed5fd08b654017648bf5fa2e2dc8f1ec"
+uuid = "7aca2e03-f7e2-4192-9ec8-f4ca66d597fb"
+version = "1.1.0"
+
+[[deps.NonlinearSolve]]
+deps = ["ADTypes", "ArrayInterface", "BracketingNonlinearSolve", "CommonSolve", "ConcreteStructs", "DiffEqBase", "DifferentiationInterface", "FastClosures", "FiniteDiff", "ForwardDiff", "LineSearch", "LinearAlgebra", "LinearSolve", "NonlinearSolveBase", "NonlinearSolveFirstOrder", "NonlinearSolveQuasiNewton", "NonlinearSolveSpectralMethods", "PrecompileTools", "Preferences", "Reexport", "SciMLBase", "SimpleNonlinearSolve", "SparseArrays", "SparseMatrixColorings", "StaticArraysCore", "SymbolicIndexingInterface"]
+git-tree-sha1 = "d0caebdb5a31e1a11ca9f7f189cdbf341ac89f0e"
+uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
+version = "4.3.0"
+
+    [deps.NonlinearSolve.extensions]
+    NonlinearSolveFastLevenbergMarquardtExt = "FastLevenbergMarquardt"
+    NonlinearSolveFixedPointAccelerationExt = "FixedPointAcceleration"
+    NonlinearSolveLeastSquaresOptimExt = "LeastSquaresOptim"
+    NonlinearSolveMINPACKExt = "MINPACK"
+    NonlinearSolveNLSolversExt = "NLSolvers"
+    NonlinearSolveNLsolveExt = ["NLsolve", "LineSearches"]
+    NonlinearSolvePETScExt = ["PETSc", "MPI"]
+    NonlinearSolveSIAMFANLEquationsExt = "SIAMFANLEquations"
+    NonlinearSolveSpeedMappingExt = "SpeedMapping"
+    NonlinearSolveSundialsExt = "Sundials"
+
+    [deps.NonlinearSolve.weakdeps]
+    FastLevenbergMarquardt = "7a0df574-e128-4d35-8cbd-3d84502bf7ce"
+    FixedPointAcceleration = "817d07cb-a79a-5c30-9a31-890123675176"
+    LeastSquaresOptim = "0fc2ff8b-aaa3-5acd-a817-1944a5e08891"
+    LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
+    MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9"
+    MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+    NLSolvers = "337daf1e-9722-11e9-073e-8b9effe078ba"
+    NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
+    PETSc = "ace2c81b-2b5f-4b1e-a30d-d662738edfe0"
+    SIAMFANLEquations = "084e46ad-d928-497d-ad5e-07fa361a48c4"
+    SpeedMapping = "f1835b91-879b-4a3f-a438-e4baacf14412"
+    Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4"
+
+[[deps.NonlinearSolveBase]]
+deps = ["ADTypes", "Adapt", "ArrayInterface", "CommonSolve", "Compat", "ConcreteStructs", "DifferentiationInterface", "EnzymeCore", "FastClosures", "LinearAlgebra", "Markdown", "MaybeInplace", "Preferences", "Printf", "RecursiveArrayTools", "SciMLBase", "SciMLJacobianOperators", "SciMLOperators", "StaticArraysCore", "SymbolicIndexingInterface", "TimerOutputs"]
+git-tree-sha1 = "5bca24ce7b0c034dcbdc6ad6d658b02e0eed566e"
+uuid = "be0214bd-f91f-a760-ac4e-3421ce2b2da0"
+version = "1.4.0"
+
+    [deps.NonlinearSolveBase.extensions]
+    NonlinearSolveBaseBandedMatricesExt = "BandedMatrices"
+    NonlinearSolveBaseDiffEqBaseExt = "DiffEqBase"
+    NonlinearSolveBaseForwardDiffExt = "ForwardDiff"
+    NonlinearSolveBaseLineSearchExt = "LineSearch"
+    NonlinearSolveBaseLinearSolveExt = "LinearSolve"
+    NonlinearSolveBaseSparseArraysExt = "SparseArrays"
+    NonlinearSolveBaseSparseMatrixColoringsExt = "SparseMatrixColorings"
+
+    [deps.NonlinearSolveBase.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b"
+    LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35"
+
+[[deps.NonlinearSolveFirstOrder]]
+deps = ["ADTypes", "ArrayInterface", "CommonSolve", "ConcreteStructs", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LineSearch", "LinearAlgebra", "LinearSolve", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase", "SciMLJacobianOperators", "Setfield", "StaticArraysCore"]
+git-tree-sha1 = "a1ea35ab0bcc99753e26d574ba1e339f19d100fa"
+uuid = "5959db7a-ea39-4486-b5fe-2dd0bf03d60d"
+version = "1.2.0"
+
+[[deps.NonlinearSolveQuasiNewton]]
+deps = ["ArrayInterface", "CommonSolve", "ConcreteStructs", "DiffEqBase", "LinearAlgebra", "LinearSolve", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase", "SciMLOperators", "StaticArraysCore"]
+git-tree-sha1 = "8f14b848afcfc0a2941cd3cca1bef04c987465bb"
+uuid = "9a2c21bd-3a47-402d-9113-8faf9a0ee114"
+version = "1.1.0"
+weakdeps = ["ForwardDiff"]
+
+    [deps.NonlinearSolveQuasiNewton.extensions]
+    NonlinearSolveQuasiNewtonForwardDiffExt = "ForwardDiff"
+
+[[deps.NonlinearSolveSpectralMethods]]
+deps = ["CommonSolve", "ConcreteStructs", "DiffEqBase", "LineSearch", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase"]
+git-tree-sha1 = "f28b1ab17b5f15eb2b174eaf8813cf17f0b3e6c0"
+uuid = "26075421-4e9a-44e1-8bd1-420ed7ad02b2"
+version = "1.1.0"
+weakdeps = ["ForwardDiff"]
+
+    [deps.NonlinearSolveSpectralMethods.extensions]
+    NonlinearSolveSpectralMethodsForwardDiffExt = "ForwardDiff"
+
+[[deps.Octavian]]
+deps = ["CPUSummary", "IfElse", "LoopVectorization", "ManualMemory", "PolyesterWeave", "PrecompileTools", "Static", "StaticArrayInterface", "ThreadingUtilities", "VectorizationBase"]
+git-tree-sha1 = "92410e147bdcaf9e2f982a7cc9b1341fc5dd1a77"
+uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
+version = "0.3.28"
+
+    [deps.Octavian.extensions]
+    ForwardDiffExt = "ForwardDiff"
+    HyperDualNumbersExt = "HyperDualNumbers"
+
+    [deps.Octavian.weakdeps]
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97"
+
+[[deps.OffsetArrays]]
+git-tree-sha1 = "39d000d9c33706b8364817d8894fae1548f40295"
+uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
+version = "1.14.2"
+weakdeps = ["Adapt"]
+
+    [deps.OffsetArrays.extensions]
+    OffsetArraysAdaptExt = "Adapt"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.27+1"
+
+[[deps.OpenLibm_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+version = "0.8.1+2"
+
+[[deps.OpenMPI_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML", "Zlib_jll"]
+git-tree-sha1 = "2dace87e14256edb1dd0724ab7ba831c779b96bd"
+uuid = "fe0851c0-eecd-5654-98d4-656369965a5c"
+version = "5.0.6+0"
+
+[[deps.OpenSSL_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "7493f61f55a6cce7325f197443aa80d32554ba10"
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.0.15+1"
+
+[[deps.OpenSpecFun_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
+uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
+version = "0.5.5+0"
+
+[[deps.OrderedCollections]]
+git-tree-sha1 = "12f1439c4f986bb868acda6ea33ebc78e19b95ad"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.7.0"
+
+[[deps.OrdinaryDiffEq]]
+deps = ["ADTypes", "Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "EnumX", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FillArrays", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "InteractiveUtils", "LineSearches", "LinearAlgebra", "LinearSolve", "Logging", "MacroTools", "MuladdMacro", "NonlinearSolve", "OrdinaryDiffEqAdamsBashforthMoulton", "OrdinaryDiffEqBDF", "OrdinaryDiffEqCore", "OrdinaryDiffEqDefault", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqExplicitRK", "OrdinaryDiffEqExponentialRK", "OrdinaryDiffEqExtrapolation", "OrdinaryDiffEqFIRK", "OrdinaryDiffEqFeagin", "OrdinaryDiffEqFunctionMap", "OrdinaryDiffEqHighOrderRK", "OrdinaryDiffEqIMEXMultistep", "OrdinaryDiffEqLinear", "OrdinaryDiffEqLowOrderRK", "OrdinaryDiffEqLowStorageRK", "OrdinaryDiffEqNonlinearSolve", "OrdinaryDiffEqNordsieck", "OrdinaryDiffEqPDIRK", "OrdinaryDiffEqPRK", "OrdinaryDiffEqQPRK", "OrdinaryDiffEqRKN", "OrdinaryDiffEqRosenbrock", "OrdinaryDiffEqSDIRK", "OrdinaryDiffEqSSPRK", "OrdinaryDiffEqStabilizedIRK", "OrdinaryDiffEqStabilizedRK", "OrdinaryDiffEqSymplecticRK", "OrdinaryDiffEqTsit5", "OrdinaryDiffEqVerner", "Polyester", "PreallocationTools", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SciMLStructures", "SimpleNonlinearSolve", "SimpleUnPack", "SparseArrays", "SparseDiffTools", "Static", "StaticArrayInterface", "StaticArrays", "TruncatedStacktraces"]
+git-tree-sha1 = "36ce9bfc14a4b3dcf1490e80b5f1f4d35bfddf39"
+uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
+version = "6.90.1"
+
+[[deps.OrdinaryDiffEqAdamsBashforthMoulton]]
+deps = ["ADTypes", "DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqLowOrderRK", "Polyester", "RecursiveArrayTools", "Reexport", "Static"]
+git-tree-sha1 = "8e3c5978d0531a961f70d2f2730d1d16ed3bbd12"
+uuid = "89bda076-bce5-4f1c-845f-551c83cdda9a"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqBDF]]
+deps = ["ArrayInterface", "DiffEqBase", "FastBroadcast", "LinearAlgebra", "MacroTools", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "OrdinaryDiffEqSDIRK", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "StaticArrays", "TruncatedStacktraces"]
+git-tree-sha1 = "b4498d40bf35da0b6d22652ff2e9d8820590b3c6"
+uuid = "6ad6398a-0878-4a85-9266-38940aa047c8"
+version = "1.1.2"
+
+[[deps.OrdinaryDiffEqCore]]
+deps = ["ADTypes", "Accessors", "Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "EnumX", "FastBroadcast", "FastClosures", "FastPower", "FillArrays", "FunctionWrappersWrappers", "InteractiveUtils", "LinearAlgebra", "Logging", "MacroTools", "MuladdMacro", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SciMLStructures", "SimpleUnPack", "Static", "StaticArrayInterface", "StaticArraysCore", "SymbolicIndexingInterface", "TruncatedStacktraces"]
+git-tree-sha1 = "c7f395034602c3e4d40ece93dc2c9f066f0ce61f"
+uuid = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
+version = "1.13.0"
+weakdeps = ["EnzymeCore"]
+
+    [deps.OrdinaryDiffEqCore.extensions]
+    OrdinaryDiffEqCoreEnzymeCoreExt = "EnzymeCore"
+
+[[deps.OrdinaryDiffEqDefault]]
+deps = ["DiffEqBase", "EnumX", "LinearAlgebra", "LinearSolve", "OrdinaryDiffEqBDF", "OrdinaryDiffEqCore", "OrdinaryDiffEqRosenbrock", "OrdinaryDiffEqTsit5", "OrdinaryDiffEqVerner", "PrecompileTools", "Preferences", "Reexport"]
+git-tree-sha1 = "c8223e487d58bef28a3535b33ddf8ffdb44f46fb"
+uuid = "50262376-6c5a-4cf5-baba-aaf4f84d72d7"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqDifferentiation]]
+deps = ["ADTypes", "ArrayInterface", "DiffEqBase", "FastBroadcast", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "OrdinaryDiffEqCore", "SciMLBase", "SparseArrays", "SparseDiffTools", "StaticArrayInterface", "StaticArrays"]
+git-tree-sha1 = "8977f283a7d89c5d5c06c933467ed4af0a99f2f7"
+uuid = "4302a76b-040a-498a-8c04-15b101fed76b"
+version = "1.2.0"
+
+[[deps.OrdinaryDiffEqExplicitRK]]
+deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "TruncatedStacktraces"]
+git-tree-sha1 = "4dbce3f9e6974567082ce5176e21aab0224a69e9"
+uuid = "9286f039-9fbf-40e8-bf65-aa933bdc4db0"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqExponentialRK]]
+deps = ["DiffEqBase", "ExponentialUtilities", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqSDIRK", "OrdinaryDiffEqVerner", "RecursiveArrayTools", "Reexport", "SciMLBase"]
+git-tree-sha1 = "f63938b8e9e5d3a05815defb3ebdbdcf61ec0a74"
+uuid = "e0540318-69ee-4070-8777-9e2de6de23de"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqExtrapolation]]
+deps = ["DiffEqBase", "FastBroadcast", "FastPower", "LinearSolve", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "Polyester", "RecursiveArrayTools", "Reexport"]
+git-tree-sha1 = "048bcccc8f59c20d5b4ad268eef4d7d21c005a94"
+uuid = "becaefa8-8ca2-5cf9-886d-c06f3d2bd2c4"
+version = "1.2.1"
+
+[[deps.OrdinaryDiffEqFIRK]]
+deps = ["DiffEqBase", "FastBroadcast", "FastPower", "LinearAlgebra", "LinearSolve", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "RecursiveArrayTools", "Reexport", "SciMLOperators"]
+git-tree-sha1 = "7a6e3996dc0850aee6cdc10c8afa377242fce702"
+uuid = "5960d6e9-dd7a-4743-88e7-cf307b64f125"
+version = "1.5.0"
+
+[[deps.OrdinaryDiffEqFeagin]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "RecursiveArrayTools", "Reexport", "Static"]
+git-tree-sha1 = "a7cc74d3433db98e59dc3d58bc28174c6c290adf"
+uuid = "101fe9f7-ebb6-4678-b671-3a81e7194747"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqFunctionMap]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "SciMLBase", "Static"]
+git-tree-sha1 = "925a91583d1ab84f1f0fea121be1abf1179c5926"
+uuid = "d3585ca7-f5d3-4ba6-8057-292ed1abd90f"
+version = "1.1.1"
+
+[[deps.OrdinaryDiffEqHighOrderRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "Static"]
+git-tree-sha1 = "103e017ff186ac39d731904045781c9bacfca2b0"
+uuid = "d28bc4f8-55e1-4f49-af69-84c1a99f0f58"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqIMEXMultistep]]
+deps = ["DiffEqBase", "FastBroadcast", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "Reexport"]
+git-tree-sha1 = "9f8f52aad2399d7714b400ff9d203254b0a89c4a"
+uuid = "9f002381-b378-40b7-97a6-27a27c83f129"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqLinear]]
+deps = ["DiffEqBase", "ExponentialUtilities", "LinearAlgebra", "OrdinaryDiffEqCore", "OrdinaryDiffEqTsit5", "OrdinaryDiffEqVerner", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators"]
+git-tree-sha1 = "0f81a77ede3da0dc714ea61e81c76b25db4ab87a"
+uuid = "521117fe-8c41-49f8-b3b6-30780b3f0fb5"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqLowOrderRK]]
+deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "SciMLBase", "Static"]
+git-tree-sha1 = "d4bb32e09d6b68ce2eb45fb81001eab46f60717a"
+uuid = "1344f307-1e59-4825-a18e-ace9aa3fa4c6"
+version = "1.2.0"
+
+[[deps.OrdinaryDiffEqLowStorageRK]]
+deps = ["Adapt", "DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "StaticArrays"]
+git-tree-sha1 = "590561f3af623d5485d070b4d7044f8854535f5a"
+uuid = "b0944070-b475-4768-8dec-fb6eb410534d"
+version = "1.2.1"
+
+[[deps.OrdinaryDiffEqNonlinearSolve]]
+deps = ["ADTypes", "ArrayInterface", "DiffEqBase", "FastBroadcast", "FastClosures", "ForwardDiff", "LinearAlgebra", "LinearSolve", "MuladdMacro", "NonlinearSolve", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "PreallocationTools", "RecursiveArrayTools", "SciMLBase", "SciMLOperators", "SciMLStructures", "SimpleNonlinearSolve", "StaticArrays"]
+git-tree-sha1 = "3a3eb0b7ef3f996c468d6f8013eac9525bcfd788"
+uuid = "127b3ac7-2247-4354-8eb6-78cf4e7c58e8"
+version = "1.3.0"
+
+[[deps.OrdinaryDiffEqNordsieck]]
+deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqTsit5", "Polyester", "RecursiveArrayTools", "Reexport", "Static"]
+git-tree-sha1 = "ef44754f10e0dfb9bb55ded382afed44cd94ab57"
+uuid = "c9986a66-5c92-4813-8696-a7ec84c806c8"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqPDIRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "Polyester", "Reexport", "StaticArrays"]
+git-tree-sha1 = "a8b7f8107c477e07c6a6c00d1d66cac68b801bbc"
+uuid = "5dd0a6cf-3d4b-4314-aa06-06d4e299bc89"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqPRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "Reexport"]
+git-tree-sha1 = "da525d277962a1b76102c79f30cb0c31e13fe5b9"
+uuid = "5b33eab2-c0f1-4480-b2c3-94bc1e80bda1"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqQPRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "Static"]
+git-tree-sha1 = "332f9d17d0229218f66a73492162267359ba85e9"
+uuid = "04162be5-8125-4266-98ed-640baecc6514"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqRKN]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "RecursiveArrayTools", "Reexport"]
+git-tree-sha1 = "41c09d9c20877546490f907d8dffdd52690dd65f"
+uuid = "af6ede74-add8-4cfd-b1df-9a4dbb109d7a"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqRosenbrock]]
+deps = ["ADTypes", "DiffEqBase", "FastBroadcast", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "LinearSolve", "MacroTools", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static"]
+git-tree-sha1 = "760a51a626d0065455847e4a3f788b07e86e5090"
+uuid = "43230ef6-c299-4910-a778-202eb28ce4ce"
+version = "1.3.1"
+
+[[deps.OrdinaryDiffEqSDIRK]]
+deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MacroTools", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "RecursiveArrayTools", "Reexport", "SciMLBase", "TruncatedStacktraces"]
+git-tree-sha1 = "f6683803a58de600ab7a26d2f49411c9923e9721"
+uuid = "2d112036-d095-4a1e-ab9a-08536f3ecdbf"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqSSPRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "StaticArrays"]
+git-tree-sha1 = "7dbe4ac56f930df5e9abd003cedb54e25cbbea86"
+uuid = "669c94d9-1f4b-4b64-b377-1aa079aa2388"
+version = "1.2.0"
+
+[[deps.OrdinaryDiffEqStabilizedIRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "RecursiveArrayTools", "Reexport", "StaticArrays"]
+git-tree-sha1 = "348fd6def9a88518715425025eadd58517017325"
+uuid = "e3e12d00-db14-5390-b879-ac3dd2ef6296"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqStabilizedRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "StaticArrays"]
+git-tree-sha1 = "1b0d894c880e25f7d0b022d7257638cf8ce5b311"
+uuid = "358294b1-0aab-51c3-aafe-ad5ab194a2ad"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqSymplecticRK]]
+deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "RecursiveArrayTools", "Reexport"]
+git-tree-sha1 = "4e8b8c8b81df3df17e2eb4603115db3b30a88235"
+uuid = "fa646aed-7ef9-47eb-84c4-9443fc8cbfa8"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqTsit5]]
+deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "TruncatedStacktraces"]
+git-tree-sha1 = "96552f7d4619fabab4038a29ed37dd55e9eb513a"
+uuid = "b1df2697-797e-41e3-8120-5422d3b24e4a"
+version = "1.1.0"
+
+[[deps.OrdinaryDiffEqVerner]]
+deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "TruncatedStacktraces"]
+git-tree-sha1 = "81d7841e73e385b9925d5c8e4427f2adcdda55db"
+uuid = "79d7bb75-1356-48c1-b8c0-6832512096c2"
+version = "1.1.1"
+
+[[deps.P4est]]
+deps = ["CEnum", "MPI", "MPIPreferences", "P4est_jll", "Preferences", "Reexport", "UUIDs"]
+git-tree-sha1 = "6a924bc3d05ebb09de7e8294a30c022461a44720"
+uuid = "7d669430-f675-4ae7-b43e-fab78ec5a902"
+version = "0.4.13"
+
+[[deps.P4est_jll]]
+deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Pkg", "TOML", "Zlib_jll"]
+git-tree-sha1 = "70c2d9a33b8810198314a5722ee3e9520110b28d"
+uuid = "6b5a15aa-cf52-5330-8376-5e5d90283449"
+version = "2.8.1+2"
+
+[[deps.PackageExtensionCompat]]
+git-tree-sha1 = "fb28e33b8a95c4cee25ce296c817d89cc2e53518"
+uuid = "65ce6f38-6b18-4e1d-a461-8949797d7930"
+version = "1.0.2"
+weakdeps = ["Requires", "TOML"]
+
+[[deps.Parameters]]
+deps = ["OrderedCollections", "UnPack"]
+git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe"
+uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
+version = "0.12.3"
+
+[[deps.PathIntersections]]
+deps = ["ForwardDiff", "GaussQuadrature", "LinearAlgebra", "SparseArrays", "StaticArrays"]
+git-tree-sha1 = "5283bb8bb16e0f90ac5194af390e7d41f507763a"
+uuid = "4c1a95c7-462a-4a7e-b284-959c63fbf1dc"
+version = "0.2.0"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.11.0"
+
+    [deps.Pkg.extensions]
+    REPLExt = "REPL"
+
+    [deps.Pkg.weakdeps]
+    REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[deps.PkgVersion]]
+deps = ["Pkg"]
+git-tree-sha1 = "f9501cc0430a26bc3d156ae1b5b0c1b47af4d6da"
+uuid = "eebad327-c553-4316-9ea0-9fa01ccd7688"
+version = "0.3.3"
+
+[[deps.Polyester]]
+deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Static", "StaticArrayInterface", "StrideArraysCore", "ThreadingUtilities"]
+git-tree-sha1 = "6d38fea02d983051776a856b7df75b30cf9a3c1f"
+uuid = "f517fe37-dbe3-4b94-8317-1923a5111588"
+version = "0.7.16"
+
+[[deps.PolyesterWeave]]
+deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"]
+git-tree-sha1 = "645bed98cd47f72f67316fd42fc47dee771aefcd"
+uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
+version = "0.2.2"
+
+[[deps.PolynomialBases]]
+deps = ["ArgCheck", "AutoHashEquals", "FFTW", "FastGaussQuadrature", "LinearAlgebra", "Requires", "SimpleUnPack", "SpecialFunctions"]
+git-tree-sha1 = "b62fd0464edfffce54393cd617135af30fa47006"
+uuid = "c74db56a-226d-5e98-8bb0-a6049094aeea"
+version = "0.4.22"
+
+[[deps.PooledArrays]]
+deps = ["DataAPI", "Future"]
+git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3"
+uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
+version = "1.4.3"
+
+[[deps.PreallocationTools]]
+deps = ["Adapt", "ArrayInterface", "ForwardDiff"]
+git-tree-sha1 = "6c62ce45f268f3f958821a1e5192cf91c75ae89c"
+uuid = "d236fae5-4411-538c-8e31-a6e3d9e00b46"
+version = "0.4.24"
+
+    [deps.PreallocationTools.extensions]
+    PreallocationToolsReverseDiffExt = "ReverseDiff"
+
+    [deps.PreallocationTools.weakdeps]
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+
+[[deps.PrecompileTools]]
+deps = ["Preferences"]
+git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f"
+uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+version = "1.2.1"
+
+[[deps.Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.4.3"
+
+[[deps.PrettyTables]]
+deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"]
+git-tree-sha1 = "1101cd475833706e4d0e7b122218257178f48f34"
+uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+version = "2.4.0"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.PtrArrays]]
+git-tree-sha1 = "77a42d78b6a92df47ab37e177b2deac405e1c88f"
+uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d"
+version = "1.2.1"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.Random123]]
+deps = ["Random", "RandomNumbers"]
+git-tree-sha1 = "4743b43e5a9c4a2ede372de7061eed81795b12e7"
+uuid = "74087812-796a-5b5d-8853-05524746bad3"
+version = "1.7.0"
+
+[[deps.RandomNumbers]]
+deps = ["Random"]
+git-tree-sha1 = "c6ec94d2aaba1ab2ff983052cf6a606ca5985902"
+uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
+version = "1.6.0"
+
+[[deps.RecipesBase]]
+deps = ["PrecompileTools"]
+git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff"
+uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
+version = "1.3.4"
+
+[[deps.RecursiveArrayTools]]
+deps = ["Adapt", "ArrayInterface", "DocStringExtensions", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"]
+git-tree-sha1 = "32f824db4e5bab64e25a12b22483a30a6b813d08"
+uuid = "731186ca-8d62-57ce-b412-fbd966d074cd"
+version = "3.27.4"
+
+    [deps.RecursiveArrayTools.extensions]
+    RecursiveArrayToolsFastBroadcastExt = "FastBroadcast"
+    RecursiveArrayToolsForwardDiffExt = "ForwardDiff"
+    RecursiveArrayToolsMeasurementsExt = "Measurements"
+    RecursiveArrayToolsMonteCarloMeasurementsExt = "MonteCarloMeasurements"
+    RecursiveArrayToolsReverseDiffExt = ["ReverseDiff", "Zygote"]
+    RecursiveArrayToolsSparseArraysExt = ["SparseArrays"]
+    RecursiveArrayToolsStructArraysExt = "StructArrays"
+    RecursiveArrayToolsTrackerExt = "Tracker"
+    RecursiveArrayToolsZygoteExt = "Zygote"
+
+    [deps.RecursiveArrayTools.weakdeps]
+    FastBroadcast = "7034ab61-46d4-4ed7-9d0f-46aef9175898"
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
+    MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+    StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+    Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[[deps.RecursiveFactorization]]
+deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "PrecompileTools", "StrideArraysCore", "TriangularSolve"]
+git-tree-sha1 = "6db1a75507051bc18bfa131fbc7c3f169cc4b2f6"
+uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
+version = "0.2.23"
+
+[[deps.Reexport]]
+git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
+uuid = "189a3867-3050-52da-a836-e630ba90ab69"
+version = "1.2.2"
+
+[[deps.Requires]]
+deps = ["UUIDs"]
+git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "1.3.0"
+
+[[deps.RuntimeGeneratedFunctions]]
+deps = ["ExprTools", "SHA", "Serialization"]
+git-tree-sha1 = "04c968137612c4a5629fa531334bb81ad5680f00"
+uuid = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"
+version = "0.5.13"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.SIMDTypes]]
+git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c"
+uuid = "94e857df-77ce-4151-89e5-788b33177be4"
+version = "0.1.0"
+
+[[deps.SLEEFPirates]]
+deps = ["IfElse", "Static", "VectorizationBase"]
+git-tree-sha1 = "456f610ca2fbd1c14f5fcf31c6bfadc55e7d66e0"
+uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
+version = "0.6.43"
+
+[[deps.SciMLBase]]
+deps = ["ADTypes", "Accessors", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "Expronicon", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "SciMLStructures", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface"]
+git-tree-sha1 = "213408a448e27170e4fca428838b8d11c5bbf1ab"
+uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
+version = "2.68.1"
+
+    [deps.SciMLBase.extensions]
+    SciMLBaseChainRulesCoreExt = "ChainRulesCore"
+    SciMLBaseMakieExt = "Makie"
+    SciMLBasePartialFunctionsExt = "PartialFunctions"
+    SciMLBasePyCallExt = "PyCall"
+    SciMLBasePythonCallExt = "PythonCall"
+    SciMLBaseRCallExt = "RCall"
+    SciMLBaseZygoteExt = "Zygote"
+
+    [deps.SciMLBase.weakdeps]
+    ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
+    PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b"
+    PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
+    PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
+    RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
+    Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[[deps.SciMLJacobianOperators]]
+deps = ["ADTypes", "ArrayInterface", "ConcreteStructs", "ConstructionBase", "DifferentiationInterface", "FastClosures", "LinearAlgebra", "SciMLBase", "SciMLOperators"]
+git-tree-sha1 = "f66048bb969e67bd7d1bdd03cd0b81219642bbd0"
+uuid = "19f34311-ddf3-4b8b-af20-060888a46c0e"
+version = "0.1.1"
+
+[[deps.SciMLOperators]]
+deps = ["Accessors", "ArrayInterface", "DocStringExtensions", "LinearAlgebra", "MacroTools"]
+git-tree-sha1 = "6149620767866d4b0f0f7028639b6e661b6a1e44"
+uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961"
+version = "0.3.12"
+weakdeps = ["SparseArrays", "StaticArraysCore"]
+
+    [deps.SciMLOperators.extensions]
+    SciMLOperatorsSparseArraysExt = "SparseArrays"
+    SciMLOperatorsStaticArraysCoreExt = "StaticArraysCore"
+
+[[deps.SciMLStructures]]
+deps = ["ArrayInterface"]
+git-tree-sha1 = "0444a37a25fab98adbd90baa806ee492a3af133a"
+uuid = "53ae85a6-f571-4167-b2af-e1d143709226"
+version = "1.6.1"
+
+[[deps.Scratch]]
+deps = ["Dates"]
+git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386"
+uuid = "6c6a2e73-6563-6170-7368-637461726353"
+version = "1.2.1"
+
+[[deps.SentinelArrays]]
+deps = ["Dates", "Random"]
+git-tree-sha1 = "712fb0231ee6f9120e005ccd56297abbc053e7e0"
+uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
+version = "1.4.8"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.Setfield]]
+deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"]
+git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac"
+uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
+version = "1.1.1"
+
+[[deps.SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+version = "1.11.0"
+
+[[deps.SimpleNonlinearSolve]]
+deps = ["ADTypes", "ArrayInterface", "BracketingNonlinearSolve", "CommonSolve", "ConcreteStructs", "DifferentiationInterface", "FastClosures", "FiniteDiff", "ForwardDiff", "LineSearch", "LinearAlgebra", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase", "Setfield", "StaticArraysCore"]
+git-tree-sha1 = "a3868a6add9f5989d1f4bd21de0333ef89fb9d9f"
+uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7"
+version = "2.1.0"
+
+    [deps.SimpleNonlinearSolve.extensions]
+    SimpleNonlinearSolveChainRulesCoreExt = "ChainRulesCore"
+    SimpleNonlinearSolveDiffEqBaseExt = "DiffEqBase"
+    SimpleNonlinearSolveReverseDiffExt = "ReverseDiff"
+    SimpleNonlinearSolveTrackerExt = "Tracker"
+
+    [deps.SimpleNonlinearSolve.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+
+[[deps.SimpleTraits]]
+deps = ["InteractiveUtils", "MacroTools"]
+git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
+uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
+version = "0.9.4"
+
+[[deps.SimpleUnPack]]
+git-tree-sha1 = "58e6353e72cde29b90a69527e56df1b5c3d8c437"
+uuid = "ce78b400-467f-4804-87d8-8f486da07d0a"
+version = "1.1.0"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
+
+[[deps.SortingAlgorithms]]
+deps = ["DataStructures"]
+git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085"
+uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
+version = "1.2.1"
+
+[[deps.SparseArrays]]
+deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+version = "1.11.0"
+
+[[deps.SparseDiffTools]]
+deps = ["ADTypes", "Adapt", "ArrayInterface", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "PackageExtensionCompat", "Random", "Reexport", "SciMLOperators", "Setfield", "SparseArrays", "StaticArrayInterface", "StaticArrays", "UnPack", "VertexSafeGraphs"]
+git-tree-sha1 = "b906758c107b049b6b71599b9f928d9b14e5554a"
+uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804"
+version = "2.23.0"
+
+    [deps.SparseDiffTools.extensions]
+    SparseDiffToolsEnzymeExt = "Enzyme"
+    SparseDiffToolsPolyesterExt = "Polyester"
+    SparseDiffToolsPolyesterForwardDiffExt = "PolyesterForwardDiff"
+    SparseDiffToolsSymbolicsExt = "Symbolics"
+    SparseDiffToolsZygoteExt = "Zygote"
+
+    [deps.SparseDiffTools.weakdeps]
+    Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+    Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
+    PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b"
+    Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
+    Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[[deps.SparseMatrixColorings]]
+deps = ["ADTypes", "DataStructures", "DocStringExtensions", "LinearAlgebra", "Random", "SparseArrays"]
+git-tree-sha1 = "76b44c879661552d64f382acf66faa29ab56b3d9"
+uuid = "0a514795-09f3-496d-8182-132a7b665d35"
+version = "0.4.10"
+weakdeps = ["Colors"]
+
+    [deps.SparseMatrixColorings.extensions]
+    SparseMatrixColoringsColorsExt = "Colors"
+
+[[deps.Sparspak]]
+deps = ["Libdl", "LinearAlgebra", "Logging", "OffsetArrays", "Printf", "SparseArrays", "Test"]
+git-tree-sha1 = "342cf4b449c299d8d1ceaf00b7a49f4fbc7940e7"
+uuid = "e56a9233-b9d6-4f03-8d0f-1825330902ac"
+version = "0.3.9"
+
+[[deps.SpecialFunctions]]
+deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
+git-tree-sha1 = "64cca0c26b4f31ba18f13f6c12af7c85f478cfde"
+uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
+version = "2.5.0"
+weakdeps = ["ChainRulesCore"]
+
+    [deps.SpecialFunctions.extensions]
+    SpecialFunctionsChainRulesCoreExt = "ChainRulesCore"
+
+[[deps.StableRNGs]]
+deps = ["Random"]
+git-tree-sha1 = "83e6cce8324d49dfaf9ef059227f91ed4441a8e5"
+uuid = "860ef19b-820b-49d6-a774-d7a799459cd3"
+version = "1.0.2"
+
+[[deps.StartUpDG]]
+deps = ["ConstructionBase", "FillArrays", "HDF5", "Kronecker", "LinearAlgebra", "NodesAndModes", "PathIntersections", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "Setfield", "SparseArrays", "StaticArrays", "Triangulate", "WriteVTK"]
+git-tree-sha1 = "498a2fa1132a294a99385f334d596d92f3ca6ca3"
+uuid = "472ebc20-7c99-4d4b-9470-8fde4e9faa0f"
+version = "1.1.5"
+
+    [deps.StartUpDG.extensions]
+    StartUpDGSummationByPartsOperatorsExt = "SummationByPartsOperators"
+    TriangulatePlotsExt = "Plots"
+
+    [deps.StartUpDG.weakdeps]
+    Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+    SummationByPartsOperators = "9f78cca6-572e-554e-b819-917d2f1cf240"
+
+[[deps.Static]]
+deps = ["CommonWorldInvalidations", "IfElse", "PrecompileTools"]
+git-tree-sha1 = "87d51a3ee9a4b0d2fe054bdd3fc2436258db2603"
+uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
+version = "1.1.1"
+
+[[deps.StaticArrayInterface]]
+deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "PrecompileTools", "Static"]
+git-tree-sha1 = "96381d50f1ce85f2663584c8e886a6ca97e60554"
+uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718"
+version = "1.8.0"
+weakdeps = ["OffsetArrays", "StaticArrays"]
+
+    [deps.StaticArrayInterface.extensions]
+    StaticArrayInterfaceOffsetArraysExt = "OffsetArrays"
+    StaticArrayInterfaceStaticArraysExt = "StaticArrays"
+
+[[deps.StaticArrays]]
+deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"]
+git-tree-sha1 = "777657803913ffc7e8cc20f0fd04b634f871af8f"
+uuid = "90137ffa-7385-5640-81b9-e52037218182"
+version = "1.9.8"
+weakdeps = ["ChainRulesCore", "Statistics"]
+
+    [deps.StaticArrays.extensions]
+    StaticArraysChainRulesCoreExt = "ChainRulesCore"
+    StaticArraysStatisticsExt = "Statistics"
+
+[[deps.StaticArraysCore]]
+git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682"
+uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
+version = "1.4.3"
+
+[[deps.Statistics]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+version = "1.11.1"
+weakdeps = ["SparseArrays"]
+
+    [deps.Statistics.extensions]
+    SparseArraysExt = ["SparseArrays"]
+
+[[deps.StatsAPI]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed"
+uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
+version = "1.7.0"
+
+[[deps.StatsBase]]
+deps = ["AliasTables", "DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
+git-tree-sha1 = "29321314c920c26684834965ec2ce0dacc9cf8e5"
+uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+version = "0.34.4"
+
+[[deps.StrideArrays]]
+deps = ["ArrayInterface", "LinearAlgebra", "LoopVectorization", "Octavian", "Random", "SLEEFPirates", "Static", "StaticArrayInterface", "StaticArraysCore", "Statistics", "StrideArraysCore", "VectorizationBase", "VectorizedRNG", "VectorizedStatistics"]
+git-tree-sha1 = "a009ced9a1952b91f3982a6e06df672189c6cbc9"
+uuid = "d1fa6d79-ef01-42a6-86c9-f7c551f8593b"
+version = "0.1.29"
+
+[[deps.StrideArraysCore]]
+deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"]
+git-tree-sha1 = "f35f6ab602df8413a50c4a25ca14de821e8605fb"
+uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da"
+version = "0.5.7"
+
+[[deps.StringManipulation]]
+deps = ["PrecompileTools"]
+git-tree-sha1 = "a6b1675a536c5ad1a60e5a5153e1fee12eb146e3"
+uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e"
+version = "0.4.0"
+
+[[deps.StructArrays]]
+deps = ["ConstructionBase", "DataAPI", "Tables"]
+git-tree-sha1 = "f4dc295e983502292c4c3f951dbb4e985e35b3be"
+uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+version = "0.6.18"
+weakdeps = ["Adapt", "GPUArraysCore", "SparseArrays", "StaticArrays"]
+
+    [deps.StructArrays.extensions]
+    StructArraysAdaptExt = "Adapt"
+    StructArraysGPUArraysCoreExt = "GPUArraysCore"
+    StructArraysSparseArraysExt = "SparseArrays"
+    StructArraysStaticArraysExt = "StaticArrays"
+
+[[deps.SuiteSparse_jll]]
+deps = ["Artifacts", "Libdl", "libblastrampoline_jll"]
+uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+version = "7.7.0+0"
+
+[[deps.SummationByPartsOperators]]
+deps = ["ArgCheck", "AutoHashEquals", "FFTW", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "MuladdMacro", "PolynomialBases", "PrecompileTools", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "SimpleUnPack", "SparseArrays", "StaticArrayInterface", "StaticArrays", "Unrolled"]
+git-tree-sha1 = "0405e8d721d57b8b2674d61d263527e97efbb8cc"
+uuid = "9f78cca6-572e-554e-b819-917d2f1cf240"
+version = "0.5.72"
+
+    [deps.SummationByPartsOperators.extensions]
+    SummationByPartsOperatorsBandedMatricesExt = "BandedMatrices"
+    SummationByPartsOperatorsDiffEqCallbacksExt = "DiffEqCallbacks"
+    SummationByPartsOperatorsForwardDiffExt = "ForwardDiff"
+    SummationByPartsOperatorsOptimForwardDiffExt = ["Optim", "ForwardDiff"]
+    SummationByPartsOperatorsStructArraysExt = "StructArrays"
+
+    [deps.SummationByPartsOperators.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+    StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+
+[[deps.SymbolicIndexingInterface]]
+deps = ["Accessors", "ArrayInterface", "RuntimeGeneratedFunctions", "StaticArraysCore"]
+git-tree-sha1 = "8db233b54917e474165d582bef2244fa040e0a56"
+uuid = "2efcf032-c050-4f8e-a9bb-153293bab1f5"
+version = "0.3.36"
+
+[[deps.T8code]]
+deps = ["CEnum", "Libdl", "MPI", "MPIPreferences", "Preferences", "Reexport", "UUIDs", "t8code_jll"]
+git-tree-sha1 = "d5e26ded4f83076c05739140080de1b165a5c79d"
+uuid = "d0cc0030-9a40-4274-8435-baadcfd54fa1"
+version = "0.7.3"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.3"
+
+[[deps.TableTraits]]
+deps = ["IteratorInterfaceExtensions"]
+git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
+uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
+version = "1.0.1"
+
+[[deps.Tables]]
+deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"]
+git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297"
+uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+version = "1.12.0"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
+
+[[deps.ThreadingUtilities]]
+deps = ["ManualMemory"]
+git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27"
+uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5"
+version = "0.5.2"
+
+[[deps.TimerOutputs]]
+deps = ["ExprTools", "Printf"]
+git-tree-sha1 = "d7298ebdfa1654583468a487e8e83fae9d72dac3"
+uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
+version = "0.5.26"
+
+[[deps.TranscodingStreams]]
+git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742"
+uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
+version = "0.11.3"
+
+[[deps.Triangle_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "fe28e9a4684f6f54e868b9136afb8fd11f1734a7"
+uuid = "5639c1d2-226c-5e70-8d55-b3095415a16a"
+version = "1.6.2+0"
+
+[[deps.TriangularSolve]]
+deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"]
+git-tree-sha1 = "be986ad9dac14888ba338c2554dcfec6939e1393"
+uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
+version = "0.2.1"
+
+[[deps.Triangulate]]
+deps = ["DocStringExtensions", "Printf", "Triangle_jll"]
+git-tree-sha1 = "e387c61cb8f5f091e61d4e443a5f435d769871c2"
+uuid = "f7e6ffb2-c36d-4f8f-a77e-16e897189344"
+version = "2.3.4"
+
+    [deps.Triangulate.weakdeps]
+    CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+    GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
+    PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
+
+[[deps.TriplotBase]]
+git-tree-sha1 = "4d4ed7f294cda19382ff7de4c137d24d16adc89b"
+uuid = "981d1d27-644d-49a2-9326-4793e63143c3"
+version = "0.1.0"
+
+[[deps.TriplotRecipes]]
+deps = ["RecipesBase", "TriplotBase"]
+git-tree-sha1 = "fceb3b0f37ff6ccf3c70b9c5198d2eefec46ada0"
+uuid = "808ab39a-a642-4abf-81ff-4cb34ebbffa3"
+version = "0.1.2"
+
+[[deps.Trixi]]
+deps = ["Accessors", "Adapt", "CodeTracking", "ConstructionBase", "DataStructures", "DelimitedFiles", "DiffEqBase", "DiffEqCallbacks", "Downloads", "EllipsisNotation", "FillArrays", "ForwardDiff", "HDF5", "IfElse", "KernelAbstractions", "LinearAlgebra", "LinearMaps", "LoopVectorization", "MPI", "MuladdMacro", "Octavian", "OffsetArrays", "P4est", "Polyester", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "Reexport", "Requires", "SciMLBase", "SimpleUnPack", "SparseArrays", "StableRNGs", "StartUpDG", "Static", "StaticArrayInterface", "StaticArrays", "StrideArrays", "StructArrays", "SummationByPartsOperators", "T8code", "TimerOutputs", "Triangulate", "TriplotBase", "TriplotRecipes", "TrixiBase", "UUIDs"]
+path = ".."
+uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
+version = "0.9.12-DEV"
+
+    [deps.Trixi.extensions]
+    TrixiConvexECOSExt = ["Convex", "ECOS"]
+    TrixiMakieExt = "Makie"
+    TrixiNLsolveExt = "NLsolve"
+
+    [deps.Trixi.weakdeps]
+    Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
+    ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
+    Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
+    NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
+
+[[deps.TrixiBase]]
+deps = ["TimerOutputs"]
+git-tree-sha1 = "017b747e5d59a41e903a6b03a083db7102236e1e"
+uuid = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
+version = "0.1.4"
+weakdeps = ["MPI"]
+
+    [deps.TrixiBase.extensions]
+    TrixiBaseMPIExt = "MPI"
+
+[[deps.TruncatedStacktraces]]
+deps = ["InteractiveUtils", "MacroTools", "Preferences"]
+git-tree-sha1 = "ea3e54c2bdde39062abf5a9758a23735558705e1"
+uuid = "781d530d-4396-4725-bb49-402e4bee1e77"
+version = "1.4.0"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+version = "1.11.0"
+
+[[deps.UnPack]]
+git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
+uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+version = "1.0.2"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
+
+[[deps.Unrolled]]
+deps = ["MacroTools"]
+git-tree-sha1 = "6cc9d682755680e0f0be87c56392b7651efc2c7b"
+uuid = "9602ed7d-8fef-5bc8-8597-8f21381861e8"
+version = "0.1.5"
+
+[[deps.UnsafeAtomics]]
+git-tree-sha1 = "b13c4edda90890e5b04ba24e20a310fbe6f249ff"
+uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f"
+version = "0.3.0"
+weakdeps = ["LLVM"]
+
+    [deps.UnsafeAtomics.extensions]
+    UnsafeAtomicsLLVM = ["LLVM"]
+
+[[deps.VTKBase]]
+git-tree-sha1 = "c2d0db3ef09f1942d08ea455a9e252594be5f3b6"
+uuid = "4004b06d-e244-455f-a6ce-a5f9919cc534"
+version = "1.0.1"
+
+[[deps.VectorizationBase]]
+deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"]
+git-tree-sha1 = "4ab62a49f1d8d9548a1c8d1a75e5f55cf196f64e"
+uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
+version = "0.21.71"
+
+[[deps.VectorizedRNG]]
+deps = ["Distributed", "Random", "SLEEFPirates", "UnPack", "VectorizationBase"]
+git-tree-sha1 = "5ca83562ba95272d8709c6c91e31e23c3c4c9825"
+uuid = "33b4df10-0173-11e9-2a0c-851a7edac40e"
+version = "0.2.25"
+weakdeps = ["Requires", "StaticArraysCore"]
+
+    [deps.VectorizedRNG.extensions]
+    VectorizedRNGStaticArraysExt = ["StaticArraysCore"]
+
+[[deps.VectorizedStatistics]]
+deps = ["LoopVectorization", "PrecompileTools", "Static"]
+git-tree-sha1 = "f59703fbab297efe6ad09ef1dc656f8f0a21ad28"
+uuid = "3b853605-1c98-4422-8364-4bd93ee0529e"
+version = "0.5.10"
+
+[[deps.VertexSafeGraphs]]
+deps = ["Graphs"]
+git-tree-sha1 = "8351f8d73d7e880bfc042a8b6922684ebeafb35c"
+uuid = "19fa3120-7c27-5ec5-8db8-b0b0aa330d6f"
+version = "0.2.0"
+
+[[deps.WriteVTK]]
+deps = ["Base64", "CodecZlib", "FillArrays", "LightXML", "TranscodingStreams", "VTKBase"]
+git-tree-sha1 = "1d8042d58334ab7947ce505709df7009da6f3375"
+uuid = "64499a7a-5c06-52f2-abe2-ccb03c286192"
+version = "1.21.1"
+
+[[deps.XML2_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"]
+git-tree-sha1 = "a2fccc6559132927d4c5dc183e3e01048c6dcbd6"
+uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
+version = "2.13.5+0"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.2.13+1"
+
+[[deps.demumble_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "6498e3581023f8e530f34760d18f75a69e3a4ea8"
+uuid = "1e29f10c-031c-5a83-9565-69cddfc27673"
+version = "1.3.0+0"
+
+[[deps.libaec_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "46bf7be2917b59b761247be3f317ddf75e50e997"
+uuid = "477f73a3-ac25-53e9-8cc3-50b2fa2566f0"
+version = "1.1.2+0"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.11.0+0"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.59.0+0"
+
+[[deps.oneTBB_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "7d0ea0f4895ef2f5cb83645fa689e52cb55cf493"
+uuid = "1317d2d5-d96f-522e-a858-c73665f53c3e"
+version = "2021.12.0+0"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.4.0+2"
+
+[[deps.t8code_jll]]
+deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "TOML", "Zlib_jll"]
+git-tree-sha1 = "085c372c5f9459042258a7f51ee8734784700281"
+uuid = "4ee9bed8-4011-53f7-90c2-22363c2f500d"
+version = "3.0.0+0"
diff --git a/esiwace/Project.toml b/esiwace/Project.toml
new file mode 100644
index 00000000000..8bb32271f42
--- /dev/null
+++ b/esiwace/Project.toml
@@ -0,0 +1,11 @@
+[deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
+OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
+P4est = "7d669430-f675-4ae7-b43e-fab78ec5a902"
+T8code = "d0cc0030-9a40-4274-8435-baadcfd54fa1"
+TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
+Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
diff --git a/esiwace/profile b/esiwace/profile
new file mode 100644
index 00000000000..97e4befd40f
--- /dev/null
+++ b/esiwace/profile
@@ -0,0 +1,11 @@
+module purge
+module load spack/22.2.1
+module load gcc/11.2.0
+module load openmpi/4.1.2-gcc11
+module load hdf5/1.10.7-gcc11
+
+export CUDA_HOME=/usr/local/cuda-12.6/
+
+export JULIA_DEPOT_PATH="$SCRATCH/julia-depot/"
+export JL="julia +1.11"
+

From 25eb79581236565d5aac7f8b534e1a044dab8b2e Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Tue, 17 Dec 2024 17:35:07 +0100
Subject: [PATCH 83/89] new T8code version

---
 esiwace/Manifest.toml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/esiwace/Manifest.toml b/esiwace/Manifest.toml
index 32ff93ba07e..96d39f43b8e 100644
--- a/esiwace/Manifest.toml
+++ b/esiwace/Manifest.toml
@@ -2002,9 +2002,9 @@ version = "0.3.36"
 
 [[deps.T8code]]
 deps = ["CEnum", "Libdl", "MPI", "MPIPreferences", "Preferences", "Reexport", "UUIDs", "t8code_jll"]
-git-tree-sha1 = "d5e26ded4f83076c05739140080de1b165a5c79d"
+git-tree-sha1 = "1b5ef460f156ed68e3affb67f48e2b4bec9915e4"
 uuid = "d0cc0030-9a40-4274-8435-baadcfd54fa1"
-version = "0.7.3"
+version = "0.7.4"
 
 [[deps.TOML]]
 deps = ["Dates"]
@@ -2231,6 +2231,6 @@ version = "17.4.0+2"
 
 [[deps.t8code_jll]]
 deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "TOML", "Zlib_jll"]
-git-tree-sha1 = "085c372c5f9459042258a7f51ee8734784700281"
+git-tree-sha1 = "cf073e7d4275b8a030140936639f3d6a5eeb3e74"
 uuid = "4ee9bed8-4011-53f7-90c2-22363c2f500d"
-version = "3.0.0+0"
+version = "3.0.1+0"

From d6d946f0f83bff0b983c5b81727e1b7b78abb33d Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Tue, 17 Dec 2024 17:36:06 +0100
Subject: [PATCH 84/89] manual update for latest merge

---
 src/solvers/dgsem_p4est/dg_3d.jl     | 12 +++++-------
 src/solvers/dgsem_p4est/dg_3d_gpu.jl |  4 ++--
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl
index cf3482b874e..e0896f5d84e 100644
--- a/src/solvers/dgsem_p4est/dg_3d.jl
+++ b/src/solvers/dgsem_p4est/dg_3d.jl
@@ -13,11 +13,11 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
     fstar_primary_threaded = [Array{uEltype, 4}(undef, nvariables(equations),
                                                 nnodes(mortar_l2),
                                                 nnodes(mortar_l2), 4)
-                              for _ in 1:Threads.nthreads()]
+                              for _ in 1:Threads.nthreads()] |> VecOfArrays
     fstar_secondary_threaded = [Array{uEltype, 4}(undef, nvariables(equations),
                                                   nnodes(mortar_l2),
                                                   nnodes(mortar_l2), 4)
-                                for _ in 1:Threads.nthreads()]
+                                for _ in 1:Threads.nthreads()] |> VecOfArrays
 
     fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations),
                                             nnodes(mortar_l2), nnodes(mortar_l2))
@@ -451,16 +451,14 @@ end
 
 function prolong2mortars!(cache, u,
                           mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
-                          mortar_l2::LobattoLegendreMortarL2,
-                          surface_integral, dg::DGSEM)
+                          mortar_l2::LobattoLegendreMortarL2, dg::DGSEM)
     backend = backend_or_nothing(cache.mortars)
-    _prolong2mortars!(backend, cache, u, mesh, equations, mortar_l2, surface_integral, dg)
+    _prolong2mortars!(backend, cache, u, mesh, equations, mortar_l2, dg)
 end
 
 @inline function _prolong2mortars!(backend::Nothing, cache, u,
                                    mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
-                                   mortar_l2::LobattoLegendreMortarL2,
-                                   surface_integral, dg::DGSEM)
+                                   mortar_l2::LobattoLegendreMortarL2, dg::DGSEM)
     @unpack fstar_tmp_threaded = cache
     @unpack neighbor_ids, node_indices = cache.mortars
     index_range = eachnode(dg)
diff --git a/src/solvers/dgsem_p4est/dg_3d_gpu.jl b/src/solvers/dgsem_p4est/dg_3d_gpu.jl
index 9e99292aa1b..92d545fd263 100644
--- a/src/solvers/dgsem_p4est/dg_3d_gpu.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_gpu.jl
@@ -464,7 +464,7 @@ end
 @inline function _prolong2mortars!(backend::Backend, cache, u,
                                    mesh::P4estMesh{3}, equations,
                                    mortar_l2::LobattoLegendreMortarL2,
-                                   surface_integral, dg::DGSEM)
+                                   dg::DGSEM)
     if nmortars(dg, cache) > 0
         error("mortars currently not supported by KA.jl P4estMesh solver")
     end
@@ -588,4 +588,4 @@ end
         add_to_first_axis!(du, du_local, i, j, k, element)
     end
 end
-end # @muladd
\ No newline at end of file
+end # @muladd

From 4a978a5f3a5fec1c81ea2baa66d5f5be11764f53 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Wed, 18 Dec 2024 10:01:41 +0100
Subject: [PATCH 85/89] update README

---
 esiwace/README.md | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/esiwace/README.md b/esiwace/README.md
index 32265cf8b4e..583c2100ff8 100644
--- a/esiwace/README.md
+++ b/esiwace/README.md
@@ -5,13 +5,19 @@
 You need to get an account at https://docs.terrabyte.lrz.de/services/identity/get-account/
 and set up two-factor authentication.
 
+Documentation is available here: https://docs.terrabyte.lrz.de/
+
 ### Login
 ```shell
 ssh login.terrabyte.lrz.de
 ```
+You have storage space at `$HOME`, `$SCRATCH` (not backed up, temporary), and `$PROJECT`
+(soon to come).
 
 ### Set up t8code
-** TODO: change to project directory, then this step can be skipped **
+
+**TODO: once there is $PROJECT, this step can be skipped**
+
 1. Load modules
    ```shell
    module load gcc/11.2.0
@@ -51,7 +57,7 @@ ssh login.terrabyte.lrz.de
 
 ## Set up Julia
 Julia is not available on the cluster. We need to install it manually.
-1. If there no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one
+1. If there is no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one
    ```
    touch $HOME/.bashrc
    ```
@@ -81,35 +87,49 @@ Julia is not available on the cluster. We need to install it manually.
    . profile
    ```
 3. The Julia project is configured by several files: `Project.toml` lists dependencies,
-   `Manifest.toml` list exact version numbers for all required packages,
+   `Manifest.toml` lists exact version numbers for all installed packages,
    `LocalPreferences.toml` contains advanced configuration options.
    It should only be necessary to adapt `LocalPreference.toml` to reflect the t8code
    installation path.
 4. Open Julia via the `$JL` command and instantiate the project:
    ```shell
-   $JL --project -e 'using Pkg; Pkg.instantiate()'
+   $JL --project=. -e 'using Pkg; Pkg.instantiate()'
    ```
+   This will take some time!
 
 
 ## Precompile Trixi.jl
 1. Make sure that everything is precompiled by running the following:
    ```shell
-   $JL --project -e 'using OrdinaryDiffEq, Trixi'
+   $JL --project=. -e 'using OrdinaryDiffEq, Trixi'
    ```
 2. To test CUDA, first log in to a GPU node:
    ```shell
-   salloc --cluster=hpda2 --partition=hpda2_compute_gpu --nodes=1 --ntasks-per-node=1 --gres=gpu:4 --time=00:30:00
+   salloc --cluster=hpda2 --partition=hpda2_testgpu --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --time=00:30:00
    ```
    Then start Julia:
    ```shell
-   $JL --project -e 'using CUDA; CUDA.versioninfo()'
+   $JL --project=. -e 'using CUDA; CUDA.versioninfo()'
+   ```
+   This should print
+   ```
+   CUDA runtime 12.6, local installation
+   ...
+   ```
+   If it fails, it might help to re-set the CUDA runtime:
+   ```shell
+   $JL --project=. -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber(12,6); local_toolkit=true)'
    ```
+   <!--
+   pkg = Base.PkgId(Base.UUID("76a88914-d11a-5bdc-97e0-2f5a05c973a2"), "CUDA_Runtime_jll")
+   Base.compilecache(pkg)
+   -->
 
 
 ## Launch
-1. SLURM jobscript are found in `jobscripts`. Edit as necessary. At least, you have to
+1. SLURM jobscripts can be found in `jobscripts`. Edit as necessary. At least, you have to
    specify your mail address.
-2. The actual simulation is configured in `run.jl` and based on Trixi.jl file in `elixirs`.
+2. The actual simulation is configured in `run.jl` and based on Trixi.jl files in `elixirs`.
 3. Send job to queue:
    ```shell
    sbatch jobscript/single_node.sh

From fce3af0c6d88b9205a2e9a0e95561d2dae338966 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Wed, 18 Dec 2024 16:18:52 +0100
Subject: [PATCH 86/89] same with prolong2mpimortars!

---
 src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl | 2 +-
 src/solvers/dgsem_p4est/dg_3d_parallel.jl     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl
index 3ef977af499..c94ccd764c7 100644
--- a/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl
@@ -161,7 +161,7 @@ end
                                       mesh::ParallelP4estMesh{3},
                                       equations,
                                       mortar_l2::LobattoLegendreMortarL2,
-                                      surface_integral, dg::DGSEM)
+                                      dg::DGSEM)
     if nmpimortars(dg, cache) > 0
         error("mortars currently not supported by KA.jl P4estMesh solver")
     end
diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
index 475ca0389d5..87340bc07d5 100644
--- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
@@ -327,17 +327,17 @@ function prolong2mpimortars!(cache, u,
                              mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}},
                              equations,
                              mortar_l2::LobattoLegendreMortarL2,
-                             surface_integral, dg::DGSEM)
+                             dg::DGSEM)
     backend = backend_or_nothing(cache.mpi_mortars)
     _prolong2mpimortars!(backend, cache, u, mesh, equations,
-                         mortar_l2, surface_integral, dg)
+                         mortar_l2, dg)
 end
 
 @inline function _prolong2mpimortars!(backend::Nothing, cache, u,
                                       mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}},
                                       equations,
                                       mortar_l2::LobattoLegendreMortarL2,
-                                      surface_integral, dg::DGSEM)
+                                      dg::DGSEM)
     @unpack node_indices = cache.mpi_mortars
     index_range = eachnode(dg)
 

From 6f3286aa90033144001d8e880bb45e650bdc2f05 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 19 Dec 2024 15:58:28 +0100
Subject: [PATCH 87/89] output UUIDs of GPUs

---
 esiwace/run.jl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/esiwace/run.jl b/esiwace/run.jl
index 21b5e6d5975..046405dd6ca 100644
--- a/esiwace/run.jl
+++ b/esiwace/run.jl
@@ -10,10 +10,9 @@ function main(elixir_path)
     isroot = rank == 0
 
     # pin rank to device?
-    #if machine == "jedi"
-    #    CUDA.device!(rank % 4)
-    #end
-    print("Rank $rank has device: $(CUDA.device())\n")
+    CUDA.device!(rank % 4)
+    gpu = CUDA.device()
+    print("Rank $rank has device $(gpu) with ID $(CUDA.uuid(gpu)), has CUDA: $(MPI.has_cuda())\n")
 
     # setup
     maxiters = 400

From bdb8ab565af893738e7fe59684a30f1ab7285835 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 19 Dec 2024 15:58:55 +0100
Subject: [PATCH 88/89] revised setup

---
 esiwace/LocalPreferences.toml     | 12 ++++++------
 esiwace/Manifest.toml             | 22 ++++++++--------------
 esiwace/jobscripts/single_node.sh |  9 ++++++---
 esiwace/profile                   | 11 +++++------
 4 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/esiwace/LocalPreferences.toml b/esiwace/LocalPreferences.toml
index 76b2b9b6cd0..8e9b0f7f61d 100644
--- a/esiwace/LocalPreferences.toml
+++ b/esiwace/LocalPreferences.toml
@@ -1,14 +1,14 @@
 [CUDA_Runtime_jll]
 local = "true"
-version = "12.6"
+version = "11.8"
 
 [HDF5]
-libhdf5 = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5.so"
-libhdf5_hl = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5_hl.so"
+libhdf5 = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5.so"
+libhdf5_hl = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5_hl.so"
 
 [HDF5_jll]
-libhdf5_hl_path = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5_hl.so"
-libhdf5_path = "/dss/lrzsys/sys/spack/release/22.2.1/opt/icelake/hdf5/1.10.7-gcc-dgmcmi4/lib/libhdf5.so"
+libhdf5_hl_path = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5_hl.so"
+libhdf5_path = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5.so"
 
 [MPIPreferences]
 __clear__ = ["preloads_env_switch"]
@@ -17,7 +17,7 @@ abi = "OpenMPI"
 binary = "system"
 cclibs = []
 libmpi = "libmpi"
-mpiexec = "srun"
+mpiexec = "mpiexec"
 preloads = []
 
 [P4est]
diff --git a/esiwace/Manifest.toml b/esiwace/Manifest.toml
index 96d39f43b8e..716f88b6047 100644
--- a/esiwace/Manifest.toml
+++ b/esiwace/Manifest.toml
@@ -698,10 +698,10 @@ weakdeps = ["MPI"]
     MPIExt = "MPI"
 
 [[deps.HDF5_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"]
-git-tree-sha1 = "38c8874692d48d5440d5752d6c74b0c6b0b60739"
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"]
+git-tree-sha1 = "82a471768b513dc39e471540fdadc84ff80ff997"
 uuid = "0234f1f7-429e-5d53-9886-15a909be8d59"
-version = "1.14.2+1"
+version = "1.14.3+3"
 
 [[deps.HostCPUFeatures]]
 deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"]
@@ -837,12 +837,6 @@ git-tree-sha1 = "2e5c102cfc41f48ae4740c7eca7743cc7e7b75ea"
 uuid = "8b046642-f1f6-4319-8d3c-209ddc03c586"
 version = "1.0.0"
 
-[[deps.LLVMOpenMP_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "78211fb6cbc872f77cad3fc0b6cf647d923f4929"
-uuid = "1d63c593-3942-5779-bab2-d838dc0a180e"
-version = "18.1.7+0"
-
 [[deps.LaTeXStrings]]
 git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"
 uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
@@ -1267,9 +1261,9 @@ version = "0.3.28"
     HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97"
 
 [[deps.OffsetArrays]]
-git-tree-sha1 = "39d000d9c33706b8364817d8894fae1548f40295"
+git-tree-sha1 = "5e1897147d1ff8d98883cda2be2187dcf57d8f0c"
 uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
-version = "1.14.2"
+version = "1.15.0"
 weakdeps = ["Adapt"]
 
     [deps.OffsetArrays.extensions]
@@ -1286,10 +1280,10 @@ uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
 version = "0.8.1+2"
 
 [[deps.OpenMPI_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML", "Zlib_jll"]
-git-tree-sha1 = "2dace87e14256edb1dd0724ab7ba831c779b96bd"
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
+git-tree-sha1 = "e25c1778a98e34219a00455d6e4384e017ea9762"
 uuid = "fe0851c0-eecd-5654-98d4-656369965a5c"
-version = "5.0.6+0"
+version = "4.1.6+0"
 
 [[deps.OpenSSL_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl"]
diff --git a/esiwace/jobscripts/single_node.sh b/esiwace/jobscripts/single_node.sh
index a0cf8b6742f..fd88d9d3267 100644
--- a/esiwace/jobscripts/single_node.sh
+++ b/esiwace/jobscripts/single_node.sh
@@ -4,13 +4,16 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=4
 #SBATCH --gres=gpu:4
-#SBATCH --mail-user=<mail_addr>
-#SBATCH --mail-type=all
+#SBATCH --get-user-env
 #SBATCH --export=NONE
+#SBATCH --mail-user=bgeihe@uni-koeln.de
+#SBATCH --mail-type=all
+#SBATCH --job-name=single_node
 #SBATCH --output=stdout.%j
 #SBATCH --error=stderr.%j
 #SBATCH --time=00:30:00
 
 source profile
 
-srun $JL --threads=1 --project=. run.jl
+mpiexec -n $SLURM_NTASKS $JL --threads=1 --project=. run.jl
+
diff --git a/esiwace/profile b/esiwace/profile
index 97e4befd40f..e69e3fa87f1 100644
--- a/esiwace/profile
+++ b/esiwace/profile
@@ -1,10 +1,9 @@
 module purge
-module load spack/22.2.1
-module load gcc/11.2.0
-module load openmpi/4.1.2-gcc11
-module load hdf5/1.10.7-gcc11
-
-export CUDA_HOME=/usr/local/cuda-12.6/
+module load slurm_setup
+module load spack/23.1.0
+module load gcc/12.2.0
+module load openmpi/4.1.5-gcc11
+module load cuda/11.8.0
 
 export JULIA_DEPOT_PATH="$SCRATCH/julia-depot/"
 export JL="julia +1.11"

From 56297332d89b70314a4e3e0d22c4b7d1fe697fb7 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 19 Dec 2024 16:07:10 +0100
Subject: [PATCH 89/89] updated esiwace docs

---
 esiwace/README.md | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/esiwace/README.md b/esiwace/README.md
index 583c2100ff8..dfde57a0037 100644
--- a/esiwace/README.md
+++ b/esiwace/README.md
@@ -7,6 +7,7 @@ and set up two-factor authentication.
 
 Documentation is available here: https://docs.terrabyte.lrz.de/
 
+
 ### Login
 ```shell
 ssh login.terrabyte.lrz.de
@@ -14,15 +15,17 @@ ssh login.terrabyte.lrz.de
 You have storage space at `$HOME`, `$SCRATCH` (not backed up, temporary), and `$PROJECT`
 (soon to come).
 
+
 ### Set up t8code
 
 **TODO: once there is $PROJECT, this step can be skipped**
 
 1. Load modules
    ```shell
-   module load gcc/11.2.0
-   module load openmpi/4.1.2-gcc11
-   module load hdf5/1.10.7-gcc11
+   module purge
+   module load spack/23.1.0
+   module load gcc/12.2.0
+   module load openmpi/4.1.5-gcc11
    ```
 2. Change to scratch folder
    ```shell
@@ -55,7 +58,8 @@ You have storage space at `$HOME`, `$SCRATCH` (not backed up, temporary), and `$
    nice make install -j8
    ```
 
-## Set up Julia
+
+### Set up Julia
 Julia is not available on the cluster. We need to install it manually.
 1. If there is no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one
    ```
@@ -75,7 +79,8 @@ Julia is not available on the cluster. We need to install it manually.
    juliaup add 1.11
    ```
 
-## Set up Trixi.jl
+
+### Set up Trixi.jl
 1. Clone the repository
    ```shell
    git clone https://github.com/benegee/Trixi.jl.git
@@ -95,14 +100,15 @@ Julia is not available on the cluster. We need to install it manually.
    ```shell
    $JL --project=. -e 'using Pkg; Pkg.instantiate()'
    ```
-   This will take some time!
+   This will take some time! Some packages might throw errors.
 
 
-## Precompile Trixi.jl
+### Check installation
 1. Make sure that everything is precompiled by running the following:
    ```shell
    $JL --project=. -e 'using OrdinaryDiffEq, Trixi'
    ```
+   If there are still some errors, they might get resolved when running on compute nodes.
 2. To test CUDA, first log in to a GPU node:
    ```shell
    salloc --cluster=hpda2 --partition=hpda2_testgpu --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --time=00:30:00
@@ -113,22 +119,23 @@ Julia is not available on the cluster. We need to install it manually.
    ```
    This should print
    ```
-   CUDA runtime 12.6, local installation
+   CUDA runtime 11.8, local installation
    ...
    ```
+   <!--
    If it fails, it might help to re-set the CUDA runtime:
    ```shell
-   $JL --project=. -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber(12,6); local_toolkit=true)
+   $JL --project=. -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber(11,8); local_toolkit=true)'
    ```
-   <!--
+   
    pkg = Base.PkgId(Base.UUID("76a88914-d11a-5bdc-97e0-2f5a05c973a2"), "CUDA_Runtime_jll")
    Base.compilecache(pkg)
    -->
 
 
 ## Launch
-1. SLURM jobscripts can be found in `jobscripts`. Edit as necessary. At least, you have to
-   specify your mail address.
+1. SLURM jobscripts are in `jobscripts`. Edit them as necessary; at a minimum, you have to
+   specify your mail address.
 2. The actual simulation is configured in `run.jl` and based on Trixi.jl files in `elixirs`.
 3. Send job to queue:
    ```shell