From 4a137bd28575bbab788d0b956366c945b0881f16 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 22 Aug 2024 09:56:35 +0200
Subject: [PATCH 01/19] use Base.min/max in MPI.Allreduce

MPI.jl's reduce currently does not work for custom operators (such as Trixi's
min/max) on ARM
---
 src/callbacks_step/analysis.jl               |  2 +-
 src/callbacks_step/analysis_dg2d_parallel.jl |  2 +-
 src/callbacks_step/analysis_dg3d_parallel.jl |  2 +-
 src/callbacks_step/stepsize_dg2d.jl          | 12 ++++++------
 src/callbacks_step/stepsize_dg3d.jl          |  8 ++++----
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 860e3fa21d3..f6b1638d9b1 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -434,7 +434,7 @@ function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)
             res = maximum(abs, view(du, v, ..))
             if mpi_isparallel()
                 # TODO: Debugging, here is a type instability
-                global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm())
+                global_res = MPI.Reduce!(Ref(res), Base.max, mpi_root(), mpi_comm())
                 if mpi_isroot()
                     res::eltype(du) = global_res[]
                 end
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 000daa015dc..fdf7d2ea6c0 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -131,7 +131,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index de777be406d..4f9b9ccd27f 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -49,7 +49,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl
index 41251506a0d..08e57671ec8 100644
--- a/src/callbacks_step/stepsize_dg2d.jl
+++ b/src/callbacks_step/stepsize_dg2d.jl
@@ -54,7 +54,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -70,7 +70,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -154,7 +154,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -170,7 +170,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -186,7 +186,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -202,7 +202,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl
index 664596f989e..c6d4389c020 100644
--- a/src/callbacks_step/stepsize_dg3d.jl
+++ b/src/callbacks_step/stepsize_dg3d.jl
@@ -130,7 +130,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -146,7 +146,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -162,7 +162,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -178,7 +178,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end

From ed2540d6cf04454382058ee76fbe1daabf06aa71 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 30 Aug 2024 12:39:41 +0200
Subject: [PATCH 02/19] add comments

---
 src/callbacks_step/analysis.jl               | 1 +
 src/callbacks_step/analysis_dg2d_parallel.jl | 1 +
 src/callbacks_step/analysis_dg3d_parallel.jl | 1 +
 src/callbacks_step/stepsize_dg2d.jl          | 6 ++++++
 src/callbacks_step/stepsize_dg3d.jl          | 4 ++++
 5 files changed, 13 insertions(+)

diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index f6b1638d9b1..24dd6eb5b63 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -434,6 +434,7 @@ function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)
             res = maximum(abs, view(du, v, ..))
             if mpi_isparallel()
                 # TODO: Debugging, here is a type instability
+                # Base.max needed, see comment in src/auxiliary/math.jl
                 global_res = MPI.Reduce!(Ref(res), Base.max, mpi_root(), mpi_comm())
                 if mpi_isroot()
                     res::eltype(du) = global_res[]
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index fdf7d2ea6c0..af4b92ac822 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -131,6 +131,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
+    # Base.max needed, see comment in src/auxiliary/math.jl
     MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index 4f9b9ccd27f..03c4038d88f 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -49,6 +49,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
+    # Base.max needed, see comment in src/auxiliary/math.jl
     MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl
index 08e57671ec8..65a7ecf228d 100644
--- a/src/callbacks_step/stepsize_dg2d.jl
+++ b/src/callbacks_step/stepsize_dg2d.jl
@@ -54,6 +54,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -70,6 +71,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -154,6 +156,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -170,6 +173,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -186,6 +190,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -202,6 +207,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl
index c6d4389c020..e8200a35625 100644
--- a/src/callbacks_step/stepsize_dg3d.jl
+++ b/src/callbacks_step/stepsize_dg3d.jl
@@ -130,6 +130,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -146,6 +147,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -162,6 +164,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -178,6 +181,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
+    # Base.min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt

From cc1147d74a43d30f628428e945ed27b384eba3fb Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 30 Aug 2024 12:42:09 +0200
Subject: [PATCH 03/19] explain workdaround

---
 src/auxiliary/math.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl
index fa816da9a1e..8f442a5b220 100644
--- a/src/auxiliary/math.jl
+++ b/src/auxiliary/math.jl
@@ -284,6 +284,11 @@ end
 # when using `@fastmath`, which we also get from
 # [Fortran](https://godbolt.org/z/Yrsa1js7P)
 # or [C++](https://godbolt.org/z/674G7Pccv).
+#
+# Note however that such a custom reimplementation can cause incompatibilites with other
+# packages. Currently we are affected by an issue with MPI.jl on ARM, see
+# https://github.com/trixi-framework/Trixi.jl/issues/1922
+# The workaround is to resort to Base.min / Base.max when using MPI reductions.
 """
     Trixi.max(x, y, ...)
 

From fb8a769eee6723221d5cf57ae88de5d2f3a3029b Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 30 Aug 2024 12:43:42 +0200
Subject: [PATCH 04/19] typo

---
 src/auxiliary/math.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl
index 8f442a5b220..6ae09588861 100644
--- a/src/auxiliary/math.jl
+++ b/src/auxiliary/math.jl
@@ -285,7 +285,7 @@ end
 # [Fortran](https://godbolt.org/z/Yrsa1js7P)
 # or [C++](https://godbolt.org/z/674G7Pccv).
 #
-# Note however that such a custom reimplementation can cause incompatibilites with other
+# Note however that such a custom reimplementation can cause incompatibilities with other
 # packages. Currently we are affected by an issue with MPI.jl on ARM, see
 # https://github.com/trixi-framework/Trixi.jl/issues/1922
 # The workaround is to resort to Base.min / Base.max when using MPI reductions.

From 7cd8456683227f731e87f6bd40d7a672f8c7550a Mon Sep 17 00:00:00 2001
From: Benedict <135045760+benegee@users.noreply.github.com>
Date: Fri, 30 Aug 2024 14:59:35 +0200
Subject: [PATCH 05/19] Apply suggestions from code review

Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com>
---
 src/callbacks_step/analysis.jl               |  2 +-
 src/callbacks_step/analysis_dg2d_parallel.jl |  2 +-
 src/callbacks_step/analysis_dg3d_parallel.jl |  2 +-
 src/callbacks_step/stepsize_dg2d.jl          | 12 ++++++------
 src/callbacks_step/stepsize_dg3d.jl          |  8 ++++----
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 24dd6eb5b63..06110d08d28 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -434,7 +434,7 @@ function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)
             res = maximum(abs, view(du, v, ..))
             if mpi_isparallel()
                 # TODO: Debugging, here is a type instability
-                # Base.max needed, see comment in src/auxiliary/math.jl
+                # Base.max instead of max needed, see comment in src/auxiliary/math.jl
                 global_res = MPI.Reduce!(Ref(res), Base.max, mpi_root(), mpi_comm())
                 if mpi_isroot()
                     res::eltype(du) = global_res[]
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index af4b92ac822..5b3ae858ab7 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -131,7 +131,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    # Base.max needed, see comment in src/auxiliary/math.jl
+    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
     MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index 03c4038d88f..70a616367cd 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -49,7 +49,7 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    # Base.max needed, see comment in src/auxiliary/math.jl
+    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
     MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl
index 65a7ecf228d..c7922cecc66 100644
--- a/src/callbacks_step/stepsize_dg2d.jl
+++ b/src/callbacks_step/stepsize_dg2d.jl
@@ -54,7 +54,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -71,7 +71,7 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -156,7 +156,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -173,7 +173,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -190,7 +190,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -207,7 +207,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl
index e8200a35625..49976de6505 100644
--- a/src/callbacks_step/stepsize_dg3d.jl
+++ b/src/callbacks_step/stepsize_dg3d.jl
@@ -130,7 +130,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -147,7 +147,7 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -164,7 +164,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
@@ -181,7 +181,7 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    # Base.min needed, see comment in src/auxiliary/math.jl
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
     dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt

From 246f3ac2f347e6278d6a934afbc55f376405840c Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 30 Aug 2024 15:02:57 +0200
Subject: [PATCH 06/19] switch to macos-latest in mpi tests

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c636ee8b0b..61a1a594a23 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -90,7 +90,7 @@ jobs:
             arch: x64
             trixi_test: threaded_legacy
           - version: '1.10'
-            os: macos-13
+            os: macos-latest
             arch: x64
             trixi_test: mpi
           - version: '1.10'

From 1d6e7f9a8e46cbcfc78b774e43f839faa2a44ba9 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 30 Aug 2024 22:03:46 +0200
Subject: [PATCH 07/19] remove arch specification for macos-latest

macos-latest is 14, which is ARM
---
 .github/workflows/ci.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 61a1a594a23..90eaade4fda 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -91,11 +91,9 @@ jobs:
             trixi_test: threaded_legacy
           - version: '1.10'
             os: macos-latest
-            arch: x64
             trixi_test: mpi
           - version: '1.10'
             os: macos-latest
-            arch: aarch64
             trixi_test: threaded
           - version: '1.10'
             os: windows-latest

From db209ec9fed1bbf8363c53e1cd61e4f3aff8fc47 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Fri, 30 Aug 2024 22:14:28 +0200
Subject: [PATCH 08/19] readd arch, required by julia-actions/setup-julia

---
 .github/workflows/ci.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 90eaade4fda..05d09bfd31d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -91,9 +91,11 @@ jobs:
             trixi_test: threaded_legacy
           - version: '1.10'
             os: macos-latest
+            arch: aarch64
             trixi_test: mpi
           - version: '1.10'
             os: macos-latest
+            arch: aarch64
             trixi_test: threaded
           - version: '1.10'
             os: windows-latest

From 940ee69463c83b1242fe320d2e2eef0f24da59b2 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 09:39:52 +0200
Subject: [PATCH 09/19] check out Valentin's MPI custom ops

---
 .github/workflows/ci.yml                     | 1 +
 src/auxiliary/mpi.jl                         | 7 +++++++
 src/callbacks_step/analysis_dg2d_parallel.jl | 4 ++--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 05d09bfd31d..f05c5dc4542 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,6 +116,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""
+      - run: julia -e 'using Pkg; Pkg.add("MPI#vc/custom_ops")'
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:
diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl
index c85c23670b0..434f5dbd753 100644
--- a/src/auxiliary/mpi.jl
+++ b/src/auxiliary/mpi.jl
@@ -128,3 +128,10 @@ parallel execution of Trixi.jl.
 See the "Miscellaneous" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/).
 """
 ode_unstable_check(dt, u, semi, t) = isnan(dt)
+
+# Custom MPI operators to work around
+# https://github.com/trixi-framework/Trixi.jl/issues/1922
+function reduce_vector_plus(x, y)
+    x .+ y
+end
+MPI.@Op(reduce_vector_plus, SVector)
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 5b3ae858ab7..856a00688c3 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -162,7 +162,7 @@ function integrate_via_indices(func::Func, u,
                             normalize = normalize)
 
     # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
-    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(local_integral), reduce_vector_plus, mpi_root(), mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(local_integral), global_integral[])
     else
@@ -195,7 +195,7 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(integral), reduce_vector_plus, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(integral), global_integral[])

From e46deebb10ca7bda861428d884ccf505b9ef7ec1 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 09:44:10 +0200
Subject: [PATCH 10/19] pkg command

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f05c5dc4542..d072d4789de 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,7 +116,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""
-      - run: julia -e 'using Pkg; Pkg.add("MPI#vc/custom_ops")'
+      - run: julia -e 'using Pkg; pkg"add MPI#vc/custom_ops")'
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:

From 035b35d987867d192f0dd13cba715ed5826192f3 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 09:47:30 +0200
Subject: [PATCH 11/19] hmpf

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d072d4789de..87d286d89cb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,7 +116,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""
-      - run: julia -e 'using Pkg; pkg"add MPI#vc/custom_ops")'
+      - run: julia -e 'using Pkg; pkg"add MPI#vc/custom_ops"'
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:

From 7154dae43249ba0f843167422869b4b27a254b1c Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 09:54:53 +0200
Subject: [PATCH 12/19] project path

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 87d286d89cb..74fe4b20eae 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,7 +116,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""
-      - run: julia -e 'using Pkg; pkg"add MPI#vc/custom_ops"'
+      - run: julia --project=@. -e 'using Pkg; pkg"add MPI#vc/custom_ops"'
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:

From ff513ba4e47c2ae6e7d12a1f76fde98c061795a0 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 09:55:44 +0200
Subject: [PATCH 13/19] format

---
 src/callbacks_step/analysis_dg2d_parallel.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 856a00688c3..ac3faa42f46 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -162,7 +162,8 @@ function integrate_via_indices(func::Func, u,
                             normalize = normalize)
 
     # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
-    global_integral = MPI.Reduce!(Ref(local_integral), reduce_vector_plus, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(local_integral), reduce_vector_plus, mpi_root(),
+                                  mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(local_integral), global_integral[])
     else
@@ -195,7 +196,8 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), reduce_vector_plus, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(integral), reduce_vector_plus, mpi_root(),
+                                  mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(integral), global_integral[])

From bfee6728ff9b248df9f7aeb426a1dfbf8ea7b65a Mon Sep 17 00:00:00 2001
From: Benedict <135045760+benegee@users.noreply.github.com>
Date: Thu, 5 Sep 2024 13:32:34 +0200
Subject: [PATCH 14/19] Use concrete types

Thank vchuravy!
---
 src/auxiliary/mpi.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl
index 434f5dbd753..4eddf8cf025 100644
--- a/src/auxiliary/mpi.jl
+++ b/src/auxiliary/mpi.jl
@@ -134,4 +134,5 @@ ode_unstable_check(dt, u, semi, t) = isnan(dt)
 function reduce_vector_plus(x, y)
     x .+ y
 end
-MPI.@Op(reduce_vector_plus, SVector)
+MPI.@Op(reduce_vector_plus, SVector{4, Float64})
+MPI.@Op(reduce_vector_plus, SVector{5, Float64})

From 6140e985ee167cbdf18a4a95c3b4249265311659 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 16:12:16 +0200
Subject: [PATCH 15/19] different approach: use Ptr to Vector and native +

---
 .github/workflows/ci.yml                     |  1 -
 src/auxiliary/mpi.jl                         |  8 --------
 src/callbacks_step/analysis_dg2d_parallel.jl | 15 ++++++++++++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 74fe4b20eae..05d09bfd31d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,7 +116,6 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
         env:
           PYTHON: ""
-      - run: julia --project=@. -e 'using Pkg; pkg"add MPI#vc/custom_ops"'
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:
diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl
index 4eddf8cf025..c85c23670b0 100644
--- a/src/auxiliary/mpi.jl
+++ b/src/auxiliary/mpi.jl
@@ -128,11 +128,3 @@ parallel execution of Trixi.jl.
 See the "Miscellaneous" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/).
 """
 ode_unstable_check(dt, u, semi, t) = isnan(dt)
-
-# Custom MPI operators to work around
-# https://github.com/trixi-framework/Trixi.jl/issues/1922
-function reduce_vector_plus(x, y)
-    x .+ y
-end
-MPI.@Op(reduce_vector_plus, SVector{4, Float64})
-MPI.@Op(reduce_vector_plus, SVector{5, Float64})
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index ac3faa42f46..e3c287dee89 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -162,10 +162,19 @@ function integrate_via_indices(func::Func, u,
                             normalize = normalize)
 
     # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
-    global_integral = MPI.Reduce!(Ref(local_integral), reduce_vector_plus, mpi_root(),
-                                  mpi_comm())
+    if local_integral isa Real
+        global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
+    else
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64}, Ref(local_integral)), +, mpi_root(), mpi_comm())
+    end
+
     if mpi_isroot()
-        integral = convert(typeof(local_integral), global_integral[])
+        if local_integral isa Real
+            integral = global_integral[]
+        else
+            global_wrapped = unsafe_wrap(Array, global_integral, length(local_integral))
+            integral = convert(typeof(local_integral), global_wrapped)
+        end
     else
         integral = convert(typeof(local_integral), NaN * local_integral)
     end

From 6c659f54c2cdda4b0fd85cb8c192590f638afa83 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 18:27:17 +0200
Subject: [PATCH 16/19] back to macos-13 and x64

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 05d09bfd31d..0c636ee8b0b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -90,8 +90,8 @@ jobs:
             arch: x64
             trixi_test: threaded_legacy
           - version: '1.10'
-            os: macos-latest
-            arch: aarch64
+            os: macos-13
+            arch: x64
             trixi_test: mpi
           - version: '1.10'
             os: macos-latest

From 8880eff4feecbb195338bb850b7c646b733a9c55 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 18:37:43 +0200
Subject: [PATCH 17/19] move to aarch64 for macos

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c636ee8b0b..05d09bfd31d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -90,8 +90,8 @@ jobs:
             arch: x64
             trixi_test: threaded_legacy
           - version: '1.10'
-            os: macos-13
-            arch: x64
+            os: macos-latest
+            arch: aarch64
             trixi_test: mpi
           - version: '1.10'
             os: macos-latest

From a7d300c48ae5f2746710daba3ca05c7bb918e9c1 Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Thu, 5 Sep 2024 18:45:40 +0200
Subject: [PATCH 18/19] remove leftover

---
 src/callbacks_step/analysis_dg2d_parallel.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index e3c287dee89..26710e09a75 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -205,8 +205,7 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), reduce_vector_plus, mpi_root(),
-                                  mpi_comm())
+    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(integral), global_integral[])

From 653abff679ce9450788d95177779270e33b98d8b Mon Sep 17 00:00:00 2001
From: Benedict Geihe <bgeihe@uni-koeln.de>
Date: Tue, 10 Sep 2024 22:31:37 +0200
Subject: [PATCH 19/19] same for P4estMesh and T8codeMesh

---
 src/callbacks_step/analysis_dg2d_parallel.jl | 18 +++++++++++++++---
 src/callbacks_step/analysis_dg3d_parallel.jl | 14 ++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 26710e09a75..707f6e6d94c 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -165,7 +165,9 @@ function integrate_via_indices(func::Func, u,
     if local_integral isa Real
         global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
     else
-        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64}, Ref(local_integral)), +, mpi_root(), mpi_comm())
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64},
+                                                          Ref(local_integral)), +,
+                                      mpi_root(), mpi_comm())
     end
 
     if mpi_isroot()
@@ -205,10 +207,20 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    if integral isa Real
+        global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    else
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64}, Ref(integral)),
+                                      +, mpi_root(), mpi_comm())
+    end
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
-        integral = convert(typeof(integral), global_integral[])
+        if integral isa Real
+            integral = global_integral[]
+        else
+            global_wrapped = unsafe_wrap(Array, global_integral, length(integral))
+            integral = convert(typeof(integral), global_wrapped)
+        end
     else
         integral = convert(typeof(integral), NaN * integral)
         total_volume = volume # non-root processes receive nothing from reduce -> overwrite
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index 70a616367cd..1d167652c63 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -88,10 +88,20 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    if integral isa Real
+        global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    else
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64}, Ref(integral)),
+                                      +, mpi_root(), mpi_comm())
+    end
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
-        integral = convert(typeof(integral), global_integral[])
+        if integral isa Real
+            integral = global_integral[]
+        else
+            global_wrapped = unsafe_wrap(Array, global_integral, length(integral))
+            integral = convert(typeof(integral), global_wrapped)
+        end
     else
         integral = convert(typeof(integral), NaN * integral)
         total_volume = volume # non-root processes receive nothing from reduce -> overwrite