Use Givens rotations instead of MINPACK.jl in linear_solve!()

Using Givens rotations (following 'Algorithm 2' in Zou (2023)) avoids the need for least-squares minimisations at each iteration of the GMRES linear solver.
mabarnes · Sep 27, 2024 · 6a27413 · 6a27413
1 parent b767d22
commit 6a27413
Show file tree

Hide file tree

Showing 6 changed files with 77 additions and 86 deletions.
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
@@ -23,8 +23,4 @@ jobs:
           touch Project.toml
           julia -O3 --project -e 'import Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.add("NCDatasets"); Pkg.precompile()'
           # Reduce nstep for each example to 10 to avoid the CI job taking too long
-          # Note we skip the example `if (occursin("ARK", get(t_input, "type", "") && Sys.isapple())`
-          # because the way we use MINPACK.jl (needed for nonlinear solvers
-          # used for implicit parts of timestep) doesn't currently work on
-          # macOS.
-          julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); if ((occursin("ARK", get(t_input, "type", "")) || occursin("PareschiRusso", get(t_input, "type", "")) || occursin("kinetic_electrons", get(get(input, "composition", Dict{String,Any}()), "electron_physics", "boltzmann_electron_response"))) && Sys.isapple()) continue end; t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end'
+          julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end'
diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml
@@ -17,7 +17,6 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882"
 LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
-MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9"
 MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
 Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"

diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
@@ -22,6 +22,7 @@ Useful references:
 [3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method
 [4] https://www.rikvoorhaar.com/blog/gmres
 [5] E. Carson , J. Liesen, Z. Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003. 
+[6] Q. Zou, "GMRES algorithms over 35 years", Applied Mathematics and Computation 445, 127869 (2023), https://doi.org/10.1016/j.amc.2023.127869
 """
 module nonlinear_solvers
 
@@ -36,12 +37,11 @@ using ..looping
 using ..type_definitions: mk_float, mk_int
 
 using LinearAlgebra
-using MINPACK
 using MPI
 using SparseArrays
 using StatsBase: mean
 
-struct nl_solver_info{TH,TV,Tlig,Tprecon}
+struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon}
     rtol::mk_float
     atol::mk_float
     nonlinear_max_iterations::mk_int
@@ -51,6 +51,9 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon}
     linear_max_restarts::mk_int
     H::TH
     V::TV
+    c::Tcsg
+    s::Tcsg
+    g::Tcsg
     linear_initial_guess::Tlig
     n_solves::Ref{mk_int}
     nonlinear_iterations::Ref{mk_int}
@@ -108,29 +111,47 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
 
     if serial_solve
         H = allocate_float(linear_restart + 1, linear_restart)
+        c = allocate_float(linear_restart + 1)
+        s = allocate_float(linear_restart + 1)
+        g = allocate_float(linear_restart + 1)
         V = allocate_float(reverse(coord_sizes)..., linear_restart+1)
         H .= 0.0
+        c .= 0.0
+        s .= 0.0
+        g .= 0.0
         V .= 0.0
     elseif electron_ppar_pdf_solve
         H = allocate_shared_float(linear_restart + 1, linear_restart)
+        c = allocate_shared_float(linear_restart + 1)
+        s = allocate_shared_float(linear_restart + 1)
+        g = allocate_shared_float(linear_restart + 1)
         V_ppar = allocate_shared_float(coords.z.n, linear_restart+1)
         V_pdf = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1)
 
         begin_serial_region()
         @serial_region begin
             H .= 0.0
+            c .= 0.0
+            s .= 0.0
+            g .= 0.0
             V_ppar .= 0.0
             V_pdf .= 0.0
         end
 
         V = (V_ppar, V_pdf)
     else
         H = allocate_shared_float(linear_restart + 1, linear_restart)
+        c = allocate_shared_float(linear_restart + 1)
+        s = allocate_shared_float(linear_restart + 1)
+        g = allocate_shared_float(linear_restart + 1)
         V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1)
 
         begin_serial_region()
         @serial_region begin
             H .= 0.0
+            c .= 0.0
+            s .= 0.0
+            g .= 0.0
             V .= 0.0
         end
     end
@@ -167,8 +188,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
     return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol,
                           nl_solver_input.nonlinear_max_iterations,
                           nl_solver_input.linear_rtol, nl_solver_input.linear_atol,
-                          linear_restart, nl_solver_input.linear_max_restarts, H, V,
-                          linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
+                          linear_restart, nl_solver_input.linear_max_restarts, H, V, c, s,
+                          g, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
                           Ref(0), Ref(nl_solver_input.preconditioner_update_interval),
                           Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type,
                           nl_solver_input.preconditioner_update_interval, preconditioners)
@@ -324,8 +345,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w,
                                    max_restarts=nl_solver_params.linear_max_restarts,
                                    left_preconditioner=left_preconditioner,
                                    right_preconditioner=right_preconditioner,
-                                   H=nl_solver_params.H, V=nl_solver_params.V,
-                                   rhs_delta=rhs_delta,
+                                   H=nl_solver_params.H, c=nl_solver_params.c,
+                                   s=nl_solver_params.s, g=nl_solver_params.g,
+                                   V=nl_solver_params.V, rhs_delta=rhs_delta,
                                    initial_guess=nl_solver_params.linear_initial_guess,
                                    distributed_norm=distributed_norm,
                                    distributed_dot=distributed_dot,
@@ -1063,11 +1085,16 @@ end
 """
 Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed
 at each step of the outer Newton iteration (in `newton_solve!()`).
+
+Uses Givens rotations to reduce the upper Hessenberg matrix to an upper triangular form,
+which allows conveniently finding the residual at each step, and computing the final
+solution, without calculating a least-squares minimisation at each step. See 'algorithm 2
+MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
 """
 function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol,
                        restart, max_restarts, left_preconditioner, right_preconditioner,
-                       H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot,
-                       parallel_map, parallel_delta_x_calc, serial_solve)
+                       H, c, s, g, V, rhs_delta, initial_guess, distributed_norm,
+                       distributed_dot, parallel_map, parallel_delta_x_calc, serial_solve)
     # Solve (approximately?):
     #   J δx = residual0
 
@@ -1105,6 +1132,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol
     parallel_map((residual0, v) -> -residual0 - v, w, residual0, v)
     beta = distributed_norm(w)
     parallel_map((w) -> w/beta, select_from_V(V, 1), w)
+    g[1] = beta
 
     # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is
     # so small that it is smaller than atol, in which case use atol instead.
@@ -1115,7 +1143,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol
     counter = 0
     restart_counter = 1
     while true
-        for i ∈ 1:restart
+        i = 0
+        while i < restart
+            i += 1
             counter += 1
             #println("Linear ", counter)
 
@@ -1148,52 +1178,36 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol
             end
             parallel_map((w) -> w / H[i+1,i], select_from_V(V, i+1), w)
 
-            function temporary_residual!(result, guess)
-                #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess))
-                result .= @view(H[1:i+1,1:i]) * guess
-                result[1] -= beta
-            end
-
-            # Second argument to fsolve needs to be a Vector{Float64}
-            if serial_solve
-                resize!(initial_guess, i)
-                initial_guess[1] = beta
-                initial_guess[2:i] .= 0.0
-                lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm)
-                residual = norm(lsq_result.f)
-            else
-                begin_serial_region()
-                if global_rank[] == 0
-                    resize!(initial_guess, i)
-                    initial_guess[1] = beta
-                    initial_guess[2:i] .= 0.0
-                    lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm)
-                    residual = norm(lsq_result.f)
-                else
-                    residual = nothing
+            begin_serial_region()
+            @serial_region begin
+                for j ∈ 1:i-1
+                    gamma = c[j] * H[j,i] + s[j] * H[j+1,i]
+                    H[j+1,i] = -s[j] * H[j,i] + c[j] * H[j+1,i]
+                    H[j,i] = gamma
                 end
-                residual = MPI.bcast(residual, comm_world; root=0)
+                delta = sqrt(H[i,i]^2 + H[i+1,i]^2)
+                s[i] = H[i+1,i] / delta
+                c[i] = H[i,i] / delta
+                H[i,i] = c[i] * H[i,i] + s[i] * H[i+1,i]
+                H[i+1,i] = 0
+                g[i+1] = -s[i] * g[i]
+                g[i] = c[i] * g[i]
             end
+            _block_synchronize()
+            residual = abs(g[i+1])
+
             if residual < tol
                 break
             end
         end
 
-        # Update initial guess fo restart
-        if serial_solve
-            y = lsq_result.x
-        else
-            if global_rank[] == 0
-                y = lsq_result.x
-            else
-                y = nothing
-            end
-            y = MPI.bcast(y, comm_world; root=0)
-        end
+        # Update initial guess to restart
+        #################################
+
+        @views y = H[1:i,1:i] \ g[1:i]
 
-        # The following is the `parallel_map()` version of
+        # The following calculates
         #    delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y))
-        # slightly abusing splatting to get the sum into a lambda-function.
         parallel_delta_x_calc(delta_x, V, y)
         right_preconditioner(delta_x)
 

diff --git a/moment_kinetics/test/braginskii_electrons_imex_tests.jl b/moment_kinetics/test/braginskii_electrons_imex_tests.jl
@@ -277,24 +277,19 @@ function runtests()
     @testset "Braginskii electron IMEX timestepping" verbose=use_verbose begin
         println("Braginskii electron IMEX timestepping tests")
 
-        if Sys.isapple()
-            @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
-            end
-        else
-            @testset "Split 3" begin
-                test_input["output"]["base_directory"] = test_output_directory
-                run_test(test_input, expected_p, expected_q, expected_vt)
-            end
-            @long @testset "Check other timestep - $type" for
-                    type ∈ ("KennedyCarpenterARK437",)
+        @testset "Split 3" begin
+            test_input["output"]["base_directory"] = test_output_directory
+            run_test(test_input, expected_p, expected_q, expected_vt)
+        end
+        @long @testset "Check other timestep - $type" for
+                type ∈ ("KennedyCarpenterARK437",)
 
-                timestep_check_input = deepcopy(test_input)
-                timestep_check_input["output"]["base_directory"] = test_output_directory
-                timestep_check_input["output"]["run_name"] = type
-                timestep_check_input["timestepping"]["type"] = type
-                run_test(timestep_check_input, expected_p, expected_q, expected_vt,
-                         rtol=2.e-4, atol=1.e-10)
-            end
+            timestep_check_input = deepcopy(test_input)
+            timestep_check_input["output"]["base_directory"] = test_output_directory
+            timestep_check_input["output"]["run_name"] = type
+            timestep_check_input["timestepping"]["type"] = type
+            run_test(timestep_check_input, expected_p, expected_q, expected_vt,
+                     rtol=2.e-4, atol=1.e-10)
         end
     end
 

diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl
@@ -271,11 +271,6 @@ function run_test()
 end
 
 function runtests()
-    if Sys.isapple()
-        @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
-        end
-        return nothing
-    end
     @testset "kinetic electrons" begin
         println("Kinetic electron tests")
         run_test()

diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl
@@ -272,18 +272,10 @@ function nonlinear_test()
 end
 
 function runtests()
-    if Sys.isapple()
-        @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
-            println("non-linear solver tests")
-            linear_test()
-            nonlinear_test()
-        end
-    else
-        @testset "non-linear solvers" begin
-            println("non-linear solver tests")
-            linear_test()
-            nonlinear_test()
-        end
+    @testset "non-linear solvers" begin
+        println("non-linear solver tests")
+        linear_test()
+        nonlinear_test()
     end
 end