Skip to content

Commit

Permalink
Use Givens rotations instead of MINPACK.jl in linear_solve!()
Browse files Browse the repository at this point in the history
Using Givens rotations (following 'Algorithm 2' in Zou (2023)) avoids
the need for least-squares minimisations at each iteration of the GMRES
linear solver.
  • Loading branch information
johnomotani committed Sep 27, 2024
1 parent b767d22 commit 6a27413
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 86 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,4 @@ jobs:
touch Project.toml
julia -O3 --project -e 'import Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.add("NCDatasets"); Pkg.precompile()'
# Reduce nstep for each example to 10 to avoid the CI job taking too long
# Note we skip the example `if (occursin("ARK", get(t_input, "type", "") && Sys.isapple())`
# because the way we use MINPACK.jl (needed for nonlinear solvers
# used for implicit parts of timestep) doesn't currently work on
# macOS.
julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); if ((occursin("ARK", get(t_input, "type", "")) || occursin("PareschiRusso", get(t_input, "type", "")) || occursin("kinetic_electrons", get(get(input, "composition", Dict{String,Any}()), "electron_physics", "boltzmann_electron_response"))) && Sys.isapple()) continue end; t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end'
julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end'
1 change: 0 additions & 1 deletion moment_kinetics/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882"
LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
Expand Down
106 changes: 60 additions & 46 deletions moment_kinetics/src/nonlinear_solvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Useful references:
[3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method
[4] https://www.rikvoorhaar.com/blog/gmres
[5] E. Carson , J. Liesen, Z. Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003.
[6] Q. Zou, "GMRES algorithms over 35 years", Applied Mathematics and Computation 445, 127869 (2023), https://doi.org/10.1016/j.amc.2023.127869
"""
module nonlinear_solvers

Expand All @@ -36,12 +37,11 @@ using ..looping
using ..type_definitions: mk_float, mk_int

using LinearAlgebra
using MINPACK
using MPI
using SparseArrays
using StatsBase: mean

struct nl_solver_info{TH,TV,Tlig,Tprecon}
struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon}
rtol::mk_float
atol::mk_float
nonlinear_max_iterations::mk_int
Expand All @@ -51,6 +51,9 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon}
linear_max_restarts::mk_int
H::TH
V::TV
c::Tcsg
s::Tcsg
g::Tcsg
linear_initial_guess::Tlig
n_solves::Ref{mk_int}
nonlinear_iterations::Ref{mk_int}
Expand Down Expand Up @@ -108,29 +111,47 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa

if serial_solve
H = allocate_float(linear_restart + 1, linear_restart)
c = allocate_float(linear_restart + 1)
s = allocate_float(linear_restart + 1)
g = allocate_float(linear_restart + 1)
V = allocate_float(reverse(coord_sizes)..., linear_restart+1)
H .= 0.0
c .= 0.0
s .= 0.0
g .= 0.0
V .= 0.0
elseif electron_ppar_pdf_solve
H = allocate_shared_float(linear_restart + 1, linear_restart)
c = allocate_shared_float(linear_restart + 1)
s = allocate_shared_float(linear_restart + 1)
g = allocate_shared_float(linear_restart + 1)
V_ppar = allocate_shared_float(coords.z.n, linear_restart+1)
V_pdf = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1)

begin_serial_region()
@serial_region begin
H .= 0.0
c .= 0.0
s .= 0.0
g .= 0.0
V_ppar .= 0.0
V_pdf .= 0.0
end

V = (V_ppar, V_pdf)
else
H = allocate_shared_float(linear_restart + 1, linear_restart)
c = allocate_shared_float(linear_restart + 1)
s = allocate_shared_float(linear_restart + 1)
g = allocate_shared_float(linear_restart + 1)
V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1)

begin_serial_region()
@serial_region begin
H .= 0.0
c .= 0.0
s .= 0.0
g .= 0.0
V .= 0.0
end
end
Expand Down Expand Up @@ -167,8 +188,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol,
nl_solver_input.nonlinear_max_iterations,
nl_solver_input.linear_rtol, nl_solver_input.linear_atol,
linear_restart, nl_solver_input.linear_max_restarts, H, V,
linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
linear_restart, nl_solver_input.linear_max_restarts, H, V, c, s,
g, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
Ref(0), Ref(nl_solver_input.preconditioner_update_interval),
Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type,
nl_solver_input.preconditioner_update_interval, preconditioners)
Expand Down Expand Up @@ -324,8 +345,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w,
max_restarts=nl_solver_params.linear_max_restarts,
left_preconditioner=left_preconditioner,
right_preconditioner=right_preconditioner,
H=nl_solver_params.H, V=nl_solver_params.V,
rhs_delta=rhs_delta,
H=nl_solver_params.H, c=nl_solver_params.c,
s=nl_solver_params.s, g=nl_solver_params.g,
V=nl_solver_params.V, rhs_delta=rhs_delta,
initial_guess=nl_solver_params.linear_initial_guess,
distributed_norm=distributed_norm,
distributed_dot=distributed_dot,
Expand Down Expand Up @@ -1063,11 +1085,16 @@ end
"""
Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed
at each step of the outer Newton iteration (in `newton_solve!()`).
Uses Givens rotations to reduce the upper Hessenberg matrix to an upper triangular form,
which allows conveniently finding the residual at each step, and computing the final
solution, without calculating a least-squares minimisation at each step. See 'algorithm 2
MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
"""
function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol,
restart, max_restarts, left_preconditioner, right_preconditioner,
H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot,
parallel_map, parallel_delta_x_calc, serial_solve)
H, c, s, g, V, rhs_delta, initial_guess, distributed_norm,
distributed_dot, parallel_map, parallel_delta_x_calc, serial_solve)
# Solve (approximately?):
# J δx = residual0

Expand Down Expand Up @@ -1105,6 +1132,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol
parallel_map((residual0, v) -> -residual0 - v, w, residual0, v)
beta = distributed_norm(w)
parallel_map((w) -> w/beta, select_from_V(V, 1), w)
g[1] = beta

# Set tolerance for GMRES iteration to rtol times the initial residual, unless this is
# so small that it is smaller than atol, in which case use atol instead.
Expand All @@ -1115,7 +1143,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol
counter = 0
restart_counter = 1
while true
for i 1:restart
i = 0
while i < restart
i += 1
counter += 1
#println("Linear ", counter)

Expand Down Expand Up @@ -1148,52 +1178,36 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol
end
parallel_map((w) -> w / H[i+1,i], select_from_V(V, i+1), w)

function temporary_residual!(result, guess)
#println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess))
result .= @view(H[1:i+1,1:i]) * guess
result[1] -= beta
end

# Second argument to fsolve needs to be a Vector{Float64}
if serial_solve
resize!(initial_guess, i)
initial_guess[1] = beta
initial_guess[2:i] .= 0.0
lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm)
residual = norm(lsq_result.f)
else
begin_serial_region()
if global_rank[] == 0
resize!(initial_guess, i)
initial_guess[1] = beta
initial_guess[2:i] .= 0.0
lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm)
residual = norm(lsq_result.f)
else
residual = nothing
begin_serial_region()
@serial_region begin
for j 1:i-1
gamma = c[j] * H[j,i] + s[j] * H[j+1,i]
H[j+1,i] = -s[j] * H[j,i] + c[j] * H[j+1,i]
H[j,i] = gamma
end
residual = MPI.bcast(residual, comm_world; root=0)
delta = sqrt(H[i,i]^2 + H[i+1,i]^2)
s[i] = H[i+1,i] / delta
c[i] = H[i,i] / delta
H[i,i] = c[i] * H[i,i] + s[i] * H[i+1,i]
H[i+1,i] = 0
g[i+1] = -s[i] * g[i]
g[i] = c[i] * g[i]
end
_block_synchronize()
residual = abs(g[i+1])

if residual < tol
break
end
end

# Update initial guess fo restart
if serial_solve
y = lsq_result.x
else
if global_rank[] == 0
y = lsq_result.x
else
y = nothing
end
y = MPI.bcast(y, comm_world; root=0)
end
# Update initial guess to restart
#################################

@views y = H[1:i,1:i] \ g[1:i]

# The following is the `parallel_map()` version of
# The following calculates
# delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y))
# slightly abusing splatting to get the sum into a lambda-function.
parallel_delta_x_calc(delta_x, V, y)
right_preconditioner(delta_x)

Expand Down
29 changes: 12 additions & 17 deletions moment_kinetics/test/braginskii_electrons_imex_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -277,24 +277,19 @@ function runtests()
@testset "Braginskii electron IMEX timestepping" verbose=use_verbose begin
println("Braginskii electron IMEX timestepping tests")

if Sys.isapple()
@testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
end
else
@testset "Split 3" begin
test_input["output"]["base_directory"] = test_output_directory
run_test(test_input, expected_p, expected_q, expected_vt)
end
@long @testset "Check other timestep - $type" for
type ("KennedyCarpenterARK437",)
@testset "Split 3" begin
test_input["output"]["base_directory"] = test_output_directory
run_test(test_input, expected_p, expected_q, expected_vt)
end
@long @testset "Check other timestep - $type" for
type ("KennedyCarpenterARK437",)

timestep_check_input = deepcopy(test_input)
timestep_check_input["output"]["base_directory"] = test_output_directory
timestep_check_input["output"]["run_name"] = type
timestep_check_input["timestepping"]["type"] = type
run_test(timestep_check_input, expected_p, expected_q, expected_vt,
rtol=2.e-4, atol=1.e-10)
end
timestep_check_input = deepcopy(test_input)
timestep_check_input["output"]["base_directory"] = test_output_directory
timestep_check_input["output"]["run_name"] = type
timestep_check_input["timestepping"]["type"] = type
run_test(timestep_check_input, expected_p, expected_q, expected_vt,
rtol=2.e-4, atol=1.e-10)
end
end

Expand Down
5 changes: 0 additions & 5 deletions moment_kinetics/test/kinetic_electron_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,6 @@ function run_test()
end

function runtests()
if Sys.isapple()
@testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
end
return nothing
end
@testset "kinetic electrons" begin
println("Kinetic electron tests")
run_test()
Expand Down
16 changes: 4 additions & 12 deletions moment_kinetics/test/nonlinear_solver_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -272,18 +272,10 @@ function nonlinear_test()
end

function runtests()
if Sys.isapple()
@testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
println("non-linear solver tests")
linear_test()
nonlinear_test()
end
else
@testset "non-linear solvers" begin
println("non-linear solver tests")
linear_test()
nonlinear_test()
end
@testset "non-linear solvers" begin
println("non-linear solver tests")
linear_test()
nonlinear_test()
end
end

Expand Down

0 comments on commit 6a27413

Please sign in to comment.