From a70cc16cd9532596d53823fcd3594e6279ba8a12 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 9 Aug 2024 17:49:20 +0100 Subject: [PATCH 001/107] "coarse_tails" option for grid spacing This can be used to make the grid spacing coarser at large wpa, which should help to relax CFL constraints in moment-kinetic runs. --- moment_kinetics/src/coordinates.jl | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index a18c2a75e..158de0074 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -293,7 +293,34 @@ function set_element_boundaries(nelement_global, L, element_spacing_option, coor for j in 1:nsqrt element_boundaries[(nelement_global+1)+ 1 - j] = (L/2.0) - fac*(L/2.0)*((j-1)/(nsqrt-1))^2 end - + elseif element_spacing_option == "coarse_tails" + # Element boundaries at + # + # x = (1 + (BT)^2 / 3) T tan(BT a) / (1 + (BT a)^2 / 3) + # + # where a = (i - 1 - c) / c, c = (n-1)/2, i is the grid index, so that a=-1 at + # i=1, a=1 at i=n and a=0 on the central grid point (if n is odd, so that there is + # a central point). Also B=1/T*atan(L/2T). + # + # Choosing x∼tan(a) gives dx/da∼1+x^2 so that we get grid spacing roughly + # proportional to x^2 for large |x|, which for w_∥ advection compensates the + # w_∥^2 terms in moment-kinetics so that the CFL condition should be roughly + # constant across the grid. The constant B.T multiplying a inside the tan() is + # chosen so that the transition between roughly constant spacing and roughly x^2 + # spacing happens at x=T. 
The (1 + (BT a)^2 / 3) denominator removes the quadratic + # part of the Taylor expansion of dx/da around a=0 so that we get a flatter region + # of grid spacing for |x| Date: Fri, 9 Aug 2024 19:01:11 +0100 Subject: [PATCH 002/107] Add "coarse_tails" grid spacing option to some example input files --- .../recycling-fraction/wall-bc_recyclefraction0.5-init.toml | 2 ++ examples/recycling-fraction/wall-bc_recyclefraction0.5.toml | 2 ++ .../wall-bc_recyclefraction0.5_split1.toml | 2 ++ .../wall-bc_recyclefraction0.5_split2.toml | 2 ++ .../wall-bc_recyclefraction0.5_split3-init.toml | 6 ++++-- .../wall-bc_recyclefraction0.5_split3.toml | 2 ++ .../wall-bc_recyclefraction0.5_split3_SSPRK4.toml | 2 ++ .../wall-bc_recyclefraction0.5_split3_fekete104.toml | 2 ++ .../wall-bc_recyclefraction0.5_split3_fekete42.toml | 2 ++ .../wall-bc_recyclefraction0.5_split3_fekete64.toml | 2 ++ .../wall-bc_recyclefraction0.5_split3_rkf54.toml | 2 ++ 11 files changed, 24 insertions(+), 2 deletions(-) diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5-init.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5-init.toml index 9442b1dde..a5b99b38c 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5-init.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5-init.toml @@ -55,11 +55,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 6 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(3)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5.toml index 70e81e234..4dd72f914 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5.toml @@ -55,11 +55,13 @@ vpa_nelement = 63 vpa_L = 
36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(3)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split1.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split1.toml index 634959e88..382eec7df 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split1.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split1.toml @@ -55,11 +55,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(3)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split2.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split2.toml index f1ba6bc68..bc5a2c300 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split2.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split2.toml @@ -55,11 +55,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(3)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3-init.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3-init.toml index 7c6e17f90..a8126ae87 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3-init.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3-init.toml @@ 
-55,19 +55,21 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 6 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(3)" nstep = 100000 dt = 1.0e-5 minimum_dt = 1.0e-6 -nwrite = 1000 -nwrite_dfns = 1000 +nwrite = 10000 +nwrite_dfns = 10000 split_operators = false steady_state_residual = true converged_residual_value = 1.0e-3 diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3.toml index 1352d2fdd..4e77b7ca1 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3.toml @@ -55,11 +55,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(3)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_SSPRK4.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_SSPRK4.toml index 73be81551..dc3bcc37c 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_SSPRK4.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_SSPRK4.toml @@ -55,11 +55,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] #nstep = 50000 diff --git 
a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete104.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete104.toml index 26a9fda8b..7c9851ca0 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete104.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete104.toml @@ -57,11 +57,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete10(4)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete42.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete42.toml index 7478a386e..0e42372f3 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete42.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete42.toml @@ -57,11 +57,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete4(2)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete64.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete64.toml index 3c86487ac..0601e8e6f 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete64.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_fekete64.toml @@ -57,11 +57,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" 
vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "Fekete6(4)" diff --git a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_rkf54.toml b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_rkf54.toml index 8372db4cc..5de2b0464 100644 --- a/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_rkf54.toml +++ b/examples/recycling-fraction/wall-bc_recyclefraction0.5_split3_rkf54.toml @@ -57,11 +57,13 @@ vpa_nelement = 63 vpa_L = 36.0 vpa_bc = "zero" vpa_discretization = "chebyshev_pseudospectral" +vpa_element_spacing_option = "coarse_tails" vz_ngrid = 10 vz_nelement = 63 vz_L = 36.0 vz_bc = "zero" vz_discretization = "chebyshev_pseudospectral" +vz_element_spacing_option = "coarse_tails" [timestepping] type = "RKF5(4)" From 8bb41932bad699b0f4e45598ca4b9cdb982be432 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 11 Aug 2024 20:51:07 +0100 Subject: [PATCH 003/107] Fix handling of periodic boundary conditions in gauss_legendre.jl Only works when the dimension is not distributed (i.e. when `coord.nrank==1`). --- moment_kinetics/src/gauss_legendre.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 9a0afb6b3..f39a7ace5 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -849,16 +849,20 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, # N.B. 
QQ varies with ielement for vperp, but not vpa # a radau element is used for the vperp grid (see get_QQ_local!()) get_QQ_local!(QQ_j,j,lobatto,radau,coord,option) + if coord.bc == "periodic" && coord.nrank != 1 + error("periodic boundary conditions not supported when dimension is distributed") + end if coord.bc == "periodic" && coord.nrank == 1 QQ_global[imax[end], imin[j]:imax[j]] .+= QQ_j[1,:] ./ 2.0 - QQ_global[imin[j],imin[j]:imax[j]] .+= QQ_j[1,:] ./ 2.0 + QQ_global[1,1] += 1.0 + QQ_global[1,end] += -1.0 else QQ_global[imin[j],imin[j]:imax[j]] .+= QQ_j[1,:] end for k in 2:imax[j]-imin[j] QQ_global[k,imin[j]:imax[j]] .+= QQ_j[k,:] end - if coord.nelement_local > 1 || (coord.bc == "periodic" && coord.nrank == 1) + if coord.nelement_local > 1 QQ_global[imax[j],imin[j]:imax[j]] .+= QQ_j[ngrid,:]./2.0 else QQ_global[imax[j],imin[j]:imax[j]] .+= QQ_j[ngrid,:] @@ -875,7 +879,6 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, if j == coord.nelement_local if coord.bc == "periodic" && coord.nrank == 1 QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] / 2.0 - QQ_global[imin[1],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] / 2.0 else QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] end @@ -884,7 +887,7 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, end end - if dirichlet_bc + if dirichlet_bc && !coord.bc == "periodic" # Make matrix diagonal for first/last grid points so it does not change the values # there if !(coord.name == "vperp") From 63a4e95b755d8b7e8b83f6bafcd8981fb214508a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 12 Aug 2024 12:53:33 +0100 Subject: [PATCH 004/107] Fix `update_electrons` conditional `t_params.implicit_coefficient_is_zero` only has length `n_rk_stages`, so need to check `istage < n_rk_stages` before getting `t_params.implicit_coefficient_is_zero[n_rk_stages+1]`. 
--- moment_kinetics/src/time_advance.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 774028179..062d888e3 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3072,7 +3072,7 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, || t_params.implicit_coefficient_is_zero[istage+1]) update_electrons = (t_params.rk_coefs_implicit === nothing || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) - || t_params.implicit_coefficient_is_zero[istage+1] + || (istage < n_rk_stages && t_params.implicit_coefficient_is_zero[istage+1]) || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1])) diagnostic_moments = diagnostic_checks && istage == n_rk_stages success = apply_all_bcs_constraints_update_moments!( From ea601b345481464d6a19c2265ce5b8ef1351529c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 16:08:37 +0100 Subject: [PATCH 005/107] Fix ppar normalisation in distributed_dot_r_z_vperp_vpa() --- moment_kinetics/src/nonlinear_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 365b91295..c564f6083 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -685,7 +685,7 @@ function distributed_dot_r_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 2},Abstr if block_rank[] == 0 ppar_global_dot = MPI.Allreduce(ppar_block_dot, +, comm_inter_block[]) - ppar_global_dot = ppar_global_dot / (r.n_global * z.n_global * vperp.n_global * vpa.n_global) + ppar_global_dot = ppar_global_dot / (r.n_global * z.n_global) else ppar_global_dot = nothing end From 9ad6f7f4040c3f4a9f4e5e222125f8c1aab54cc9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 22 Jun 2024 09:37:13 +0100 Subject: [PATCH 006/107] Estimate 
timescale for dg_e/dt, use to normalise residual for g_e solve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we just defined the residual for the electron distribution function solve to be 'dg/dt=0', then we would be asking the solver (roughly) to find g such that 'dg/dt 1 From c6c13b89edf700e41672f9386ce68ca6a6fbebfb Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 12 Aug 2024 11:33:42 +0100 Subject: [PATCH 007/107] Fix RHS evaluation for backward-Euler update of electron_ppar `electron_kinetic_equation_euler_update!()` represents an explicit pseudo-timestep, so the -dt*(electron_ppar - electron_ppar_previous_ion_step)/ion_dt evaluation should be done with the 'old' pseudotimestep's electron_ppar, not the current estimate of the 'new' pseudotimestep's electron_ppar. --- moment_kinetics/src/electron_kinetic_equation.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index c6f256377..67a3f47ef 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1850,6 +1850,7 @@ function electron_kinetic_equation_euler_update!(fvec_out, fvec_in, moments, z, # Add source term to turn steady state solution into a backward-Euler update of # electron_ppar with the ion timestep `ion_dt`. ppar_out = fvec_out.electron_ppar + ppar_in = fvec_in.electron_ppar ppar_previous_ion_step = moments.electron.ppar begin_r_z_region() @loop_r_z ir iz begin @@ -1859,7 +1860,7 @@ function electron_kinetic_equation_euler_update!(fvec_out, fvec_in, moments, z, # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, # resulting in a backward-Euler step (as long as the pseudo-timestepping # loop converges). 
- ppar_out[iz,ir] += -dt * (ppar_out[iz,ir] - ppar_previous_ion_step[iz,ir]) / ion_dt + ppar_out[iz,ir] += -dt * (ppar_in[iz,ir] - ppar_previous_ion_step[iz,ir]) / ion_dt end end end From 53986ef5c782a4ec62255813b0229c7272fda901 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 11 Aug 2024 20:43:56 +0100 Subject: [PATCH 008/107] Backward-Euler method for electron pseudotimestep Initial implementation - currently uses fixed timestep. --- .../src/electron_kinetic_equation.jl | 512 +++++++++++++++++- moment_kinetics/src/initial_conditions.jl | 10 +- moment_kinetics/src/time_advance.jl | 57 +- 3 files changed, 548 insertions(+), 31 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 67a3f47ef..5e938b2de 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -29,7 +29,7 @@ using ..krook_collisions: electron_krook_collisions! using ..moment_constraints: hard_force_moment_constraints!, moment_constraints_on_residual! using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct -using ..nonlinear_solvers: newton_solve! +using ..nonlinear_solvers using ..runge_kutta: rk_update_variable!, rk_loworder_solution!, local_error_norm, adaptive_timestep_update_t_params! 
using ..utils: get_minimum_CFL_z, get_minimum_CFL_vpa @@ -68,12 +68,12 @@ OUTPUT: function update_electron_pdf!(scratch, pdf, moments, phi, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, t_params, collisions, composition, external_source_settings, num_diss_params, - max_electron_pdf_iterations, max_electron_sim_time; io_electron=nothing, - initial_time=nothing, residual_tolerance=nothing, evolve_ppar=false, - ion_dt=nothing) + nl_solver_params, max_electron_pdf_iterations, max_electron_sim_time; + io_electron=nothing, initial_time=nothing, residual_tolerance=nothing, + evolve_ppar=false, ion_dt=nothing, solution_method="backward_euler") # set the method to use to solve the electron kinetic equation - solution_method = "artificial_time_derivative" + #solution_method = "artificial_time_derivative" #solution_method = "shooting_method" #solution_method = "picard_iteration" # solve the electron kinetic equation using the specified method @@ -84,6 +84,13 @@ function update_electron_pdf!(scratch, pdf, moments, phi, r, z, vperp, vpa, z_sp external_source_settings, num_diss_params, max_electron_pdf_iterations, max_electron_sim_time; io_electron=io_electron, initial_time=initial_time, residual_tolerance=residual_tolerance, evolve_ppar=evolve_ppar, ion_dt=ion_dt) + elseif solution_method == "backward_euler" + return electron_backward_euler!(scratch, pdf, moments, phi, collisions, + composition, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, t_params, external_source_settings, + num_diss_params, nl_solver_params, max_electron_pdf_iterations, + max_electron_sim_time; io_electron=io_electron, initial_time=initial_time, + residual_tolerance=residual_tolerance, evolve_ppar=evolve_ppar, ion_dt=ion_dt) elseif solution_method == "shooting_method" dens = moments.electron.dens vthe = moments.electron.vth @@ -111,7 +118,7 @@ function update_electron_pdf!(scratch, pdf, moments, phi, r, 
z, vperp, vpa, z_sp dppar_dz, dqpar_dz, dvth_dz, z, vpa, vpa_spectral, scratch_dummy, max_electron_pdf_iterations) else - error("!!! invalid solution method specified !!!") + error("!!! invalid solution method '$solution_method' specified !!!") end return nothing end @@ -589,6 +596,499 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll return success end +""" +Update the electron distribution function using backward-Euler for an artificial time +advance of the electron kinetic equation until a steady-state solution is reached. +""" +function electron_backward_euler!(scratch, pdf, moments, phi, collisions, composition, r, + z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, t_params, external_source_settings, num_diss_params, + nl_solver_params, max_electron_pdf_iterations, max_electron_sim_time; + io_electron=nothing, initial_time=nothing, residual_tolerance=nothing, + evolve_ppar=false, ion_dt=nothing) + + if max_electron_pdf_iterations === nothing && max_electron_sim_time === nothing + error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") + end + + begin_r_z_region() + + # create several (r) dimension dummy arrays for use in taking derivatives + buffer_r_1 = @view scratch_dummy.buffer_rs_1[:,1] + buffer_r_2 = @view scratch_dummy.buffer_rs_2[:,1] + buffer_r_3 = @view scratch_dummy.buffer_rs_3[:,1] + buffer_r_4 = @view scratch_dummy.buffer_rs_4[:,1] + buffer_r_5 = @view scratch_dummy.buffer_rs_5[:,1] + buffer_r_6 = @view scratch_dummy.buffer_rs_6[:,1] + + begin_r_z_region() + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron parallel pressure + moments.electron.vth[iz,ir] = sqrt(abs(2.0 * moments.electron.ppar[iz,ir] / + (moments.electron.dens[iz,ir] * + composition.me_over_mi))) + scratch[t_params.n_rk_stages+1].electron_ppar[iz,ir] = moments.electron.ppar[iz,ir] + end + calculate_electron_moment_derivatives!(moments, +
(electron_density=moments.electron.dens, + electron_upar=moments.electron.upar, + electron_ppar=moments.electron.ppar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, + composition.electron_physics) + + if ion_dt !== nothing + evolve_ppar = true + + # Use forward-Euler step (with `ion_dt` as the timestep) as initial guess for + # updated electron_ppar + electron_energy_equation!(scratch[t_params.n_rk_stages+1].electron_ppar, + moments.electron.ppar, moments.electron.dens, + moments.electron.upar, moments.ion.dens, + moments.ion.upar, moments.ion.ppar, + moments.neutral.dens, moments.neutral.uz, + moments.neutral.pz, moments.electron, collisions, + ion_dt, composition, external_source_settings.electron, + num_diss_params, z) + end + + if evolve_ppar + error("advancing electron_ppar is not supported yet in electron_backward_euler()") + end + + if !evolve_ppar + # ppar is not updated in the pseudo-timestepping loop below. So that we can read + # ppar from the scratch structs, copy moments.electron.ppar into all of them. + moments_ppar = moments.electron.ppar + for istage ∈ 1:t_params.n_rk_stages+1 + scratch_ppar = scratch[istage].electron_ppar + @loop_r_z ir iz begin + scratch_ppar[iz,ir] = moments_ppar[iz,ir] + end + end + end + + if initial_time !== nothing + @serial_region begin + t_params.t[] = initial_time + end + _block_synchronize() + # Make sure that output times are set relative to this initial_time (the values in + # t_params are set relative to 0.0). + moments_output_times = t_params.moments_output_times .+ initial_time + dfns_output_times = t_params.dfns_output_times .+ initial_time + else + initial_time = t_params.t[] + end + if io_electron === nothing && t_params.debug_io !== nothing + # Overwrite the debug output file with the output from this call to + # update_electron_pdf_with_time_advance!(). 
+ io_electron = get_electron_io_info(t_params.debug_io[1], "electron_debug") + do_debug_io = true + debug_io_nwrite = t_params.debug_io[3] + else + do_debug_io = false + end + + # Store the initial number of iterations in the solution of the electron kinetic + # equation + initial_step_counter = t_params.step_counter[] + t_params.step_counter[] += 1 + # initialise the electron pdf convergence flag to false + electron_pdf_converged = false + + begin_serial_region() + t_params.moments_output_counter[] += 1 + @serial_region begin + if io_electron !== nothing + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, vpa) + end + end + # evolve (artificially) in time until the residual is less than the tolerance + while (!electron_pdf_converged + && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations) + || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time)) + && t_params.dt[] > 0.0 && !isnan(t_params.dt[])) + + reset_nonlinear_per_stage_counters(nl_solver_params) + + old_scratch = scratch[1] + new_scratch = scratch[t_params.n_rk_stages+1] + + # Set the initial values for the next step to the final values from the previous + # step. The initial guess for f_electron_new and electron_ppar_new are just the + # values from the old timestep, so no need to change those. 
+ begin_r_z_vperp_vpa_region() + f_electron_old = old_scratch.pdf_electron + f_electron_new = new_scratch.pdf_electron + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + f_electron_old[ivpa,ivperp,iz,ir] = f_electron_new[ivpa,ivperp,iz,ir] + end + electron_ppar_old = old_scratch.electron_ppar + electron_ppar_new = new_scratch.electron_ppar + if evolve_ppar + begin_r_z_region() + @loop_r_z ir iz begin + electron_ppar_old[iz,ir] = electron_ppar_new[iz,ir] + end + end + + # Do a forward-Euler update of the electron pdf, and (if evolve_ppar=true) the + # electron parallel pressure as an initial guess. + electron_kinetic_equation_euler_update!(new_scratch, old_scratch, moments, z, + vperp, vpa, z_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + collisions, composition, + external_source_settings, num_diss_params, + t_params.dt[]; evolve_ppar=evolve_ppar, + ion_dt=ion_dt) + + # Do a backward-Euler update of the electron pdf, and (if evolve_ppar=true) the + # electron parallel pressure. + function residual_func!(residual, new_variables) + electron_ppar_residual, f_electron_residual = residual + electron_ppar_newvar, f_electron_newvar = new_variables + + new_scratch_electron = scratch_electron_pdf(f_electron_newvar, electron_ppar_newvar) + + apply_electron_bc_and_constraints!(new_scratch_electron, phi, moments, z, + vperp, vpa, vperp_spectral, vpa_spectral, + vpa_advect, num_diss_params, composition) + + # Only the first entry in the `electron_pdf_substruct` will be used, so does not + # matter what we put in the second and third except that they have the right type.
+ new_pdf = (electron=electron_pdf_substruct(f_electron_newvar, f_electron_newvar, + f_electron_newvar,),) + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf!(moments.electron.qpar, electron_ppar_newvar, + moments.electron.vth, f_electron_newvar, vpa) + + if evolve_ppar + this_dens = moments.electron.dens + this_upar = moments.electron.upar + if update_vth + begin_r_z_region() + this_vth = moments.electron.vth + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] / + (this_dens[iz,ir] * + composition.me_over_mi))) + end + end + calculate_electron_moment_derivatives!( + moments, + (electron_density=this_dens, + electron_upar=this_upar, + electron_ppar=electron_ppar_newvar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, + composition.electron_physics) + else + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz, moments.electron.qpar, + buffer_r_1, buffer_r_2, buffer_r_3, buffer_r_4, + z_spectral, z) + end + + begin_r_z_region() + @loop_r_z ir iz begin + electron_ppar_residual[iz,ir] = 0.0 + end + #electron_energy_residual!(electron_ppar_residual, electron_ppar_newvar, fvec_in, + # moments, collisions, composition, + # external_source_settings, num_diss_params, z, dt) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to f_electron_old, and pass the pseudo-timestep dt, then + # it will evaluate f_electron_old + dt*d(g_e)/dt, evaluated using the new variables.
+ begin_r_z_vperp_vpa_region() + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz,ir] = f_electron_old[ivpa,ivperp,iz,ir] + end + residual_scratch_electron = scratch_electron_pdf(f_electron_residual, + electron_ppar_residual) + new_scratch_electron = scratch_electron_pdf(f_electron_newvar, electron_ppar_newvar) + electron_kinetic_equation_euler_update!(residual_scratch_electron, + new_scratch_electron, moments, z, vperp, + vpa, z_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, collisions, + composition, external_source_settings, + num_diss_params, t_params.dt[]) + + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_s_r_z_vperp_vpa_region() + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz,ir,is] = f_electron_newvar[ivpa,ivperp,iz,ir,is] - f_electron_residual[ivpa,ivperp,iz,ir,is] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_r_z_vperp_region() + @loop_r_z_vperp ir iz ivperp begin + @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz,ir], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_r_z_vpa_region() + enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, + vperp_adv, vperp_diffusion) + end + if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_electron_newvar, f_old, and residual + # should all be zero at exactly the same set of grid points, so it is + # reasonable to zero-out `residual` to impose the boundary condition. We + # impose this after subtracting f_old in case rounding errors, etc. 
mean + # that at some point f_old had a different boundary condition cut-off + # index. + begin_r_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + @loop_r ir begin + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], + moments.electron.upar[iz,ir], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm > -zero + f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + end + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + @loop_r ir begin + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], + moments.electron.upar[iz,ir], true, true) + @loop_vperp_vpa ivpa ivperp begin + if v_unnorm < zero + f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + end + end + end + end + end + begin_r_z_region() + @loop_r_z ir iz begin + @views moment_constraints_on_residual!(f_electron_residual[:,:,iz,ir], + f_electron_newvar[:,:,iz,ir], + (evolve_density=true, + evolve_upar=true, + evolve_ppar=true), + vpa) + end + return nothing + end + + residual = (scratch_dummy.implicit_buffer_zr_1, scratch_dummy.implicit_buffer_vpavperpzr_1) + delta_x = (scratch_dummy.implicit_buffer_zr_2, + scratch_dummy.implicit_buffer_vpavperpzr_2) + rhs_delta = (scratch_dummy.implicit_buffer_zr_3, + scratch_dummy.implicit_buffer_vpavperpzr_3) + v = (scratch_dummy.implicit_buffer_zr_4, + scratch_dummy.implicit_buffer_vpavperpzr_4) + w = (scratch_dummy.implicit_buffer_zr_5, + scratch_dummy.implicit_buffer_vpavperpzr_5) + + newton_success = newton_solve!((electron_ppar_new, f_electron_new), residual_func!, + residual, delta_x, rhs_delta, v, w, nl_solver_params; + left_preconditioner=nothing, + right_preconditioner=nothing, + coords=(r=r, z=z, vperp=vperp, vpa=vpa)) + if !newton_success + error("electron_backward_euler() Newton solve failed") + end + + apply_electron_bc_and_constraints!(new_scratch, phi, moments, z, vperp, vpa, + vperp_spectral, vpa_spectral, vpa_advect, + num_diss_params, composition) + + function 
update_derived_moments_and_derivatives(update_vth=false) + # update the electron heat flux + moments.electron.qpar_updated[] = false + calculate_electron_qpar_from_pdf!(moments.electron.qpar, + electron_ppar_new, + moments.electron.vth, f_electron_new, vpa) + + if evolve_ppar + this_ppar = electron_ppar_new + this_dens = moments.electron.dens + this_upar = moments.electron.upar + if update_vth + begin_r_z_region() + this_vth = moments.electron.vth + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * this_ppar[iz,ir] / + (this_dens[iz,ir] * + composition.me_over_mi))) + end + end + calculate_electron_moment_derivatives!( + moments, + (electron_density=this_dens, + electron_upar=this_upar, + electron_ppar=this_ppar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, + composition.electron_physics) + else + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz, moments.electron.qpar, + buffer_r_1, buffer_r_2, buffer_r_3, buffer_r_4, + z_spectral, z) + end + end + update_derived_moments_and_derivatives() + + #if t_params.adaptive && istage == t_params.n_rk_stages + # if ion_dt === nothing + # local_max_dt = Inf + # else + # # Ensure timestep is not too big, so that d(electron_ppar)/dt 'source + # # term' is numerically stable. 
+ # local_max_dt = 0.5 * ion_dt + # end + # electron_adaptive_timestep_update!(scratch, t_params.t[], t_params, + # moments, phi, z_advect, vpa_advect, + # composition, r, z, vperp, vpa, + # vperp_spectral, vpa_spectral, + # external_source_settings, + # num_diss_params; + # evolve_ppar=evolve_ppar, + # local_max_dt=local_max_dt) + # # Re-do this in case electron_adaptive_timestep_update!() re-arranged the + # # `scratch` vector + # new_scratch = scratch[istage+1] + # old_scratch = scratch[istage] + + # if t_params.previous_dt[] == 0.0 + # # Re-calculate moments and moment derivatives as the timstep needs to + # # be re-done with a smaller dt, so scratch[t_params.n_rk_stages+1] has + # # been reset to the values from the beginning of the timestep here. + # update_derived_moments_and_derivatives(true) + # end + #end + + # update the time following the pdf update + @serial_region begin + t_params.t[] += t_params.previous_dt[] + end + _block_synchronize() + + residual = -1.0 + if t_params.previous_dt[] > 0.0 + # Calculate residuals to decide if iteration is converged. + # Might want an option to calculate the residual only after a certain number + # of iterations (especially during initialization when there are likely to be + # a large number of iterations required) to avoid the expense, and especially + # the global MPI.Bcast()? 
+ begin_r_z_vperp_vpa_region() + residual = steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, + t_params.previous_dt[]; use_mpi=true, + only_max_abs=true) + if global_rank[] == 0 + residual = first(values(residual))[1] + end + if evolve_ppar + ppar_residual = + steady_state_residuals(new_scratch.electron_ppar, + old_scratch.electron_ppar, + t_params.previous_dt[]; use_mpi=true, + only_max_abs=true) + if global_rank[] == 0 + ppar_residual = first(values(ppar_residual))[1] + residual = max(residual, ppar_residual) + end + end + if global_rank[] == 0 + if residual_tolerance === nothing + residual_tolerance = t_params.converged_residual_value + end + electron_pdf_converged = abs(residual) < residual_tolerance + end + electron_pdf_converged = MPI.Bcast(electron_pdf_converged, 0, comm_world) + end + + if (mod(t_params.step_counter[] - initial_step_counter,100) == 0) + begin_serial_region() + @serial_region begin + if z.irank == 0 && z.irank == z.nrank - 1 + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual: ", residual) + elseif z.irank == 0 + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual: ", residual) + end + end + end + if ((t_params.adaptive && t_params.write_moments_output[]) + || (!t_params.adaptive && t_params.step_counter[] % t_params.nwrite_moments == 0) + || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) + + begin_serial_region() + t_params.moments_output_counter[] += 1 + @serial_region begin + if io_electron !== nothing + t_params.write_moments_output[] = false + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, + vpa) + end + end + end + + # check to see if the electron pdf satisfies the electron kinetic 
equation to within the specified tolerance + + t_params.step_counter[] += 1 + if electron_pdf_converged + break + end + end + # Update the 'pdf' arrays with the final result + begin_r_z_vperp_vpa_region() + final_scratch_pdf = scratch[t_params.n_rk_stages+1].pdf_electron + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + pdf[ivpa,ivperp,iz,ir] = final_scratch_pdf[ivpa,ivperp,iz,ir] + end + if evolve_ppar + # Update `moments.electron.ppar` with the final electron pressure + begin_r_z_region() + scratch_ppar = scratch[t_params.n_rk_stages+1].electron_ppar + moments_ppar = moments.electron.ppar + @loop_r_z ir iz begin + moments_ppar[iz,ir] = scratch_ppar[iz,ir] + end + end + begin_serial_region() + @serial_region begin + if !electron_pdf_converged || do_debug_io + if io_electron !== nothing && io_electron !== true + t_params.moments_output_counter[] += 1 + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, vpa) + finish_electron_io(io_electron) + end + end + end + if !electron_pdf_converged + success = "kinetic-electrons" + else + success = "" + end + return success +end + """ implicit_electron_advance!() diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 8d7db7598..871ac5a49 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -714,12 +714,14 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field vpa_advect, scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, + nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; io_electron=io_initial_electron, initial_time=code_time, residual_tolerance=t_input["initialization_residual_value"], - evolve_ppar=true) + evolve_ppar=true, + solution_method="artificial_time_derivative") if success != "" error("!!!max number of iterations for electron pdf update 
exceeded!!!\n" * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") @@ -769,6 +771,11 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field gyroavs, scratch_dummy, 0.0, initialisation_nl_solver_params) else + begin_serial_region() + @serial_region begin + t_params.electron.dt[] = t_input["dt"] + t_params.electron.previous_dt[] = t_input["dt"] + end success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, z_spectral, @@ -776,6 +783,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field vpa_advect, scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, + nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; io_electron=io_initial_electron) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 062d888e3..2ed66a62d 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -663,7 +663,9 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop else electron_conduction_nl_solve_parameters = nothing end - if t_params.implicit_electron_advance + if t_params.implicit_electron_advance || + composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) nl_solver_electron_advance_params = setup_nonlinear_solve(input_dict, (r=r, z=z, vperp=vperp, vpa=vpa), @@ -770,6 +772,11 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop resize!(t_params.electron.dfns_output_times, 0) t_params.electron.moments_output_counter[] = 1 t_params.electron.dfns_output_counter[] = 1 + begin_serial_region() + @serial_region begin + t_params.electron.dt[] = t_input["electron_t_input"]["dt"] + t_params.electron.previous_dt[] = t_input["electron_t_input"]["dt"] + end elseif composition.electron_physics != restart_electron_physics 
begin_serial_region() @serial_region begin @@ -1498,7 +1505,7 @@ function setup_dummy_and_buffer_arrays(nr, nz, nvpa, nvperp, nvz, nvr, nvzeta, buffer_vpavperpr_5 = allocate_shared_float(nvpa,nvperp,nr) buffer_vpavperpr_6 = allocate_shared_float(nvpa,nvperp,nr) - if t_params.implicit_electron_advance + if t_params.implicit_electron_advance || true implicit_buffer_zr_1 = allocate_shared_float(nz,nr) implicit_buffer_zr_2 = allocate_shared_float(nz,nr) implicit_buffer_zr_3 = allocate_shared_float(nz,nr) @@ -2017,8 +2024,8 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param scratch[t_params.n_rk_stages+1], pdf, moments, fields, nothing, nothing, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, advance, scratch_dummy, false; pdf_bc_constraints=false, - update_electrons=false) + t_params, nl_solver_params, advance, scratch_dummy, false; + pdf_bc_constraints=false, update_electrons=false) end if finish_now @@ -2309,8 +2316,8 @@ function apply_all_bcs_constraints_update_moments!( this_scratch, pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, advance, scratch_dummy, diagnostic_moments; pdf_bc_constraints=true, - update_electrons=true) + t_params, nl_solver_params, advance, scratch_dummy, diagnostic_moments; + pdf_bc_constraints=true, update_electrons=true) begin_s_r_z_region() @@ -2412,7 +2419,8 @@ function apply_all_bcs_constraints_update_moments!( z_spectral, vperp_spectral, vpa_spectral, electron_z_advect, electron_vpa_advect, scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, - max_electron_pdf_iterations, max_electron_sim_time) + nl_solver_params.electron_advance, max_electron_pdf_iterations, + 
max_electron_sim_time) success = kinetic_electron_success end end @@ -2489,12 +2497,12 @@ end """ adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, - t_params, moments, fields, + t_params, pdf, moments, fields, boundary_distributions, composition, collisions, geometry, external_source_settings, spectral_objects, - advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, - success, nl_max_its_fraction) + advect_objects, gyroavs, num_diss_params, + nl_solver_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) Check the error estimate for the embedded RK method and adjust the timestep if appropriate. @@ -2503,9 +2511,9 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, t_params, pdf, moments, fields, boundary_distributions, composition, collisions, geometry, external_source_settings, spectral_objects, - advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, - success, nl_max_its_fraction) + advect_objects, gyroavs, num_diss_params, + nl_solver_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) #error_norm_method = "Linf" error_norm_method = "L2" @@ -2636,8 +2644,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, loworder_constraints_scratch, pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, - external_source_settings, num_diss_params, t_params, advance, scratch_dummy, - false; update_electrons=false) + external_source_settings, num_diss_params, t_params, nl_solver_params, advance, + scratch_dummy, false; update_electrons=false) # Re-calculate moment derivatives in the `moments` struct, in case they were changed # by the previous call @@ -2645,8 +2653,8 @@ function 
adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, scratch[t_params.n_rk_stages+1], pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, - external_source_settings, num_diss_params, t_params, advance, scratch_dummy, - false; pdf_bc_constraints=false, update_electrons=false) + external_source_settings, num_diss_params, t_params, nl_solver_params, advance, + scratch_dummy, false; pdf_bc_constraints=false, update_electrons=false) # Calculate the timstep error estimates if z.bc == "wall" && (moments.evolve_upar || moments.evolve_ppar) @@ -3033,7 +3041,7 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, advance, scratch_dummy, false) + t_params, nl_solver_params, advance, scratch_dummy, false) if success != "" # Break out of the istage loop, as passing `success != ""` to the # adaptive timestep update function will signal a failed timestep, so @@ -3079,9 +3087,9 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, scratch[istage+1], pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, - external_source_settings, num_diss_params, t_params, advance, scratch_dummy, - diagnostic_moments; pdf_bc_constraints=apply_bc_constraints, - update_electrons=update_electrons) + external_source_settings, num_diss_params, t_params, nl_solver_params, + advance, scratch_dummy, diagnostic_moments; + pdf_bc_constraints=apply_bc_constraints, update_electrons=update_electrons) if success != "" # Break out of the istage loop, as passing `success != ""` to the # 
adaptive timestep update function will signal a failed timestep, so @@ -3104,8 +3112,8 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, boundary_distributions, composition, collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, - advance, scratch_dummy, r, z, vperp, vpa, - vzeta, vr, vz, success, nl_max_its_fraction) + nl_solver_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) elseif success != "" error("Implicit part of timestep failed") end @@ -3502,6 +3510,7 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, + nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; ion_dt=dt) success = (electron_success == "") From d39491885415534e8db6eb2dc43049f9c384405d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 21 Aug 2024 17:52:41 +0100 Subject: [PATCH 009/107] Fix boundary conditions in electron kinetic equation residuals --- .../src/electron_kinetic_equation.jl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 5e938b2de..9b8d832f9 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -859,8 +859,8 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], moments.electron.upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin - if v_unnorm > -zero - f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + if v_unnorm[ivpa] > -zero + f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 end end end @@ -870,9 +870,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, 
collisions, compos @loop_r ir begin v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], moments.electron.upar[iz,ir], true, true) - @loop_vperp_vpa ivpa ivperp begin - if v_unnorm < zero - f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 end end end @@ -1222,8 +1222,8 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], fvec_in.electron_upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin - if v_unnorm > -zero - f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + if v_unnorm[ivpa] > -zero + f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 end end end @@ -1233,9 +1233,9 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo @loop_r ir begin v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], fvec_in.electron_upar[iz,ir], true, true) - @loop_vperp_vpa ivpa ivperp begin - if v_unnorm < zero - f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 end end end From facd373421818109d2c30e257938a602238ac317 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 12 Aug 2024 12:08:00 +0100 Subject: [PATCH 010/107] Don't try negative step in line search in `newton_solve!()` The negative step in the line search was supposed to help make the iteration more robust by giving another option to make the residual decrease, but it sometimes seems to stall convergence. May be better to just not use it.
--- moment_kinetics/src/nonlinear_solvers.jl | 27 +++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index c564f6083..62d15b791 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -331,20 +331,27 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, s *= 0.5 end + #if residual_norm > previous_residual_norm + # # Failed to find a point that decreases the residual, so try a negative + # # step + # s = -1.0e-5 + # parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + # residual_func!(residual, x) + # residual_norm = distributed_norm(residual) + # if residual_norm > previous_residual_norm + # # That didn't work either, so just take the full step and hope for + # # convergence later + # parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + # residual_func!(residual, x) + # residual_norm = distributed_norm(residual) + # end + #end if residual_norm > previous_residual_norm - # Failed to find a point that decreases the residual, so try a negative - # step - s = -1.0e-5 + # Line search didn't work, so just take the full step and hope for + # convergence later parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) residual_func!(residual, x) residual_norm = distributed_norm(residual) - if residual_norm > previous_residual_norm - # That didn't work either, so just take the full step and hope for - # convergence later - parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) - residual_func!(residual, x) - residual_norm = distributed_norm(residual) - end end end parallel_map((w) -> w, x, w) From 3e5bd7b9476faf067f1d62d848f9befd9f8221c5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 12 Aug 2024 12:50:55 +0100 Subject: [PATCH 011/107] Allow updating electron_ppar with electron_backward_euler!() Allows using `electron_backward_euler!()` for the first 
kinetic electron initialisation phase (where both electron distribution function and electron parallel pressure are evolved together) and using the `implicit_electron_ppar` option. --- ...netic-implicit-electron_ppar-loworder.toml | 136 ++++++++++++++++++ .../src/electron_kinetic_equation.jl | 57 ++++---- moment_kinetics/src/initial_conditions.jl | 3 +- moment_kinetics/src/time_advance.jl | 8 +- 4 files changed, 172 insertions(+), 32 deletions(-) create mode 100644 examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml new file mode 100644 index 000000000..c57ceaafa --- /dev/null +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -0,0 +1,136 @@ +#runtime_plots = true +n_ion_species = 1 +n_neutral_species = 1 +electron_physics = "kinetic_electrons" +evolve_moments_density = true +evolve_moments_parallel_flow = true +evolve_moments_parallel_pressure = true +evolve_moments_conservation = true +recycling_fraction = 0.5 +T_e = 1.0 +T_wall = 0.1 +initial_density1 = 1.0 +initial_temperature1 = 1.0 +z_IC_option1 = "sinusoid" +z_IC_density_amplitude1 = 0.1 +z_IC_density_phase1 = 0.0 +z_IC_upar_amplitude1 = 0.1 +z_IC_upar_phase1 = 0.0 +z_IC_temperature_amplitude1 = 0.1 +z_IC_temperature_phase1 = 1.0 +vpa_IC_option1 = "gaussian" +vpa_IC_density_amplitude1 = 1.0 +vpa_IC_density_phase1 = 0.0 +vpa_IC_upar_amplitude1 = 0.0 +vpa_IC_upar_phase1 = 0.0 +vpa_IC_temperature_amplitude1 = 0.0 +vpa_IC_temperature_phase1 = 0.0 +initial_density2 = 1.0 +initial_temperature2 = 1.0 +z_IC_option2 = "sinusoid" +z_IC_density_amplitude2 = 0.001 +z_IC_density_phase2 = 0.0 +z_IC_upar_amplitude2 = 0.0 +z_IC_upar_phase2 = 0.0 +z_IC_temperature_amplitude2 = 0.0 +z_IC_temperature_phase2 = 0.0 +vpa_IC_option2 = "gaussian" 
+vpa_IC_density_amplitude2 = 1.0 +vpa_IC_density_phase2 = 0.0 +vpa_IC_upar_amplitude2 = 0.0 +vpa_IC_upar_phase2 = 0.0 +vpa_IC_temperature_amplitude2 = 0.0 +vpa_IC_temperature_phase2 = 0.0 +charge_exchange_frequency = 0.75 +ionization_frequency = 0.0 +constant_ionization_rate = false +nu_ei = 1000.0 +r_ngrid = 1 +r_nelement = 1 +z_ngrid = 5 +z_nelement = 16 +#z_nelement_local = 16 +z_bc = "periodic" +#z_discretization = "chebyshev_pseudospectral" +z_discretization = "gausslegendre_pseudospectral" +vpa_ngrid = 6 +vpa_nelement = 31 +vpa_L = 12.0 +vpa_bc = "zero" +#vpa_discretization = "chebyshev_pseudospectral" +vpa_discretization = "gausslegendre_pseudospectral" +vpa_element_spacing_option = "coarse_tails" +vz_ngrid = 6 +vz_nelement = 31 +vz_L = 12.0 +vz_bc = "zero" +#vz_discretization = "chebyshev_pseudospectral" +vz_discretization = "gausslegendre_pseudospectral" +vz_element_spacing_option = "coarse_tails" + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-6 +minimum_dt = 1.0e-7 +#maximum_dt = 2.0e-5 +rtol = 1.0e-4 +max_increase_factor_near_last_fail = 1.001 +last_fail_proximity_factor = 1.1 +max_increase_factor = 1.05 +nwrite = 10000 +nwrite_dfns = 10000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +#dt = 2.0e-8 +dt = 2.0e-5 +maximum_dt = 1.0 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-6 +atol = 1.0e-14 +minimum_dt = 1.0e-10 +initialization_residual_value = 2.5 +#converged_residual_value = 0.1 #1.0e-3 +converged_residual_value = 1.0e-2 +#debug_io = 10000 + +[nonlinear_solver] +nonlinear_max_iterations = 1000 +rtol = 1.0e-5 +atol = 1.0e-12 +linear_restart = 40 +#linear_restart = 200 + +[ion_numerical_dissipation] +vpa_dissipation_coefficient 
= 1.0e0 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +#vpa_dissipation_coefficient = 2.0e2 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +vz_dissipation_coefficient = 1.0e-1 +force_minimum_pdf_value = 0.0 + +[krook_collisions] +use_krook = true + +frequency_option = "reference_parameters" +nuee0 = 1000.0 +nuei0 = 1000.0 diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 9b8d832f9..c7d71f7f3 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -652,10 +652,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos num_diss_params, z) end - if evolve_ppar - error("advancing electron_ppar is not supported yet in electron_backward_euler()") - end - if !evolve_ppar # ppar is not updated in the pseudo-timestepping loop below. So that we can read # ppar from the scratch structs, copy moments.electron.ppar into all of them. @@ -734,15 +730,16 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end - # Do a forward-Euler update of the electron pdf, and (if evove_ppar=true) the - # electron parallel pressure as an initial guess. + # Do a forward-Euler update of the electron pdf as an initial guess. Even when + # evolving electron_ppar, do not update electron_ppar here because if dt is bigger + # than ion_dt, then an explicit timestep will likely make electron_ppar over-shoot + # which would just take more iterations in the Newton-Krylov solve to fix. 
electron_kinetic_equation_euler_update!(new_scratch, old_scratch, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, num_diss_params, - t_params.dt[]; evolve_ppar=evolve_ppar, - ion_dt=ion_dt) + t_params.dt[]) # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the # electron parallel pressure. @@ -767,16 +764,14 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos if evolve_ppar this_dens = moments.electron.dens this_upar = moments.electron.upar - if update_vth - begin_r_z_region() - this_vth = moments.electron.vth - @loop_r_z ir iz begin - # update the electron thermal speed using the updated electron - # parallel pressure - this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] / - (this_dens[iz,ir] * - composition.me_over_mi))) - end + begin_r_z_region() + this_vth = moments.electron.vth + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] / + (this_dens[iz,ir] * + composition.me_over_mi))) end calculate_electron_moment_derivatives!( moments, @@ -793,13 +788,17 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos z_spectral, z) end - begin_r_z_region() - @loop_r_z ir iz begin - electron_ppar_residual[iz,ir] = 0.0 + if evolve_ppar + begin_r_z_region() + @loop_r_z ir iz begin + electron_ppar_residual[iz,ir] = electron_ppar_old[iz,ir] + end + else + begin_r_z_region() + @loop_r_z ir iz begin + electron_ppar_residual[iz,ir] = 0.0 + end end - #electron_energy_residual!(electron_ppar_residual, electron_ppar_newvar, fvec_in, - # moments, collisions, composition, - # external_source_settings, num_diss_params, z, dt) # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the # electron_pdf member of the first argument, so if we set the electron_pdf member @@ 
-817,7 +816,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, t_params.dt[]) + num_diss_params, t_params.dt[]; + evolve_ppar=evolve_ppar, + ion_dt=ion_dt) # Now # residual = f_electron_old + dt*RHS(f_electron_newvar) @@ -826,6 +827,12 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin f_electron_residual[ivpa,ivperp,iz,ir,is] = f_electron_newvar[ivpa,ivperp,iz,ir,is] - f_electron_residual[ivpa,ivperp,iz,ir,is] end + if evolve_ppar + begin_r_z_region() + @loop_r_z ir iz begin + electron_ppar_residual[iz,ir] = electron_ppar_newvar[iz,ir] - electron_ppar_residual[iz,ir] + end + end # Set residual to zero where pdf_electron is determined by boundary conditions. if vpa.n > 1 diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 871ac5a49..e712b1c36 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -720,8 +720,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field io_electron=io_initial_electron, initial_time=code_time, residual_tolerance=t_input["initialization_residual_value"], - evolve_ppar=true, - solution_method="artificial_time_derivative") + evolve_ppar=true) if success != "" error("!!!max number of iterations for electron pdf update exceeded!!!\n" * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 2ed66a62d..12b55839d 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -708,7 +708,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop error("Cannot use implicit_ion_advance and implicit_vpa_advection at 
the same " * "time") end - if nl_solver_electron_advance_params !== nothing && t_params.implicit_electron_ppar + if t_params.implicit_electron_advance && t_params.implicit_electron_ppar error("Cannot use implicit_electron_advance and implicit_electron_ppar at the " * "same time.") end @@ -3489,7 +3489,7 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen electron_z_advect, electron_vpa_advect = advect_objects.electron_z_advect, advect_objects.electron_vpa_advect neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect - if nl_solver_params.electron_advance !== nothing + if t_params.implicit_electron_advance success = implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, moments, fields, collisions, composition, geometry, external_source_settings, @@ -3499,9 +3499,7 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen electron_vpa_advect, gyroavs, scratch_dummy, dt, nl_solver_params.electron_advance) elseif t_params.implicit_electron_ppar - #max_electron_pdf_iterations = 1000 - #max_electron_sim_time = nothing - max_electron_pdf_iterations = nothing + max_electron_pdf_iterations = 1000 max_electron_sim_time = 1.0e-3 electron_success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, From 5d84d62bbbe850ad8dc939b0c00173bbd19b4d49 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 12 Aug 2024 17:18:50 +0100 Subject: [PATCH 012/107] Limit electron dt to be less than 0.5*ion_dt Prevents problems with convergence. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index c7d71f7f3..35c451048 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -650,6 +650,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos moments.neutral.pz, moments.electron, collisions, ion_dt, composition, external_source_settings.electron, num_diss_params, z) + + if t_params.dt[] > 0.5 * ion_dt + begin_serial_region() + @serial_region begin + t_params.dt[] = 0.5 * ion_dt + end + end end if !evolve_ppar @@ -1088,6 +1095,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end end + if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] + # Reset dt in case it was reduced to be less than 0.5*ion_dt + begin_serial_region() + @serial_region begin + t_params.dt[] = t_params.previous_dt[] + end + end if !electron_pdf_converged success = "kinetic-electrons" else From 64003eb2e6048b9c2898abd19abd02b6ca6ea90e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 12 Aug 2024 17:23:40 +0100 Subject: [PATCH 013/107] Adjust electron dt based on iteration counts --- ...netic-implicit-electron_ppar-loworder.toml | 4 +- .../src/electron_kinetic_equation.jl | 131 ++++++++++++------ moment_kinetics/src/initial_conditions.jl | 5 - moment_kinetics/src/nonlinear_solvers.jl | 6 +- moment_kinetics/src/time_advance.jl | 9 +- 5 files changed, 99 insertions(+), 56 deletions(-) diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml index c57ceaafa..f82a5c2f3 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml +++ 
b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -109,10 +109,10 @@ converged_residual_value = 1.0e-2 #debug_io = 10000 [nonlinear_solver] -nonlinear_max_iterations = 1000 +nonlinear_max_iterations = 100 rtol = 1.0e-5 atol = 1.0e-12 -linear_restart = 40 +#linear_restart = 40 #linear_restart = 200 [ion_numerical_dissipation] diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 35c451048..5b5d10b60 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -599,6 +599,12 @@ end """ Update the electron distribution function using backward-Euler for an artifical time advance of the electron kinetic equation until a steady-state solution is reached. + +Note that this function does not use the [`runge_kutta`](@ref) timestep functionality. +`t_params.previous_dt[]` is used to store the (adaptively updated) initial timestep of the +pseudotimestepping loop (initial value of `t_params.dt[]` within +`electron_backward_euler!()`). `t_params.dt[]` is adapted according to the iteration +counts of the Newton solver. 
""" function electron_backward_euler!(scratch, pdf, moments, phi, collisions, composition, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, @@ -611,6 +617,11 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") end + begin_serial_region() + @serial_region begin + t_params.dt[] = t_params.previous_dt[] + end + begin_r_z_region() # create several (r) dimension dummy arrays for use in taking derivatives @@ -637,6 +648,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos num_diss_params.electron.moment_dissipation_coefficient, composition.electron_physics) + reduced_by_ion_dt = false if ion_dt !== nothing evolve_ppar = true @@ -656,6 +668,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos @serial_region begin t_params.dt[] = 0.5 * ion_dt end + reduced_by_ion_dt = true end end @@ -708,6 +721,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.moments_output_counter[], r, z, vperp, vpa) end end + first_step = true # evolve (artificially) in time until the residual is less than the tolerance while (!electron_pdf_converged && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations) @@ -919,8 +933,81 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos left_preconditioner=nothing, right_preconditioner=nothing, coords=(r=r, z=z, vperp=vperp, vpa=vpa)) - if !newton_success - error("electron_backward_euler() Newton solve failed") + if newton_success + #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) + begin_serial_region() + @serial_region begin + # update the time following the pdf update + t_params.t[] += t_params.dt[] + + if first_step && !reduced_by_ion_dt + # Adjust 
t_params.previous_dt[] which gives the initial timestep for + # the electron pseudotimestepping loop. + # If ion_dt ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) + elseif nl_solver_params.max_linear_iterations_this_step[] > 10 + # Step succeeded, but took a lot of iterations so decrease initial + # step size. + print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) + t_params.previous_dt[] /= t_params.max_increase_factor + println(" -> ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) + elseif nl_solver_params.max_linear_iterations_this_step[] < 2 + # Only took a few iterations, so increase initial step size. + print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) + t_params.previous_dt[] *= t_params.max_increase_factor + println(" -> ", t_params.previous_dt[]) + end + end + + # Adjust the timestep depending on the iteration count. + # Note nl_solver_params.max_linear_iterations_this_step[] gives the total + # number of iterations, so is a better measure of the total work done by + # the solver than the nonlinear iteration count, or the linear iterations + # per nonlinear iteration + #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) + if nl_solver_params.max_linear_iterations_this_step[] > 5 && t_params.dt[] > t_params.previous_dt[] + # Step succeeded, but took a lot of iterations so decrease step size. + t_params.dt[] /= t_params.max_increase_factor + #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.05 * nl_solver_params.nonlinear_max_iterations, 5) + elseif nl_solver_params.max_linear_iterations_this_step[] < 2 + #elseif nl_solver_params.max_nonlinear_iterations_this_step[] < 3 + # Only took a few iterations, so increase step size. 
+ t_params.dt[] *= t_params.max_increase_factor + end +#if nl_solver_params.max_nonlinear_iterations_this_step[] < 4 +# # Only took a few iterations, so increase step size. +# t_params.dt[] *= 1.5 +#elseif nl_solver_params.max_nonlinear_iterations_this_step[] > 10 +# # Only took a few iterations, so increase step size. +# t_params.dt[] *= 0.9 +#end + end + _block_synchronize() + + first_step = false + else + begin_serial_region() + @serial_region begin + t_params.dt[] *= 0.5 + end + _block_synchronize() + + # Swap old_scratch and new_scratch so that the next step restarts from the + # same state + scratch[1] = new_scratch + scratch[t_params.n_rk_stages+1] = old_scratch + old_scratch = scratch[1] + new_scratch = scratch[t_params.n_rk_stages+1] end apply_electron_bc_and_constraints!(new_scratch, phi, moments, z, vperp, vpa, @@ -966,43 +1053,8 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end update_derived_moments_and_derivatives() - #if t_params.adaptive && istage == t_params.n_rk_stages - # if ion_dt === nothing - # local_max_dt = Inf - # else - # # Ensure timestep is not too big, so that d(electron_ppar)/dt 'source - # # term' is numerically stable. - # local_max_dt = 0.5 * ion_dt - # end - # electron_adaptive_timestep_update!(scratch, t_params.t[], t_params, - # moments, phi, z_advect, vpa_advect, - # composition, r, z, vperp, vpa, - # vperp_spectral, vpa_spectral, - # external_source_settings, - # num_diss_params; - # evolve_ppar=evolve_ppar, - # local_max_dt=local_max_dt) - # # Re-do this in case electron_adaptive_timestep_update!() re-arranged the - # # `scratch` vector - # new_scratch = scratch[istage+1] - # old_scratch = scratch[istage] - - # if t_params.previous_dt[] == 0.0 - # # Re-calculate moments and moment derivatives as the timstep needs to - # # be re-done with a smaller dt, so scratch[t_params.n_rk_stages+1] has - # # been reset to the values from the beginning of the timestep here. 
- # update_derived_moments_and_derivatives(true) - # end - #end - - # update the time following the pdf update - @serial_region begin - t_params.t[] += t_params.previous_dt[] - end - _block_synchronize() - residual = -1.0 - if t_params.previous_dt[] > 0.0 + if newton_success # Calculate residuals to decide if iteration is converged. # Might want an option to calculate the residual only after a certain number # of iterations (especially during initialization when there are likely to be @@ -1046,8 +1098,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end end - if ((t_params.adaptive && t_params.write_moments_output[]) - || (!t_params.adaptive && t_params.step_counter[] % t_params.nwrite_moments == 0) + if ((t_params.step_counter[] % t_params.nwrite_moments == 0) || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) begin_serial_region() diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index e712b1c36..73afb2bdf 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -770,11 +770,6 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field gyroavs, scratch_dummy, 0.0, initialisation_nl_solver_params) else - begin_serial_region() - @serial_region begin - t_params.electron.dt[] = t_input["dt"] - t_params.electron.previous_dt[] = t_input["dt"] - end success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, z_spectral, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 62d15b791..e31974f22 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -61,6 +61,7 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} stage_counter::Ref{mk_int} serial_solve::Bool max_nonlinear_iterations_this_step::Ref{mk_int} + max_linear_iterations_this_step::Ref{mk_int} 
preconditioner_update_interval::mk_int preconditioners::Tprecon end @@ -141,7 +142,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol nl_solver_input.linear_rtol, nl_solver_input.linear_atol, linear_restart, nl_solver_input.linear_max_restarts, H, V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), - Ref(0), Ref(0), serial_solve, Ref(0), + Ref(0), Ref(0), serial_solve, Ref(0), Ref(0), nl_solver_input.preconditioner_update_interval, preconditioners) end @@ -156,6 +157,7 @@ function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_in end nl_solver_params.max_nonlinear_iterations_this_step[] = 0 + nl_solver_params.max_linear_iterations_this_step[] = 0 return nothing end @@ -375,6 +377,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params.linear_iterations[] += linear_counter nl_solver_params.max_nonlinear_iterations_this_step[] = max(counter, nl_solver_params.max_nonlinear_iterations_this_step[]) + nl_solver_params.max_linear_iterations_this_step[] = + max(linear_counter, nl_solver_params.max_linear_iterations_this_step[]) # println("Newton iterations: ", counter) # println("Final residual: ", residual_norm) # println("Total linear iterations: ", linear_counter) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 12b55839d..a7cdd069e 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -772,11 +772,6 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop resize!(t_params.electron.dfns_output_times, 0) t_params.electron.moments_output_counter[] = 1 t_params.electron.dfns_output_counter[] = 1 - begin_serial_region() - @serial_region begin - t_params.electron.dt[] = t_input["electron_t_input"]["dt"] - t_params.electron.previous_dt[] = t_input["electron_t_input"]["dt"] - end elseif composition.electron_physics != restart_electron_physics 
begin_serial_region() @serial_region begin @@ -2379,9 +2374,7 @@ function apply_all_bcs_constraints_update_moments!( composition.electron_physics) if composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) - #max_electron_pdf_iterations = 1000 - #max_electron_sim_time = nothing - max_electron_pdf_iterations = nothing + max_electron_pdf_iterations = 1000 max_electron_sim_time = 1.0e-3 # Copy ion and electron moments from `scratch` into `moments` to be used in From be22192f5b41e1a1ff100b9af12455fa2ec54257 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 09:22:04 +0100 Subject: [PATCH 014/107] Move nonlinear solver stage_counter increment to counter reset function Can take care of incrementing `stage_counter[]` in `reset_nonlinear_per_stage_counters!()`, so it does not have to be done separately for each solver. Also rename to `reset_nonlinear_per_stage_counters!()` from `reset_nonlinear_per_stage_counters()` to indicate that this function modifies its argument. 
--- moment_kinetics/src/electron_fluid_equations.jl | 2 -- moment_kinetics/src/electron_kinetic_equation.jl | 4 +--- moment_kinetics/src/nonlinear_solvers.jl | 11 ++++++++--- moment_kinetics/src/time_advance.jl | 7 +++---- moment_kinetics/src/vpa_advection.jl | 2 -- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 395a94f4c..68e800e85 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -448,8 +448,6 @@ function implicit_braginskii_conduction!(fvec_out, fvec_in, moments, z, r, dt, z end end - nl_solver_params.stage_counter[] += 1 - return true end diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 5b5d10b60..d21437f19 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -728,8 +728,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time)) && t_params.dt[] > 0.0 && !isnan(t_params.dt[])) - reset_nonlinear_per_stage_counters(nl_solver_params) - old_scratch = scratch[1] new_scratch = scratch[t_params.n_rk_stages+1] @@ -1113,7 +1111,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end - # check to see if the electron pdf satisfies the electron kinetic equation to within the specified tolerance + reset_nonlinear_per_stage_counters!(nl_solver_params) t_params.step_counter[] += 1 if electron_pdf_converged diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index e31974f22..4595a6b96 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -26,7 +26,7 @@ Useful references: module nonlinear_solvers export 
setup_nonlinear_solve, gather_nonlinear_solver_counters!, - reset_nonlinear_per_stage_counters, newton_solve! + reset_nonlinear_per_stage_counters!, newton_solve! using ..array_allocation: allocate_float, allocate_shared_float using ..communication @@ -147,11 +147,13 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol end """ - reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) + reset_nonlinear_per_stage_counters!(nl_solver_params::Union{nl_solver_info,Nothing}) Reset the counters that hold per-step totals or maximums in `nl_solver_params`. + +Also increment `nl_solver_params.stage_counter[]`. """ -function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) +function reset_nonlinear_per_stage_counters!(nl_solver_params::Union{nl_solver_info,Nothing}) if nl_solver_params === nothing return nothing end @@ -159,6 +161,9 @@ function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_in nl_solver_params.max_nonlinear_iterations_this_step[] = 0 nl_solver_params.max_linear_iterations_this_step[] = 0 + # Also increment the stage counter + nl_solver_params.stage_counter[] += 1 + return nothing end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index a7cdd069e..146e834b6 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3115,8 +3115,9 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, # println() #end - reset_nonlinear_per_stage_counters(nl_solver_params.ion_advance) - reset_nonlinear_per_stage_counters(nl_solver_params.vpa_advection) + reset_nonlinear_per_stage_counters!(nl_solver_params.ion_advance) + reset_nonlinear_per_stage_counters!(nl_solver_params.vpa_advection) + reset_nonlinear_per_stage_counters!(nl_solver_params.electron_conduction) if t_params.previous_dt[] > 0.0 istage = n_rk_stages+1 @@ -3790,8 +3791,6 @@ function 
implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o left_preconditioner=left_preconditioner, right_preconditioner=right_preconditioner) - nl_solver_params.stage_counter[] += 1 - return success end diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index bd70b3503..3d9b5897e 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -309,8 +309,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_ end end - nl_solver_params.stage_counter[] += 1 - return true end From ab30e8501921cf83b12248268a1dc3b7d5b3cc80 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 13:56:27 +0100 Subject: [PATCH 015/107] Comment out unused electron_kinetic_equation_residual!() --- .../src/electron_kinetic_equation.jl | 166 +++++++++--------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index d21437f19..a715d38f6 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2438,89 +2438,89 @@ function electron_kinetic_equation_euler_update!(fvec_out, fvec_in, moments, z, return nothing end -""" -electron_kinetic_equation_residual! 
calculates the residual of the (time-independent) electron kinetic equation -INPUTS: - residual = dummy array to be filled with the residual of the electron kinetic equation -OUTPUT: - residual = updated residual of the electron kinetic equation -""" -function electron_kinetic_equation_residual!(residual, max_term, single_term, pdf, dens, upar, vth, ppar, upar_ion, - ddens_dz, dppar_dz, dqpar_dz, dvth_dz, - z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - collisions, external_source_settings, - num_diss_params, dt_electron) - - # initialise the residual to zero - begin_r_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - residual[ivpa,ivperp,iz,ir] = 0.0 - end - # calculate the contribution to the residual from the z advection term - electron_z_advection!(residual, pdf, upar, vth, z_advect, z, vpa.grid, z_spectral, scratch_dummy, -1.0) - #dt_max_zadv = simple_z_advection!(residual, pdf, vth, z, vpa.grid, dt_electron) - #single_term .= residual - #max_term .= abs.(residual) - #println("z_adv residual = ", maximum(abs.(single_term))) - #println("z_advection: ", sum(residual), " dqpar_dz: ", sum(abs.(dqpar_dz))) - #calculate_contribution_from_z_advection!(residual, pdf, vth, z, vpa.grid, z_spectral, scratch_dummy) - # add in the contribution to the residual from the wpa advection term - electron_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, - vpa_advect, vpa, vpa_spectral, scratch_dummy, -1.0, - external_source_settings.electron) - #dt_max_vadv = simple_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, vpa, dt_electron) - #@. single_term = residual - single_term - #max_term .= max.(max_term, abs.(single_term)) - #@. 
single_term = residual - #println("v_adv residual = ", maximum(abs.(single_term))) - #add_contribution_from_wpa_advection!(residual, pdf, vth, ppar, dppar_dz, dqpar_dz, dvth_dz, vpa, vpa_spectral) - # add in the contribution to the residual from the term proportional to the pdf - add_contribution_from_pdf_term!(residual, pdf, ppar, dens, moments, vpa.grid, z, -1.0, - external_source_settings.electron) - #@. single_term = residual - single_term - #max_term .= max.(max_term, abs.(single_term)) - #@. single_term = residual - #println("pdf_term residual = ", maximum(abs.(single_term))) - # @loop_vpa ivpa begin - # @loop_z iz begin - # println("LHS: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 1) - # end - # println("") - # end - # println("") - # add in numerical dissipation terms - add_dissipation_term!(residual, pdf, scratch_dummy, z_spectral, z, vpa, vpa_spectral, - num_diss_params, -1.0) - #@. single_term = residual - single_term - #println("dissipation residual = ", maximum(abs.(single_term))) - #max_term .= max.(max_term, abs.(single_term)) - # add in particle and heat source term(s) - #@. single_term = residual - #add_source_term!(residual, vpa.grid, z.grid, dvth_dz) - #@. single_term = residual - single_term - #max_term .= max.(max_term, abs.(single_term)) - #stop() - # @loop_vpa ivpa begin - # @loop_z iz begin - # println("total_residual: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 2) - # end - # println("") - # end - # stop() - #dt_max = min(dt_max_zadv, dt_max_vadv) - - if collisions.krook_collision_frequency_prefactor_ee > 0.0 - # Add a Krook collision operator - # Set dt=-1 as we update the residual here rather than adding an update to - # 'fvec_out'. 
- electron_krook_collisions!(residual, pdf, dens, upar, upar_ion, vth, - collisions, vperp, vpa, -1.0) - end - - dt_max = dt_electron - #println("dt_max: ", dt_max, " dt_max_zadv: ", dt_max_zadv, " dt_max_vadv: ", dt_max_vadv) - return dt_max -end +#""" +#electron_kinetic_equation_residual! calculates the residual of the (time-independent) electron kinetic equation +#INPUTS: +# residual = dummy array to be filled with the residual of the electron kinetic equation +#OUTPUT: +# residual = updated residual of the electron kinetic equation +#""" +#function electron_kinetic_equation_residual!(residual, max_term, single_term, pdf, dens, upar, vth, ppar, upar_ion, +# ddens_dz, dppar_dz, dqpar_dz, dvth_dz, +# z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, +# collisions, external_source_settings, +# num_diss_params, dt_electron) +# +# # initialise the residual to zero +# begin_r_vperp_vpa_region() +# @loop_r_z_vperp_vpa ir iz ivperp ivpa begin +# residual[ivpa,ivperp,iz,ir] = 0.0 +# end +# # calculate the contribution to the residual from the z advection term +# electron_z_advection!(residual, pdf, upar, vth, z_advect, z, vpa.grid, z_spectral, scratch_dummy, -1.0) +# #dt_max_zadv = simple_z_advection!(residual, pdf, vth, z, vpa.grid, dt_electron) +# #single_term .= residual +# #max_term .= abs.(residual) +# #println("z_adv residual = ", maximum(abs.(single_term))) +# #println("z_advection: ", sum(residual), " dqpar_dz: ", sum(abs.(dqpar_dz))) +# #calculate_contribution_from_z_advection!(residual, pdf, vth, z, vpa.grid, z_spectral, scratch_dummy) +# # add in the contribution to the residual from the wpa advection term +# electron_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, +# vpa_advect, vpa, vpa_spectral, scratch_dummy, -1.0, +# external_source_settings.electron) +# #dt_max_vadv = simple_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, vpa, dt_electron) +# #@. 
single_term = residual - single_term +# #max_term .= max.(max_term, abs.(single_term)) +# #@. single_term = residual +# #println("v_adv residual = ", maximum(abs.(single_term))) +# #add_contribution_from_wpa_advection!(residual, pdf, vth, ppar, dppar_dz, dqpar_dz, dvth_dz, vpa, vpa_spectral) +# # add in the contribution to the residual from the term proportional to the pdf +# add_contribution_from_pdf_term!(residual, pdf, ppar, dens, moments, vpa.grid, z, -1.0, +# external_source_settings.electron) +# #@. single_term = residual - single_term +# #max_term .= max.(max_term, abs.(single_term)) +# #@. single_term = residual +# #println("pdf_term residual = ", maximum(abs.(single_term))) +# # @loop_vpa ivpa begin +# # @loop_z iz begin +# # println("LHS: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 1) +# # end +# # println("") +# # end +# # println("") +# # add in numerical dissipation terms +# add_dissipation_term!(residual, pdf, scratch_dummy, z_spectral, z, vpa, vpa_spectral, +# num_diss_params, -1.0) +# #@. single_term = residual - single_term +# #println("dissipation residual = ", maximum(abs.(single_term))) +# #max_term .= max.(max_term, abs.(single_term)) +# # add in particle and heat source term(s) +# #@. single_term = residual +# #add_source_term!(residual, vpa.grid, z.grid, dvth_dz) +# #@. single_term = residual - single_term +# #max_term .= max.(max_term, abs.(single_term)) +# #stop() +# # @loop_vpa ivpa begin +# # @loop_z iz begin +# # println("total_residual: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 2) +# # end +# # println("") +# # end +# # stop() +# #dt_max = min(dt_max_zadv, dt_max_vadv) +# +# if collisions.krook_collision_frequency_prefactor_ee > 0.0 +# # Add a Krook collision operator +# # Set dt=-1 as we update the residual here rather than adding an update to +# # 'fvec_out'. 
+# electron_krook_collisions!(residual, pdf, dens, upar, upar_ion, vth, +# collisions, vperp, vpa, -1.0) +# end +# +# dt_max = dt_electron +# #println("dt_max: ", dt_max, " dt_max_zadv: ", dt_max_zadv, " dt_max_vadv: ", dt_max_vadv) +# return dt_max +#end function simple_z_advection!(advection_term, pdf, vth, z, vpa, dt_max_in) dt_max = dt_max_in From e593f362bbcf15a1ec687cdb77a68c89877d771b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 10:37:58 +0100 Subject: [PATCH 016/107] Move loop over r-dimension outside implicit solve for electrons For now, no parallelism in r supported - in future, need to add special functionality for that, similar to the 'anyv' regions, to support 2D simulations. --- moment_kinetics/src/boundary_conditions.jl | 19 +- moment_kinetics/src/derivatives.jl | 170 ++- .../src/electron_fluid_equations.jl | 206 +-- .../src/electron_kinetic_equation.jl | 1170 +++++++++-------- moment_kinetics/src/electron_vpa_advection.jl | 81 +- moment_kinetics/src/electron_z_advection.jl | 54 +- moment_kinetics/src/external_sources.jl | 29 +- moment_kinetics/src/krook_collisions.jl | 81 +- moment_kinetics/src/load_data.jl | 15 +- moment_kinetics/src/nonlinear_solvers.jl | 136 +- moment_kinetics/src/time_advance.jl | 86 +- moment_kinetics/src/velocity_moments.jl | 54 + 12 files changed, 1200 insertions(+), 901 deletions(-) diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 4a1216bf7..36ef5916e 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -987,24 +987,33 @@ function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,5}, bc, vpe end function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,4}, bc, vperp, vperp_spectral, vperp_advect, diffusion) + @loop_r ir begin + @views enforce_vperp_boundary_condition!(f[:,:,:,ir], bc, vperp, vperp_spectral, + vperp_advect, diffusion, ir) + end + return nothing +end + 
+function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,3}, bc, vperp, + vperp_spectral, vperp_advect, diffusion, ir) if bc == "zero" || bc == "zero-impose-regularity" nvperp = vperp.n ngrid = vperp.ngrid # set zero boundary condition - @loop_r_z_vpa ir iz ivpa begin + @loop_z_vpa iz ivpa begin if diffusion || vperp_advect.speed[nvperp,ivpa,iz,ir] < 0.0 - f[ivpa,nvperp,iz,ir] = 0.0 + f[ivpa,nvperp,iz] = 0.0 end end # set regularity condition d F / d vperp = 0 at vperp = 0 if bc == "zero-impose-regularity" && (vperp.discretization == "gausslegendre_pseudospectral" || vperp.discretization == "chebyshev_pseudospectral") D0 = vperp_spectral.radau.D0 buffer = @view vperp.scratch[1:ngrid-1] - @loop_r_z_vpa ir iz ivpa begin + @loop_z_vpa iz ivpa begin if diffusion || vperp_advect.speed[1,ivpa,iz,ir] > 0.0 # adjust F(vperp = 0) so that d F / d vperp = 0 at vperp = 0 - @views @. buffer = D0[2:ngrid] * f[ivpa,2:ngrid,iz,ir] - f[ivpa,1,iz,ir] = -sum(buffer)/D0[1] + @views @. buffer = D0[2:ngrid] * f[ivpa,2:ngrid,iz] + f[ivpa,1,iz] = -sum(buffer)/D0[1] end end elseif bc == "zero" diff --git a/moment_kinetics/src/derivatives.jl b/moment_kinetics/src/derivatives.jl index 71bd31427..1506ebbd1 100644 --- a/moment_kinetics/src/derivatives.jl +++ b/moment_kinetics/src/derivatives.jl @@ -11,10 +11,14 @@ module derivatives export derivative_r!, derivative_r_chrg!, derivative_r_ntrl! export derivative_z!, derivative_z_chrg!, derivative_z_ntrl! -using ..calculus: derivative!, second_derivative!, reconcile_element_boundaries_MPI! +using ..calculus: derivative!, second_derivative!, reconcile_element_boundaries_MPI!, + apply_adv_fac! +using ..communication using ..type_definitions: mk_float using ..looping +using MPI + """ Centered derivatives df/dr group of rountines for @@ -241,6 +245,85 @@ function derivative_z!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_floa end end +# df/dz +# 3D version for f[vpa,vperp,z]. 
Uses modified function name to avoid clash with 'standard' +# 3D version for ion/neutral moments. +function derivative_z_pdf_vpavperpz!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_float,3}, + dfdz_lower_endpoints::AbstractArray{mk_float,2}, + dfdz_upper_endpoints::AbstractArray{mk_float,2}, + z_receive_buffer1::AbstractArray{mk_float,2}, + z_receive_buffer2::AbstractArray{mk_float,2}, z_spectral, z) + + # differentiate f w.r.t z + @loop_vperp_vpa ivperp ivpa begin + @views derivative!(dfdz[ivpa,ivperp,:], f[ivpa,ivperp,:], z, z_spectral) + # get external endpoints to reconcile via MPI + dfdz_lower_endpoints[ivpa,ivperp] = z.scratch_2d[1,1] + dfdz_upper_endpoints[ivpa,ivperp] = z.scratch_2d[end,end] + end + + # now reconcile element boundaries across + # processes with large message + if z.nelement_local < z.nelement_global + # synchronize buffers + # -- this all-to-all block communicate here requires that this function is NOT called from within a parallelised loop + # -- or from a @serial_region or from an if statement isolating a single rank on a block + _block_synchronize() + @serial_region begin + # now deal with endpoints that are stored across ranks + comm = z.comm + nrank = z.nrank + irank = z.irank + # sending pattern is cyclic. First we send data from irank -> irank + 1 + # to fix the lower endpoints, then we send data from irank -> irank - 1 + # to fix upper endpoints. Special exception for the periodic points.
+ # receive_buffer[1] is for data received, send_buffer[1] is data to be sent + + # send highest end point on THIS rank + # pass data from irank -> irank + 1, receive data from irank - 1 + idst = mod(irank+1,nrank) # destination rank for sent data + isrc = mod(irank-1,nrank) # source rank for received data + rreq1 = MPI.Irecv!(z_receive_buffer1, comm; source=isrc, tag=1) + sreq1 = MPI.Isend(dfdz_upper_endpoints, comm; dest=idst, tag=1) + + # send lowest end point on THIS rank + # pass data from irank -> irank - 1, receive data from irank + 1 + idst = mod(irank-1,nrank) # destination rank for sent data + isrc = mod(irank+1,nrank) # source rank for received data + rreq2 = MPI.Irecv!(z_receive_buffer2, comm; source=isrc, tag=2) + sreq2 = MPI.Isend(dfdz_lower_endpoints, comm; dest=idst, tag=2) + stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) + + # now update receive buffers, taking into account the reconciliation + if irank == 0 + if z.bc == "periodic" + @. z_receive_buffer1 = 0.5 * (z_receive_buffer1 + dfdz_lower_endpoints) + else # directly use value from Cheb at extreme lower point + z_receive_buffer1 .= dfdz_lower_endpoints + end + else + @. z_receive_buffer1 = 0.5 * (z_receive_buffer1 + dfdz_lower_endpoints) + end + #now update the dfdz array -- using a slice appropriate to the dimension reconciled + @views dfdz[:,:,1] .= z_receive_buffer1 + + if irank == nrank-1 + if z.bc == "periodic" + @. z_receive_buffer2 = 0.5 * (z_receive_buffer2 + dfdz_upper_endpoints) + else #directly use value from Cheb + z_receive_buffer2 .= dfdz_upper_endpoints + end + else + @.
z_receive_buffer2 = 0.5 * (z_receive_buffer2 + dfdz_upper_endpoints) + end + #now update the dfdz array -- using a slice appropriate to the dimension reconciled + @views dfdz[:,:,end] .= z_receive_buffer2 + end + # synchronize buffers + _block_synchronize() + end +end + #5D version for f[vpa,vperp,z,r,s] -> dfn ions function derivative_z!(dfdz::AbstractArray{mk_float,5}, f::AbstractArray{mk_float,5}, dfdz_lower_endpoints::AbstractArray{mk_float,4}, @@ -790,6 +873,91 @@ function derivative_z!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_floa end end +# df/dz +# 3D version for f[vpa,vperp,z]. Uses modified function name to avoid clash with 'standard' +# 3D version for ion/neutral moments. +function derivative_z_pdf_vpavperpz!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_float,3}, + adv_fac, adv_fac_lower_buffer::AbstractArray{mk_float,2}, + adv_fac_upper_buffer::AbstractArray{mk_float,2}, + dfdz_lower_endpoints::AbstractArray{mk_float,2}, + dfdz_upper_endpoints::AbstractArray{mk_float,2}, + z_receive_buffer1::AbstractArray{mk_float,2}, + z_receive_buffer2::AbstractArray{mk_float,2}, z_spectral, z) + + # differentiate f w.r.t z + @loop_vperp_vpa ivperp ivpa begin + @views derivative!(dfdz[ivpa,ivperp,:], f[ivpa,ivperp,:], z, adv_fac[:,ivpa,ivperp], z_spectral) + # get external endpoints to reconcile via MPI + dfdz_lower_endpoints[ivpa,ivperp] = z.scratch_2d[1,1] + dfdz_upper_endpoints[ivpa,ivperp] = z.scratch_2d[end,end] + adv_fac_lower_buffer[ivpa,ivperp] = adv_fac[1,ivpa,ivperp] + adv_fac_upper_buffer[ivpa,ivperp] = adv_fac[end,ivpa,ivperp] + end + + # now reconcile element boundaries across + # processes with large message + if z.nelement_local < z.nelement_global + # synchronize buffers + # -- this all-to-all block communicate here requires that this function is NOT called from within a parallelised loop + # -- or from a @serial_region or from an if statement isolating a single rank on a block + _block_synchronize() + @serial_region begin + # now
deal with endpoints that are stored across ranks + comm = z.comm + nrank = z.nrank + irank = z.irank + # sending pattern is cyclic. First we send data from irank -> irank + 1 + # to fix the lower endpoints, then we send data from irank -> irank - 1 + # to fix upper endpoints. Special exception for the periodic points. + # receive_buffer[1] is for data received, send_buffer[1] is data to be sent + + # send highest end point on THIS rank + # pass data from irank -> irank + 1, receive data from irank - 1 + idst = mod(irank+1,nrank) # destination rank for sent data + isrc = mod(irank-1,nrank) # source rank for received data + rreq1 = MPI.Irecv!(z_receive_buffer1, comm; source=isrc, tag=1) + sreq1 = MPI.Isend(dfdz_upper_endpoints, comm; dest=idst, tag=1) + + # send lowest end point on THIS rank + # pass data from irank -> irank - 1, receive data from irank + 1 + idst = mod(irank-1,nrank) # destination rank for sent data + isrc = mod(irank+1,nrank) # source rank for received data + rreq2 = MPI.Irecv!(z_receive_buffer2, comm; source=isrc, tag=2) + sreq2 = MPI.Isend(dfdz_lower_endpoints, comm; dest=idst, tag=2) + stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) + + # now update receive buffers, taking into account the reconciliation + if irank == 0 + if z.bc == "periodic" + # depending on adv_fac, update the extreme lower endpoint with data from irank = nrank -1 + apply_adv_fac!(z_receive_buffer1, adv_fac_lower_buffer, dfdz_lower_endpoints, 1) + else # directly use value from Cheb at extreme lower point + z_receive_buffer1 .= dfdz_lower_endpoints + end + else # depending on adv_fac, update the lower endpoint with data from irank - 1 + apply_adv_fac!(z_receive_buffer1, adv_fac_lower_buffer, dfdz_lower_endpoints, 1) + end + #now update the dfdz array -- using a slice appropriate to the dimension reconciled + @views dfdz[:,:,1] .= z_receive_buffer1 + + if irank == nrank-1 + if z.bc == "periodic" + # depending on adv_fac, update the extreme upper endpoint with
data from irank = 0 + apply_adv_fac!(z_receive_buffer2, adv_fac_upper_buffer, dfdz_upper_endpoints, -1) + else #directly use value from Cheb + z_receive_buffer2 .= dfdz_upper_endpoints + end + else # enforce continuity at upper endpoint + apply_adv_fac!(z_receive_buffer2, adv_fac_upper_buffer, dfdz_upper_endpoints, -1) + end + #now update the dfdz array -- using a slice appropriate to the dimension reconciled + @views dfdz[:,:,end] .= z_receive_buffer2 + end + # synchronize buffers + _block_synchronize() + end +end + #5D version for f[vpa,vperp,z,r,s] -> dfn ion particles function derivative_z!(dfdz::AbstractArray{mk_float,5}, f::AbstractArray{mk_float,5}, advect, adv_fac_lower_buffer::AbstractArray{mk_float,4}, diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 68e800e85..7a86bf182 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -158,7 +158,26 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron ion_density, ion_upar, ion_ppar, density_neutral, uz_neutral, pz_neutral, moments, collisions, dt, composition, electron_source_settings, num_diss_params, - z; conduction=true) + r, z; conduction=true) + for ir ∈ 1:r.n + @views electron_energy_equation_no_r!(ppar_out[:,ir], ppar_in[:,ir], + electron_density[:,ir], electron_upar[:,ir], + ion_density[:,ir,:], ion_upar[:,ir,:], + ion_ppar[:,ir,:], density_neutral[:,ir,:], + uz_neutral[:,ir,:], pz_neutral[:,ir,:], + moments, collisions, dt, composition, + electron_source_settings, num_diss_params, + z, ir; conduction=conduction) + end + return nothing +end + +function electron_energy_equation_no_r!(ppar_out, ppar_in, electron_density, + electron_upar, ion_density, ion_upar, ion_ppar, + density_neutral, uz_neutral, pz_neutral, moments, + collisions, dt, composition, + electron_source_settings, num_diss_params, z, ir; + conduction=true) if
composition.electron_physics == kinetic_electrons_with_temperature_equation # Hacky way to implement temperature equation: # - convert ppar to T by dividing by density @@ -167,22 +186,22 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron # old density? For initial testing, only looking at the electron initialisation # where density is not updated, this does not matter). - begin_r_z_region() + begin_z_region() # define some abbreviated variables for convenient use in rest of function me_over_mi = composition.me_over_mi nu_ei = collisions.nu_ei - T_in = moments.temp + T_in = @view moments.temp[:,ir] # calculate contribution to rhs of energy equation (formulated in terms of pressure) # arising from derivatives of ppar, qpar and upar - @loop_r_z ir iz begin + @loop_z iz begin # Convert ppar_out to temperature for most of this function - ppar_out[iz,ir] *= 2.0 / electron_density[iz,ir] - ppar_out[iz,ir] -= dt*(electron_upar[iz,ir]*moments.dT_dz[iz,ir] - + 2.0*T_in[iz,ir]*moments.dupar_dz[iz,ir]) + ppar_out[iz] *= 2.0 / electron_density[iz] + ppar_out[iz] -= dt*(electron_upar[iz]*moments.dT_dz[iz,ir] + + 2.0*T_in[iz]*moments.dupar_dz[iz,ir]) end if conduction - @loop_r_z ir iz begin - ppar_out[iz,ir] -= 2.0 * dt*moments.dqpar_dz[iz,ir] / electron_density[iz,ir] + @loop_z iz begin + ppar_out[iz] -= 2.0 * dt*moments.dqpar_dz[iz,ir] / electron_density[iz] end end # compute the contribution to the rhs of the energy equation @@ -190,36 +209,36 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron diffusion_coefficient = num_diss_params.electron.moment_dissipation_coefficient if diffusion_coefficient > 0.0 error("diffusion not implemented for electron temperature equation yet") - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt*diffusion_coefficient*moments.d2T_dz2[iz,ir] + @loop_z iz begin + ppar_out[iz] += dt*diffusion_coefficient*moments.d2T_dz2[iz,ir] end end # compute the contribution to the rhs of the energy 
equation # arising from electron-ion collisions if nu_ei > 0.0 - @loop_s_r_z is ir iz begin - ppar_out[iz,ir] += dt * 2.0 * (2 * me_over_mi * nu_ei * (2.0*ion_ppar[iz,ir,is]/ion_density[iz,ir,is] - T_in[iz,ir])) - ppar_out[iz,ir] += dt * 2.0 * ((2/3) * moments.parallel_friction[iz,ir] - * (ion_upar[iz,ir,is]-electron_upar[iz,ir])) / electron_density[iz,ir] + @loop_s_z is iz begin + ppar_out[iz] += dt * 2.0 * (2 * me_over_mi * nu_ei * (2.0*ion_ppar[iz,is]/ion_density[iz,is] - T_in[iz])) + ppar_out[iz] += dt * 2.0 * ((2/3) * moments.parallel_friction[iz,ir] + * (ion_upar[iz,is]-electron_upar[iz])) / electron_density[iz] end end # add in contributions due to charge exchange/ionization collisions if composition.n_neutral_species > 0 if abs(collisions.charge_exchange_electron) > 0.0 - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += + @loop_sn_z isn iz begin + ppar_out[iz] += dt * 2.0 * me_over_mi * collisions.charge_exchange_electron * ( - 2*(pz_neutral[iz,ir,isn] - - density_neutral[iz,ir,isn]*ppar_in[iz,ir]/electron_density[iz,ir]) + - (2/3)*density_neutral[iz,ir,isn] * - (uz_neutral[iz,ir,isn] - electron_upar[iz,ir])^2) + 2*(pz_neutral[iz,isn] - + density_neutral[iz,isn]*ppar_in[iz]/electron_density[iz]) + + (2/3)*density_neutral[iz,isn] * + (uz_neutral[iz,isn] - electron_upar[iz])^2) end end if abs(collisions.ionization_electron) > 0.0 - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += - dt * 2.0 * collisions.ionization_electron * density_neutral[iz,ir,isn] * ( - ppar_in[iz,ir] / electron_density[iz,ir] - + @loop_sn_z isn iz begin + ppar_out[iz] += + dt * 2.0 * collisions.ionization_electron * density_neutral[iz,isn] * ( + ppar_in[iz] / electron_density[iz] - collisions.ionization_energy) end end @@ -228,88 +247,88 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron if electron_source_settings.active pressure_source_amplitude = moments.external_source_pressure_amplitude density_source_amplitude = 
moments.external_source_density_amplitude - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt * (2.0 * pressure_source_amplitude[iz,ir] - - T_in[iz,ir] * density_source_amplitude[iz,ir]) / - electron_density[iz,ir] + @loop_z iz begin + ppar_out[iz] += dt * (2.0 * pressure_source_amplitude[iz,ir] + - T_in[iz] * density_source_amplitude[iz,ir]) / + electron_density[iz] end end # Now that forward-Euler step for temperature is finished, convert ppar_out back to # pressure. - @loop_r_z ir iz begin - ppar_out[iz,ir] *= 0.5 * electron_density[iz,ir] + @loop_z iz begin + ppar_out[iz] *= 0.5 * electron_density[iz] end else - begin_r_z_region() + begin_z_region() # define some abbreviated variables for convenient use in rest of function me_over_mi = composition.me_over_mi nu_ei = collisions.nu_ei # calculate contribution to rhs of energy equation (formulated in terms of pressure) # arising from derivatives of ppar, qpar and upar - @loop_r_z ir iz begin - ppar_out[iz,ir] -= dt*(electron_upar[iz,ir]*moments.dppar_dz[iz,ir] - + 3*ppar_in[iz,ir]*moments.dupar_dz[iz,ir]) + @loop_z iz begin + ppar_out[iz] -= dt*(electron_upar[iz]*moments.dppar_dz[iz,ir] + + 3*ppar_in[iz]*moments.dupar_dz[iz,ir]) end if conduction - @loop_r_z ir iz begin - ppar_out[iz,ir] -= dt*moments.dqpar_dz[iz,ir] + @loop_z iz begin + ppar_out[iz] -= dt*moments.dqpar_dz[iz,ir] end end - # @loop_r_z ir iz begin - # ppar_out[iz,ir] -= dt*(electron_upar[iz,ir]*moments.dppar_dz[iz,ir] + # @loop_z iz begin + # ppar_out[iz] -= dt*(electron_upar[iz]*moments.dppar_dz[iz,ir] # + (2/3)*moments.dqpar_dz[iz,ir] - # + (5/3)*ppar_in[iz,ir]*moments.dupar_dz[iz,ir]) + # + (5/3)*ppar_in[iz]*moments.dupar_dz[iz,ir]) # end # compute the contribution to the rhs of the energy equation # arising from artificial diffusion diffusion_coefficient = num_diss_params.electron.moment_dissipation_coefficient if diffusion_coefficient > 0.0 - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt*diffusion_coefficient*moments.d2ppar_dz2[iz,ir] + @loop_z iz begin
+ ppar_out[iz] += dt*diffusion_coefficient*moments.d2ppar_dz2[iz,ir] end end # compute the contribution to the rhs of the energy equation # arising from electron-ion collisions if nu_ei > 0.0 - @loop_s_r_z is ir iz begin - ppar_out[iz,ir] += dt * (2 * me_over_mi * nu_ei * (ion_ppar[iz,ir,is] - ppar_in[iz,ir])) - ppar_out[iz,ir] += dt * ((2/3) * moments.parallel_friction[iz,ir] - * (ion_upar[iz,ir,is]-electron_upar[iz,ir])) + @loop_s_z is iz begin + ppar_out[iz] += dt * (2 * me_over_mi * nu_ei * (ion_ppar[iz,is] - ppar_in[iz])) + ppar_out[iz] += dt * ((2/3) * moments.parallel_friction[iz,ir] + * (ion_upar[iz,is]-electron_upar[iz])) end end # add in contributions due to charge exchange/ionization collisions if composition.n_neutral_species > 0 if abs(collisions.charge_exchange_electron) > 0.0 - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += + @loop_sn_z isn iz begin + ppar_out[iz] += dt * me_over_mi * collisions.charge_exchange_electron * ( - 2*(electron_density[iz,ir]*pz_neutral[iz,ir,isn] - - density_neutral[iz,ir,isn]*ppar_in[iz,ir]) + - (2/3)*electron_density[iz,ir]*density_neutral[iz,ir,isn] * - (uz_neutral[iz,ir,isn] - electron_upar[iz,ir])^2) + 2*(electron_density[iz]*pz_neutral[iz,isn] - + density_neutral[iz,isn]*ppar_in[iz]) + + (2/3)*electron_density[iz]*density_neutral[iz,isn] * + (uz_neutral[iz,isn] - electron_upar[iz])^2) end end if abs(collisions.ionization_electron) > 0.0 - # @loop_s_r_z is ir iz begin - # ppar_out[iz,ir] += - # dt * collisions.ionization_electron * density_neutral[iz,ir,is] * ( - # ppar_in[iz,ir] - - # (2/3)*electron_density[iz,ir] * collisions.ionization_energy) + # @loop_s_z is iz begin + # ppar_out[iz] += + # dt * collisions.ionization_electron * density_neutral[iz,is] * ( + # ppar_in[iz] - + # (2/3)*electron_density[iz] * collisions.ionization_energy) # end - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += - dt * collisions.ionization_electron * density_neutral[iz,ir,isn] * ( - ppar_in[iz,ir] - - electron_density[iz,ir] *
collisions.ionization_energy) + @loop_sn_z isn iz begin + ppar_out[iz] += + dt * collisions.ionization_electron * density_neutral[iz,isn] * ( + ppar_in[iz] - + electron_density[iz] * collisions.ionization_energy) end end end if electron_source_settings.active source_amplitude = moments.external_source_pressure_amplitude - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt * source_amplitude[iz,ir] + @loop_z iz begin + ppar_out[iz] += dt * source_amplitude[iz,ir] end end end @@ -318,9 +337,9 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron end """ - electron_energy_residual!(residual, electron_ppar_out, fvec_in, moments, - collisions, composition, external_source_settings, - num_diss_params, z, dt) + electron_energy_residual!(residual, electron_ppar_out, electron_ppar_in, + fvec_in, moments, collisions, composition, + external_source_settings, num_diss_params, z, dt, ir) The residual is a function whose input is `electron_ppar`, so that when it's output `residual` is zero, electron_ppar is the result of a backward-Euler timestep: @@ -329,27 +348,34 @@ The residual is a function whose input is `electron_ppar`, so that when it's out This function assumes any needed moment derivatives are already calculated using `electron_ppar_out` and stored in `moments.electron`. + +Note that this function operates on a single point in `r`, given by `ir`, and `residual`, +`electron_ppar_out`, and `electron_ppar_in` should have no r-dimension.
""" -function electron_energy_residual!(residual, electron_ppar_out, fvec_in, moments, - collisions, composition, external_source_settings, - num_diss_params, z, dt) - begin_r_z_region() - electron_ppar_in = fvec_in.electron_ppar - @loop_r_z ir iz begin - residual[iz,ir] = electron_ppar_in[iz,ir] +function electron_energy_residual!(residual, electron_ppar_out, electron_ppar_in, + fvec_in, moments, collisions, composition, + external_source_settings, num_diss_params, z, dt, ir) + begin_z_region() + @loop_z iz begin + residual[iz] = electron_ppar_in[iz] end - electron_energy_equation!(residual, electron_ppar_out, - fvec_in.density, fvec_in.electron_upar, fvec_in.density, - fvec_in.upar, fvec_in.ppar, fvec_in.density_neutral, - fvec_in.uz_neutral, fvec_in.pz_neutral, moments.electron, - collisions, dt, composition, - external_source_settings.electron, num_diss_params, z) + @views electron_energy_equation_no_r!(residual, electron_ppar_out, + fvec_in.electron_density[:,ir], + fvec_in.electron_upar[:,ir], + fvec_in.density[:,ir,:], fvec_in.upar[:,ir,:], + fvec_in.ppar[:,ir,:], + fvec_in.density_neutral[:,ir,:], + fvec_in.uz_neutral[:,ir,:], + fvec_in.pz_neutral[:,ir,:], moments.electron, + collisions, dt, composition, + external_source_settings.electron, + num_diss_params, z, ir) # Now # residual = f_in + dt*RHS(f_out) # so update to desired residual - begin_r_z_region() - @loop_r_z ir iz begin - residual[iz,ir] = (electron_ppar_out[iz,ir] - residual[iz,ir]) + begin_z_region() + @loop_z iz begin + residual[iz] = (electron_ppar_out[iz] - residual[iz]) end end @@ -592,6 +618,20 @@ function calculate_electron_qpar_from_pdf!(qpar, ppar, vth, pdf, vpa) end end +""" +Calculate the parallel component of the electron heat flux, defined as qpar = 2 * ppar * +vth * int dwpa (pdf * wpa^3). This version of the function does not loop over `r`. `pdf` +should have no r-dimension, while the moment variables are indexed at `ir`.
+""" +function calculate_electron_qpar_from_pdf_no_r!(qpar, ppar, vth, pdf, vpa, ir) + # specialise to 1V for now + begin_z_region() + ivperp = 1 + @loop_z iz begin + @views qpar[iz] = 2*ppar[iz]*vth[iz]*integrate_over_vspace(pdf[:, ivperp, iz], vpa.grid.^3, vpa.wgts) + end +end + function calculate_electron_heat_source!(heat_source, ppar_e, dupar_dz, dens_n, ionization, ionization_energy, dens_e, ppar_i, nu_ei, me_over_mi, T_wall, z) begin_r_z_region() diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index a715d38f6..8dac6847e 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -18,8 +18,11 @@ using ..array_allocation: allocate_float using ..electron_fluid_equations: calculate_electron_moments!, update_electron_vth_temperature!, calculate_electron_qpar_from_pdf!, + calculate_electron_qpar_from_pdf_no_r!, calculate_electron_parallel_friction_force! -using ..electron_fluid_equations: electron_energy_equation!, electron_energy_residual! +using ..electron_fluid_equations: electron_energy_equation!, + electron_energy_equation_no_r!, + electron_energy_residual! using ..electron_z_advection: electron_z_advection!, update_electron_speed_z! using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa! using ..em_fields: update_phi! @@ -33,7 +36,8 @@ using ..nonlinear_solvers using ..runge_kutta: rk_update_variable!, rk_loworder_solution!, local_error_norm, adaptive_timestep_update_t_params! using ..utils: get_minimum_CFL_z, get_minimum_CFL_vpa -using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_derivatives! +using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_derivatives!, + calculate_electron_moment_derivatives_no_r! 
""" update_electron_pdf is a function that uses the electron kinetic equation @@ -206,7 +210,7 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll moments.neutral.dens, moments.neutral.uz, moments.neutral.pz, moments.electron, collisions, ion_dt, composition, external_source_settings.electron, - num_diss_params, z) + num_diss_params, r, z) end if !evolve_ppar @@ -344,14 +348,17 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll end # Do a forward-Euler update of the electron pdf, and (if evove_ppar=true) the # electron parallel pressure. - electron_kinetic_equation_euler_update!(scratch[istage+1], scratch[istage], - moments, z, vperp, vpa, z_spectral, - vpa_spectral, z_advect, vpa_advect, - scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, t_params.dt[]; - evolve_ppar=evolve_ppar, - ion_dt=ion_dt) + @loop_r ir begin + @views electron_kinetic_equation_euler_update!( + scratch[istage+1].pdf_electron[:,:,:,ir], + scratch[istage+1].electron_ppar[:,ir], + scratch[istage].pdf_electron[:,:,:,ir], + scratch[istage].electron_ppar[:,ir], moments, z, vperp, vpa, + z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + collisions, composition, external_source_settings, + num_diss_params, t_params.dt[], ir; evolve_ppar=evolve_ppar, + ion_dt=ion_dt) + end speedup_hack!(scratch[istage+1], scratch[istage], z_speedup_fac, z, vpa; evolve_ppar=evolve_ppar) @@ -622,16 +629,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.dt[] = t_params.previous_dt[] end - begin_r_z_region() - - # create several (r) dimension dummy arrays for use in taking derivatives - buffer_r_1 = @view scratch_dummy.buffer_rs_1[:,1] - buffer_r_2 = @view scratch_dummy.buffer_rs_2[:,1] - buffer_r_3 = @view scratch_dummy.buffer_rs_3[:,1] - buffer_r_4 = @view scratch_dummy.buffer_rs_4[:,1] - buffer_r_5 = @view scratch_dummy.buffer_rs_5[:,1] - buffer_r_6 = @view 
scratch_dummy.buffer_rs_6[:,1] - begin_r_z_region() @loop_r_z ir iz begin # update the electron thermal speed using the updated electron parallel pressure @@ -661,7 +658,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos moments.neutral.dens, moments.neutral.uz, moments.neutral.pz, moments.electron, collisions, ion_dt, composition, external_source_settings.electron, - num_diss_params, z) + num_diss_params, r, z) if t_params.dt[] > 0.5 * ion_dt begin_serial_region() @@ -710,8 +707,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # equation initial_step_counter = t_params.step_counter[] t_params.step_counter[] += 1 - # initialise the electron pdf convergence flag to false - electron_pdf_converged = false begin_serial_region() t_params.moments_output_counter[] += 1 @@ -721,400 +716,388 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.moments_output_counter[], r, z, vperp, vpa) end end - first_step = true - # evolve (artificially) in time until the residual is less than the tolerance - while (!electron_pdf_converged - && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations) - || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time)) - && t_params.dt[] > 0.0 && !isnan(t_params.dt[])) + electron_pdf_converged = false + # No paralleism in r for now - will need to add a specially adapted shared-memory + # parallelism scheme to allow it for 2D1V or 2D2V simulations. 
+ for ir ∈ 1:r.n + # create several 0D dummy arrays for use in taking derivatives + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # initialise the electron pdf convergence flag to false + electron_pdf_converged = false + + first_step = true + # evolve (artificially) in time until the residual is less than the tolerance + while (!electron_pdf_converged + && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations) + || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time)) + && t_params.dt[] > 0.0 && !isnan(t_params.dt[])) - old_scratch = scratch[1] - new_scratch = scratch[t_params.n_rk_stages+1] + old_scratch = scratch[1] + new_scratch = scratch[t_params.n_rk_stages+1] - # Set the initial values for the next step to the final values from the previous - # step. The initial guess for f_electron_new and electron_ppar_new are just the - # values from the old timestep, so no need to change those. - begin_r_z_vperp_vpa_region() - f_electron_old = old_scratch.pdf_electron - f_electron_new = new_scratch.pdf_electron - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - f_electron_old[ivpa,ivperp,iz,ir] = f_electron_new[ivpa,ivperp,iz,ir] - end - electron_ppar_old = old_scratch.electron_ppar - electron_ppar_new = new_scratch.electron_ppar - if evolve_ppar - begin_r_z_region() - @loop_r_z ir iz begin - electron_ppar_old[iz,ir] = electron_ppar_new[iz,ir] + # Set the initial values for the next step to the final values from the previous + # step. The initial guess for f_electron_new and electron_ppar_new are just the + # values from the old timestep, so no need to change those. 
+ begin_z_vperp_vpa_region() + f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] + f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir] + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_old[ivpa,ivperp,iz] = f_electron_new[ivpa,ivperp,iz] + end + electron_ppar_old = @view old_scratch.electron_ppar[:,ir] + electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_old[iz] = electron_ppar_new[iz] + end end - end - # Do a forward-Euler update of the electron pdf as an initial guess. Even when - # evolving electron_ppar, do not update electron_ppar here because if dt is bigger - # than ion_dt, then an explicit timestep will likely make electron_ppar over-shoot - # which would just take more iterations in the Newton-Krylov solve to fix. - electron_kinetic_equation_euler_update!(new_scratch, old_scratch, moments, z, - vperp, vpa, z_spectral, vpa_spectral, - z_advect, vpa_advect, scratch_dummy, - collisions, composition, - external_source_settings, num_diss_params, - t_params.dt[]) - - # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the - # electron parallel pressure. - function residual_func!(residual, new_variables) - electron_ppar_residual, f_electron_residual = residual - electron_ppar_newvar, f_electron_newvar = new_variables + # Do a forward-Euler update of the electron pdf as an initial guess. Even when + # evolving electron_ppar, do not update electron_ppar here because if dt is bigger + # than ion_dt, then an explicit timestep will likely make electron_ppar over-shoot + # which would just take more iterations in the Newton-Krylov solve to fix. 
+ electron_kinetic_equation_euler_update!(f_electron_new, electron_ppar_new, + f_electron_old, electron_ppar_old, + moments, z, vperp, vpa, z_spectral, + vpa_spectral, z_advect, vpa_advect, + scratch_dummy, collisions, + composition, external_source_settings, + num_diss_params, t_params.dt[], ir) - new_scratch_electron = scratch_electron_pdf(f_electron_newvar, electron_ppar_newvar) + # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the + # electron parallel pressure. + function residual_func!(residual, new_variables) + electron_ppar_residual, f_electron_residual = residual + electron_ppar_newvar, f_electron_newvar = new_variables - apply_electron_bc_and_constraints!(new_scratch_electron, phi, moments, z, - vperp, vpa, vperp_spectral, vpa_spectral, - vpa_advect, num_diss_params, composition) + apply_electron_bc_and_constraints_no_r!(f_electron_newvar, phi, moments, + z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, + num_diss_params, composition, ir) - # Only the first entry in the `electron_pdf_substruct` will be used, so does not - # matter what we put in the second and third except that they have the right type. 
- new_pdf = (electron=electron_pdf_substruct(f_electron_newvar, f_electron_newvar, - f_electron_newvar,),) - # Calculate heat flux and derivatives using new_variables - calculate_electron_qpar_from_pdf!(moments.electron.qpar, electron_ppar_newvar, - moments.electron.vth, f_electron_newvar, vpa) + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, ir) - if evolve_ppar - this_dens = moments.electron.dens - this_upar = moments.electron.upar - begin_r_z_region() - this_vth = moments.electron.vth - @loop_r_z ir iz begin - # update the electron thermal speed using the updated electron - # parallel pressure - this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] / - (this_dens[iz,ir] * - composition.me_over_mi))) + if evolve_ppar + this_dens = moments.electron.dens + this_upar = moments.electron.upar + this_vth = moments.electron.vth + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz] / + (this_dens[iz,ir] * + composition.me_over_mi))) + end + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=this_dens, + electron_upar=this_upar, + electron_ppar=electron_ppar_newvar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + else + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz[:,ir], + moments.electron.qpar[:,ir], buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) end - calculate_electron_moment_derivatives!( - moments, - (electron_density=this_dens, - electron_upar=this_upar, - electron_ppar=electron_ppar_newvar), - scratch_dummy, z, z_spectral, - num_diss_params.electron.moment_dissipation_coefficient, - composition.electron_physics) - else - # compute
the z-derivative of the parallel electron heat flux - @views derivative_z!(moments.electron.dqpar_dz, moments.electron.qpar, - buffer_r_1, buffer_r_2, buffer_r_3, buffer_r_4, - z_spectral, z) - end - if evolve_ppar - begin_r_z_region() - @loop_r_z ir iz begin - electron_ppar_residual[iz,ir] = electron_ppar_old[iz,ir] - end - else - begin_r_z_region() - @loop_r_z ir iz begin - electron_ppar_residual[iz,ir] = 0.0 + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = electron_ppar_old[iz] + end + else + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = 0.0 + end end - end - - # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the - # electron_pdf member of the first argument, so if we set the electron_pdf member - # of the first argument to zero, and pass dt=1, then it will evaluate the time - # derivative, which is the residual for a steady-state solution. - begin_r_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - f_electron_residual[ivpa,ivperp,iz,ir] = f_electron_old[ivpa,ivperp,iz,ir] - end - residual_scratch_electron = scratch_electron_pdf(f_electron_residual, - electron_ppar_residual) - new_scratch_electron = scratch_electron_pdf(f_electron_newvar, electron_ppar_newvar) - electron_kinetic_equation_euler_update!(residual_scratch_electron, - new_scratch_electron, moments, z, vperp, - vpa, z_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, t_params.dt[]; - evolve_ppar=evolve_ppar, - ion_dt=ion_dt) - # Now - # residual = f_electron_old + dt*RHS(f_electron_newvar) - # so update to desired residual - begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - f_electron_residual[ivpa,ivperp,iz,ir,is] = f_electron_newvar[ivpa,ivperp,iz,ir,is] - f_electron_residual[ivpa,ivperp,iz,ir,is] - end - if evolve_ppar - begin_r_z_region() - @loop_r_z ir iz begin -
electron_ppar_residual[iz,ir] = electron_ppar_newvar[iz,ir] - electron_ppar_residual[iz,ir] + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz] + end + electron_kinetic_equation_euler_update!( + f_electron_residual, electron_ppar_residual, f_electron_newvar, + electron_ppar_newvar, moments, z, vperp, vpa, z_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, + composition, external_source_settings, num_diss_params, t_params.dt[], + ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt) + + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = f_electron_newvar[ivpa,ivperp,iz] - f_electron_residual[ivpa,ivperp,iz] + end + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = electron_ppar_newvar[iz] - electron_ppar_residual[iz] + end end - end - # Set residual to zero where pdf_electron is determined by boundary conditions. - if vpa.n > 1 - begin_r_z_vperp_region() - @loop_r_z_vperp ir iz ivperp begin - @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz,ir], vpa.bc, - vpa_advect[1].speed[:,ivperp,iz,ir], - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - vpa, vpa_spectral) + # Set residual to zero where pdf_electron is determined by boundary conditions. 
+ if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end end - end - if vperp.n > 1 - begin_r_z_vpa_region() - enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, - vperp_adv, vperp_diffusion) - end - if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_electron_newvar, f_old, and residual - # should all be zero at exactly the same set of grid points, so it is - # reasonable to zero-out `residual` to impose the boundary condition. We - # impose this after subtracting f_old in case rounding errors, etc. mean - # that at some point f_old had a different boundary condition cut-off - # index. - begin_r_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - @loop_r ir begin + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_electron_newvar, f_old, and residual + # should all be zero at exactly the same set of grid points, so it is + # reasonable to zero-out `residual` to impose the boundary condition. We + # impose this after subtracting f_old in case rounding errors, etc. mean + # that at some point f_old had a different boundary condition cut-off + # index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], moments.electron.upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin if v_unnorm[ivpa] > -zero - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 + f_electron_residual[ivpa,ivperp,iz] = 0.0 end end end - end - if z.irank == z.nrank - 1 - iz = z.n - @loop_r ir begin + if z.irank == z.nrank - 1 + iz = z.n v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], moments.electron.upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin if v_unnorm[ivpa] < zero - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 + f_electron_residual[ivpa,ivperp,iz] = 0.0 end end end end - end - begin_r_z_region() - @loop_r_z ir iz begin - @views moment_constraints_on_residual!(f_electron_residual[:,:,iz,ir], - f_electron_newvar[:,:,iz,ir], - (evolve_density=true, - evolve_upar=true, - evolve_ppar=true), - vpa) - end - return nothing - end + begin_z_region() + @loop_z iz begin + @views moment_constraints_on_residual!(f_electron_residual[:,:,iz], + f_electron_newvar[:,:,iz], + (evolve_density=true, + evolve_upar=true, + evolve_ppar=true), + vpa) + end + return nothing + end + + residual = (scratch_dummy.implicit_buffer_z_1, scratch_dummy.implicit_buffer_vpavperpz_1) + delta_x = (scratch_dummy.implicit_buffer_z_2, + scratch_dummy.implicit_buffer_vpavperpz_2) + rhs_delta = (scratch_dummy.implicit_buffer_z_3, + scratch_dummy.implicit_buffer_vpavperpz_3) + v = (scratch_dummy.implicit_buffer_z_4, + scratch_dummy.implicit_buffer_vpavperpz_4) + w = (scratch_dummy.implicit_buffer_z_5, + scratch_dummy.implicit_buffer_vpavperpz_5) + + newton_success = newton_solve!((electron_ppar_new, f_electron_new), + residual_func!, residual, delta_x, rhs_delta, + v, w, nl_solver_params; + left_preconditioner=identity, + right_preconditioner=identity, + coords=(z=z, vperp=vperp, vpa=vpa)) + if newton_success + #println("Newton its ", 
nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) + begin_serial_region() + @serial_region begin + # update the time following the pdf update + t_params.t[] += t_params.dt[] + + if first_step && !reduced_by_ion_dt + # Adjust t_params.previous_dt[] which gives the initial timestep for + # the electron pseudotimestepping loop. + # If ion_dt ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) + elseif nl_solver_params.max_linear_iterations_this_step[] > 10 + # Step succeeded, but took a lot of iterations so decrease initial + # step size. + print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) + t_params.previous_dt[] /= t_params.max_increase_factor + println(" -> ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) + elseif nl_solver_params.max_linear_iterations_this_step[] < 2 + # Only took a few iterations, so increase initial step size. 
+ print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) + t_params.previous_dt[] *= t_params.max_increase_factor + println(" -> ", t_params.previous_dt[]) + end + end - residual = (scratch_dummy.implicit_buffer_zr_1, scratch_dummy.implicit_buffer_vpavperpzr_1) - delta_x = (scratch_dummy.implicit_buffer_zr_2, - scratch_dummy.implicit_buffer_vpavperpzr_2) - rhs_delta = (scratch_dummy.implicit_buffer_zr_3, - scratch_dummy.implicit_buffer_vpavperpzr_3) - v = (scratch_dummy.implicit_buffer_zr_4, - scratch_dummy.implicit_buffer_vpavperpzr_4) - w = (scratch_dummy.implicit_buffer_zr_5, - scratch_dummy.implicit_buffer_vpavperpzr_5) - - newton_success = newton_solve!((electron_ppar_new, f_electron_new), residual_func!, - residual, delta_x, rhs_delta, v, w, nl_solver_params; - left_preconditioner=nothing, - right_preconditioner=nothing, - coords=(r=r, z=z, vperp=vperp, vpa=vpa)) - if newton_success - #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) - begin_serial_region() - @serial_region begin - # update the time following the pdf update - t_params.t[] += t_params.dt[] - - if first_step && !reduced_by_ion_dt - # Adjust t_params.previous_dt[] which gives the initial timestep for - # the electron pseudotimestepping loop. - # If ion_dt ", t_params.previous_dt[]) - #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) - elseif nl_solver_params.max_linear_iterations_this_step[] > 10 - # Step succeeded, but took a lot of iterations so decrease initial - # step size. - print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) - t_params.previous_dt[] /= t_params.max_increase_factor - println(" -> ", t_params.previous_dt[]) - #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) + # Adjust the timestep depending on the iteration count. 
+ # Note nl_solver_params.max_linear_iterations_this_step[] gives the total + # number of iterations, so is a better measure of the total work done by + # the solver than the nonlinear iteration count, or the linear iterations + # per nonlinear iteration + #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) + if nl_solver_params.max_linear_iterations_this_step[] > 5 && t_params.dt[] > t_params.previous_dt[] + # Step succeeded, but took a lot of iterations so decrease step size. + t_params.dt[] /= t_params.max_increase_factor + #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.05 * nl_solver_params.nonlinear_max_iterations, 5) elseif nl_solver_params.max_linear_iterations_this_step[] < 2 - # Only took a few iterations, so increase initial step size. - print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) - t_params.previous_dt[] *= t_params.max_increase_factor - println(" -> ", t_params.previous_dt[]) + #elseif nl_solver_params.max_nonlinear_iterations_this_step[] < 3 + # Only took a few iterations, so increase step size. + t_params.dt[] *= t_params.max_increase_factor end end + _block_synchronize() - # Adjust the timestep depending on the iteration count. - # Note nl_solver_params.max_linear_iterations_this_step[] gives the total - # number of iterations, so is a better measure of the total work done by - # the solver than the nonlinear iteration count, or the linear iterations - # per nonlinear iteration - #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) - if nl_solver_params.max_linear_iterations_this_step[] > 5 && t_params.dt[] > t_params.previous_dt[] - # Step succeeded, but took a lot of iterations so decrease step size. 
- t_params.dt[] /= t_params.max_increase_factor - #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.05 * nl_solver_params.nonlinear_max_iterations, 5) - elseif nl_solver_params.max_linear_iterations_this_step[] < 2 - #elseif nl_solver_params.max_nonlinear_iterations_this_step[] < 3 - # Only took a few iterations, so increase step size. - t_params.dt[] *= t_params.max_increase_factor + first_step = false + else + begin_serial_region() + @serial_region begin + t_params.dt[] *= 0.5 end -#if nl_solver_params.max_nonlinear_iterations_this_step[] < 4 -# # Only took a few iterations, so increase step size. -# t_params.dt[] *= 1.5 -#elseif nl_solver_params.max_nonlinear_iterations_this_step[] > 10 -# # Only took a few iterations, so increase step size. -# t_params.dt[] *= 0.9 -#end - end - _block_synchronize() - - first_step = false - else - begin_serial_region() - @serial_region begin - t_params.dt[] *= 0.5 - end - _block_synchronize() - - # Swap old_scratch and new_scratch so that the next step restarts from the - # same state - scratch[1] = new_scratch - scratch[t_params.n_rk_stages+1] = old_scratch - old_scratch = scratch[1] - new_scratch = scratch[t_params.n_rk_stages+1] - end - - apply_electron_bc_and_constraints!(new_scratch, phi, moments, z, vperp, vpa, - vperp_spectral, vpa_spectral, vpa_advect, - num_diss_params, composition) - - function update_derived_moments_and_derivatives(update_vth=false) - # update the electron heat flux - moments.electron.qpar_updated[] = false - calculate_electron_qpar_from_pdf!(moments.electron.qpar, - electron_ppar_new, - moments.electron.vth, f_electron_new, vpa) + _block_synchronize() + + # Swap old_scratch and new_scratch so that the next step restarts from the + # same state + scratch[1] = new_scratch + scratch[t_params.n_rk_stages+1] = old_scratch + old_scratch = scratch[1] + new_scratch = scratch[t_params.n_rk_stages+1] + f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] + f_electron_new = @view 
new_scratch.pdf_electron[:,:,:,ir] + electron_ppar_old = @view old_scratch.electron_ppar[:,ir] + electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + end + + apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z, + vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, + num_diss_params, composition, ir) + + if !evolve_ppar + # update the electron heat flux + moments.electron.qpar_updated[] = false + calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar, + electron_ppar_new, + moments.electron.vth, + f_electron_new, vpa, ir) - if evolve_ppar - this_ppar = electron_ppar_new - this_dens = moments.electron.dens - this_upar = moments.electron.upar - if update_vth - begin_r_z_region() - this_vth = moments.electron.vth - @loop_r_z ir iz begin - # update the electron thermal speed using the updated electron - # parallel pressure - this_vth[iz,ir] = sqrt(abs(2.0 * this_ppar[iz,ir] / - (this_dens[iz,ir] * - composition.me_over_mi))) - end - end - calculate_electron_moment_derivatives!( - moments, - (electron_density=this_dens, - electron_upar=this_upar, - electron_ppar=this_ppar), - scratch_dummy, z, z_spectral, - num_diss_params.electron.moment_dissipation_coefficient, - composition.electron_physics) - else # compute the z-derivative of the parallel electron heat flux - @views derivative_z!(moments.electron.dqpar_dz, moments.electron.qpar, - buffer_r_1, buffer_r_2, buffer_r_3, buffer_r_4, - z_spectral, z) - end - end - update_derived_moments_and_derivatives() - - residual = -1.0 - if newton_success - # Calculate residuals to decide if iteration is converged. - # Might want an option to calculate the residual only after a certain number - # of iterations (especially during initialization when there are likely to be - # a large number of iterations required) to avoid the expense, and especially - # the global MPI.Bcast()? 
- begin_r_z_vperp_vpa_region() - residual = steady_state_residuals(new_scratch.pdf_electron, - old_scratch.pdf_electron, - t_params.previous_dt[]; use_mpi=true, - only_max_abs=true) - if global_rank[] == 0 - residual = first(values(residual))[1] - end - if evolve_ppar - ppar_residual = - steady_state_residuals(new_scratch.electron_ppar, - old_scratch.electron_ppar, - t_params.previous_dt[]; use_mpi=true, - only_max_abs=true) + @views derivative_z!(moments.electron.dqpar_dz[:,ir], + moments.electron.qpar[:,ir], buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + end + + residual = -1.0 + if newton_success + # Calculate residuals to decide if iteration is converged. + # Might want an option to calculate the residual only after a certain number + # of iterations (especially during initialization when there are likely to be + # a large number of iterations required) to avoid the expense, and especially + # the global MPI.Bcast()? + begin_z_vperp_vpa_region() + residual = steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, + t_params.previous_dt[]; use_mpi=true, + only_max_abs=true) if global_rank[] == 0 - ppar_residual = first(values(ppar_residual))[1] - residual = max(residual, ppar_residual) + residual = first(values(residual))[1] end - end - if global_rank[] == 0 - if residual_tolerance === nothing - residual_tolerance = t_params.converged_residual_value + if evolve_ppar + ppar_residual = + steady_state_residuals(new_scratch.electron_ppar, + old_scratch.electron_ppar, + t_params.previous_dt[]; use_mpi=true, + only_max_abs=true) + if global_rank[] == 0 + ppar_residual = first(values(ppar_residual))[1] + residual = max(residual, ppar_residual) + end end - electron_pdf_converged = abs(residual) < residual_tolerance + if global_rank[] == 0 + if residual_tolerance === nothing + residual_tolerance = t_params.converged_residual_value + end + electron_pdf_converged = abs(residual) < residual_tolerance + end + electron_pdf_converged = 
MPI.Bcast(electron_pdf_converged, 0, comm_world) end - electron_pdf_converged = MPI.Bcast(electron_pdf_converged, 0, comm_world) - end - if (mod(t_params.step_counter[] - initial_step_counter,100) == 0) - begin_serial_region() - @serial_region begin - if z.irank == 0 && z.irank == z.nrank - 1 - println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual: ", residual) - elseif z.irank == 0 - println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual: ", residual) + if (mod(t_params.step_counter[] - initial_step_counter,100) == 0) + begin_serial_region() + @serial_region begin + if z.irank == 0 && z.irank == z.nrank - 1 + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual: ", residual) + elseif z.irank == 0 + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual: ", residual) + end end end - end - if ((t_params.step_counter[] % t_params.nwrite_moments == 0) - || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) - - begin_serial_region() - t_params.moments_output_counter[] += 1 - @serial_region begin - if io_electron !== nothing - t_params.write_moments_output[] = false - write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, - vpa) + if ((t_params.step_counter[] % t_params.nwrite_moments == 0) + || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) + + if r.n == 1 + # For now can only do I/O within the pseudo-timestepping loop when there + # is no r-dimension, because different points in r would take different + 
# number and length of timesteps to converge. + begin_serial_region() + t_params.moments_output_counter[] += 1 + @serial_region begin + if io_electron !== nothing + t_params.write_moments_output[] = false + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, + vpa) + end + end end end - end - reset_nonlinear_per_stage_counters!(nl_solver_params) + reset_nonlinear_per_stage_counters!(nl_solver_params) - t_params.step_counter[] += 1 - if electron_pdf_converged + t_params.step_counter[] += 1 + if electron_pdf_converged + break + end + end + if !electron_pdf_converged + # If electron solve failed to converge for some `ir`, the failure will be + # handled by restarting the ion timestep with a smaller dt, so no need to try + # to solve for further `ir` values. break end end @@ -1166,13 +1149,12 @@ Do an implicit solve which finds: the steady-state electron shape function \$g_e backward-Euler advanced electron pressure which is updated using \$g_e\$ at the new time-level. -Implicit electron solve includes r-dimension. For 1D runs this makes no difference. In 2D -it might or might not be necessary. If the r-dimension is not needed in the implicit -solve, we would need to work on the parallelisation. The simplest option would be a -non-parallelised outer loop over r, with each nonlinear solve being parallelised over -{z,vperp,vpa}. More efficient might be to add an equivalent to the 'anyv' parallelisation -used for the collision operator (e.g. 'anyzv'?) to allow the outer r-loop to be -parallelised. +The r-dimension is not parallelised. For 1D runs this makes no difference. In 2D it might +or might not be necessary. If r-dimension parallelisation is needed, it would need some +work. The simplest option would be a non-parallelised outer loop over r, with each +nonlinear solve being parallelised over {z,vperp,vpa}. 
More efficient might be to add an +equivalent to the 'anyv' parallelisation used for the collision operator (e.g. 'anyzv'?) +to allow the outer r-loop to be parallelised. """ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, moments, fields, collisions, composition, geometry, @@ -1206,89 +1188,98 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo fvec_in.upar, fvec_in.ppar, fvec_in.density_neutral, fvec_in.uz_neutral, fvec_in.pz_neutral, moments.electron, collisions, dt, composition, - external_source_settings.electron, num_diss_params, z) - - function residual_func!(residual, new_variables) - electron_ppar_residual, f_electron_residual = residual - electron_ppar_new, f_electron_new = new_variables - - new_scratch = scratch_pdf(fvec_in.pdf, fvec_in.density, fvec_in.upar, fvec_in.ppar, - fvec_in.pperp, fvec_in.temp_z_s, - fvec_in.electron_density, fvec_in.electron_upar, - electron_ppar_new, fvec_in.electron_pperp, - fvec_in.electron_temp, fvec_in.pdf_neutral, - fvec_in.density_neutral, fvec_in.uz_neutral, - fvec_in.pz_neutral) - new_scratch_electron = scratch_electron_pdf(f_electron_new, electron_ppar_new) - - apply_electron_bc_and_constraints!(new_scratch_electron, fields.phi, moments, z, - vperp, vpa, vperp_spectral, vpa_spectral, - vpa_advect, num_diss_params, composition) - - # Only the first entry in the `electron_pdf_substruct` will be used, so does not - # matter what we put in the second and third except that they have the right type. 
- new_pdf = (electron=electron_pdf_substruct(f_electron_new, f_electron_new, - f_electron_new,),) - # Calculate heat flux and derivatives using new_variables - calculate_electron_moments!(new_scratch, new_pdf, moments, composition, - collisions, r, z, vpa) - calculate_electron_moment_derivatives!(moments, new_scratch, scratch_dummy, z, - z_spectral, - num_diss_params.electron.moment_dissipation_coefficient, - composition.electron_physics) - - electron_energy_residual!(electron_ppar_residual, electron_ppar_new, fvec_in, - moments, collisions, composition, - external_source_settings, num_diss_params, z, dt) - - # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the - # electron_pdf member of the first argument, so if we set the electron_pdf member - # of the first argument to zero, and pass dt=1, then it will evaluate the time - # derivative, which is the residual for a steady-state solution. - begin_r_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 - end - residual_scratch_electron = scratch_electron_pdf(f_electron_residual, - electron_ppar_residual) - new_scratch_electron = scratch_electron_pdf(f_electron_new, electron_ppar_new) - electron_kinetic_equation_euler_update!(residual_scratch_electron, - new_scratch_electron, moments, z, vperp, - vpa, z_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, - pdf_electron_normalisation_factor) - - # Set residual to zero where pdf_electron is determined by boundary conditions. 
- if vpa.n > 1 - begin_r_z_vperp_region() - @loop_r_z_vperp ir iz ivperp begin - @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz,ir], vpa.bc, - vpa_advect[1].speed[:,ivperp,iz,ir], - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - vpa, vpa_spectral) + external_source_settings.electron, num_diss_params, r, z) + + for ir ∈ 1:r.n + function residual_func!(residual, new_variables; debug=false) + electron_ppar_residual, f_electron_residual = residual + electron_ppar_new, f_electron_new = new_variables + + apply_electron_bc_and_constraints_no_r!(f_electron_new, fields.phi, moments, + z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, + num_diss_params, composition, ir) + + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar, + electron_ppar_new, + moments.electron.vth, + f_electron_new, vpa, ir) + + this_dens = moments.electron.dens + this_upar = moments.electron.upar + this_vth = moments.electron.vth + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_new[iz,ir] / + (this_dens[iz,ir] * + composition.me_over_mi))) + end + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=this_dens, + electron_upar=this_upar, + electron_ppar=electron_ppar_new), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = 0.0 + end + #@views electron_energy_residual!(electron_ppar_residual, electron_ppar_new, + # fvec_in.ppar[:,ir], fvec_in, moments, + # collisions, composition, + # external_source_settings, num_diss_params, + # z, dt, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument 
to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = 0.0 end - end - if vperp.n > 1 - begin_r_z_vpa_region() - enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, - vperp_adv, vperp_diffusion) - end - if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual should all - # be zero at exactly the same set of grid points, so it is reasonable - # to zero-out `residual` to impose the boundary condition. We impose - # this after subtracting f_old in case rounding errors, etc. mean that - # at some point f_old had a different boundary condition cut-off - # index. - begin_r_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - @loop_r ir begin + electron_kinetic_equation_euler_update!( + f_electron_residual, electron_ppar_residual, f_electron_new, + electron_ppar_new, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, collisions, composition, external_source_settings, + num_diss_params, pdf_electron_normalisation_factor, ir) + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] /= sqrt(1.0 + vpa.grid[ivpa]^2) + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. 
+ if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, + vperp_adv, vperp_diffusion) + end + if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. + begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], fvec_in.electron_upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin @@ -1297,10 +1288,8 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo end end end - end - if z.irank == z.nrank - 1 - iz = z.n - @loop_r ir begin + if z.irank == z.nrank - 1 + iz = z.n v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], fvec_in.electron_upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin @@ -1310,32 +1299,37 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo end end end + begin_z_region() + @loop_z iz begin + @views moment_constraints_on_residual!(f_electron_residual[:,:,iz], + f_electron_new[:,:,iz], + (evolve_density=true, + evolve_upar=true, + evolve_ppar=true), + vpa) + end + return nothing end - begin_r_z_region() - @loop_r_z ir iz begin - @views 
moment_constraints_on_residual!(f_electron_residual[:,:,iz,ir], f_electron_new[:,:,iz,ir], - (evolve_density=true, evolve_upar=true, evolve_ppar=true), - vpa) - end - return nothing - end - - residual = (scratch_dummy.implicit_buffer_zr_1, - scratch_dummy.implicit_buffer_vpavperpzr_1) - delta_x = (scratch_dummy.implicit_buffer_zr_2, - scratch_dummy.implicit_buffer_vpavperpzr_2) - rhs_delta = (scratch_dummy.implicit_buffer_zr_3, - scratch_dummy.implicit_buffer_vpavperpzr_3) - v = (scratch_dummy.implicit_buffer_zr_4, - scratch_dummy.implicit_buffer_vpavperpzr_4) - w = (scratch_dummy.implicit_buffer_zr_5, - scratch_dummy.implicit_buffer_vpavperpzr_5) - newton_success = newton_solve!((electron_ppar_out, pdf_electron_out), residual_func!, - residual, delta_x, rhs_delta, v, w, nl_solver_params; - left_preconditioner=nothing, - right_preconditioner=nothing, - coords=(r=r, z=z, vperp=vperp, vpa=vpa)) + residual = (scratch_dummy.implicit_buffer_z_1, + scratch_dummy.implicit_buffer_vpavperpz_1) + delta_x = (scratch_dummy.implicit_buffer_z_2, + scratch_dummy.implicit_buffer_vpavperpz_2) + rhs_delta = (scratch_dummy.implicit_buffer_z_3, + scratch_dummy.implicit_buffer_vpavperpz_3) + v = (scratch_dummy.implicit_buffer_z_4, + scratch_dummy.implicit_buffer_vpavperpz_4) + w = (scratch_dummy.implicit_buffer_z_5, + scratch_dummy.implicit_buffer_vpavperpz_5) + + @views newton_success = newton_solve!((electron_ppar_out[:,ir], + pdf_electron_out[:,:,:,ir]), + residual_func!, residual, delta_x, + rhs_delta, v, w, nl_solver_params; + left_preconditioner=nothing, + right_preconditioner=nothing, + coords=(r=r, z=z, vperp=vperp, vpa=vpa)) + end # Fill pdf.electron.norm non_scratch_pdf = pdf.electron.norm @@ -1443,6 +1437,40 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, z, vperp end end +function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vperp, + vpa, vperp_spectral, vpa_spectral, + vpa_advect, num_diss_params, composition, + ir) + 
begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron[ivpa,ivperp,iz] = max(f_electron[ivpa,ivperp,iz], 0.0) + end + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir], + z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi) + + begin_z_region() + A = moments.electron.constraints_A_coefficient + B = moments.electron.constraints_B_coefficient + C = moments.electron.constraints_C_coefficient + skip_first = z.irank == 0 && z.bc != "periodic" + skip_last = z.irank == z.nrank - 1 && z.bc != "periodic" + @loop_z iz begin + if (iz == 1 && skip_first) || (iz == z.n && skip_last) + continue + end + (A[iz,ir], B[iz,ir], C[iz,ir]) = + @views hard_force_moment_constraints!(f_electron[:,:,iz], + (evolve_density=true, + evolve_upar=true, + evolve_ppar=true), vpa) + end +end + function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, @@ -2354,83 +2382,88 @@ function update_electron_pdf_with_picard_iteration!(pdf, dens, vthe, ppar, ddens end """ - electron_kinetic_equation_euler_update!(fvec, pdf, moments, z, vperp, vpa, - z_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, collisions, - num_diss_params, dt; evolve_ppar=false) + electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, moments, + z, vperp, vpa, z_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + collisions, composition, + external_source_settings, + num_diss_params, dt, ir; + evolve_ppar=false, ion_dt=nothing) Do a forward-Euler update of the electron kinetic equation. When `evolve_ppar=true` is passed, also updates the electron parallel pressure. 
+ +Note that this function operates on a single point in `r`, given by `ir`, and `f_out`, +`ppar_out`, `f_in`, and `ppar_in` should have no r-dimension. """ -function electron_kinetic_equation_euler_update!(fvec_out, fvec_in, moments, z, vperp, - vpa, z_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, dt; evolve_ppar=false, - ion_dt=nothing) +function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, moments, + z, vperp, vpa, z_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + collisions, composition, + external_source_settings, + num_diss_params, dt, ir; + evolve_ppar=false, ion_dt=nothing) # add the contribution from the z advection term - electron_z_advection!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.upar, moments.electron.vth, z_advect, z, - vpa.grid, z_spectral, scratch_dummy, dt) + @views electron_z_advection!(f_out, f_in, moments.electron.upar[:,ir], + moments.electron.vth[:,ir], z_advect, z, vpa.grid, + z_spectral, scratch_dummy, dt, ir) # add the contribution from the wpa advection term - electron_vpa_advection!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.dens, moments.electron.upar, - fvec_in.electron_ppar, moments, vpa_advect, vpa, vpa_spectral, - scratch_dummy, dt, external_source_settings.electron) + @views electron_vpa_advection!(f_out, f_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], ppar_in, moments, + vpa_advect, vpa, vpa_spectral, scratch_dummy, dt, + external_source_settings.electron, ir) # add in the contribution to the residual from the term proportional to the pdf - add_contribution_from_pdf_term!(fvec_out.pdf_electron, fvec_in.pdf_electron, - fvec_in.electron_ppar, moments.electron.dens, - moments.electron.upar, moments, vpa.grid, z, dt, - external_source_settings.electron) + add_contribution_from_pdf_term!(f_out, f_in, ppar_in, moments.electron.dens[:,ir], + 
moments.electron.upar[:,ir], moments, vpa.grid, z, dt, + external_source_settings.electron, ir) # add in numerical dissipation terms - add_dissipation_term!(fvec_out.pdf_electron, fvec_in.pdf_electron, scratch_dummy, - z_spectral, z, vpa, vpa_spectral, num_diss_params, dt) + add_dissipation_term!(f_out, f_in, scratch_dummy, z_spectral, z, vpa, vpa_spectral, + num_diss_params, dt) if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 # Add a Krook collision operator # Set dt=-1 as we update the residual here rather than adding an update to # 'fvec_out'. - electron_krook_collisions!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.dens, moments.electron.upar, - moments.ion.upar, moments.electron.vth, collisions, - vperp, vpa, dt) + @views electron_krook_collisions!(f_out, f_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], + moments.ion.upar[:,ir], + moments.electron.vth[:,ir], collisions, vperp, + vpa, dt) end if external_source_settings.electron.active - external_electron_source!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.dens, moments.electron.upar, moments, - composition, external_source_settings.electron, vperp, - vpa, dt) + @views external_electron_source!(f_out, f_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], moments, + composition, external_source_settings.electron, + vperp, vpa, dt, ir) end if evolve_ppar - electron_energy_equation!(fvec_out.electron_ppar, fvec_in.electron_ppar, - moments.electron.dens, moments.electron.upar, - moments.ion.dens, moments.ion.upar, moments.ion.ppar, - moments.neutral.dens, moments.neutral.uz, - moments.neutral.pz, moments.electron, collisions, dt, - composition, external_source_settings.electron, - num_diss_params, z) + @views electron_energy_equation_no_r!( + ppar_out, ppar_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], moments.ion.dens[:,ir], + moments.ion.upar[:,ir], moments.ion.ppar[:,ir], + moments.neutral.dens[:,ir], 
moments.neutral.uz[:,ir], + moments.neutral.pz[:,ir], moments.electron, collisions, dt, + composition, external_source_settings.electron, num_diss_params, z, ir) if ion_dt !== nothing # Add source term to turn steady state solution into a backward-Euler update of # electron_ppar with the ion timestep `ion_dt`. - ppar_out = fvec_out.electron_ppar - ppar_in = fvec_in.electron_ppar ppar_previous_ion_step = moments.electron.ppar - begin_r_z_region() - @loop_r_z ir iz begin + begin_z_region() + @loop_z iz begin # At this point, ppar_out = ppar_in + dt*RHS(ppar_in). Here we add a # source/damping term so that in the steady state of the electron # pseudo-timestepping iteration, # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, # resulting in a backward-Euler step (as long as the pseudo-timestepping # loop converges). - ppar_out[iz,ir] += -dt * (ppar_in[iz,ir] - ppar_previous_ion_step[iz,ir]) / ion_dt + ppar_out[iz] += -dt * (ppar_in[iz] - ppar_previous_ion_step[iz,ir]) / ion_dt end end end @@ -2608,29 +2641,15 @@ end function add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, vpa, vpa_spectral, num_diss_params, dt) - dummy_zr1 = @view scratch_dummy.dummy_zrs[:,:,1] - dummy_zr2 = @view scratch_dummy.buffer_vpavperpzr_1[1,1,:,:] - buffer_r_1 = @view scratch_dummy.buffer_rs_1[:,1] - buffer_r_2 = @view scratch_dummy.buffer_rs_2[:,1] - buffer_r_3 = @view scratch_dummy.buffer_rs_3[:,1] - buffer_r_4 = @view scratch_dummy.buffer_rs_4[:,1] - # add in numerical dissipation terms - #@loop_vperp_vpa ivperp ivpa begin - # @views derivative_z!(dummy_zr1, pdf_in[ivpa,ivperp,:,:], buffer_r_1, buffer_r_2, buffer_r_3, - # buffer_r_4, z_spectral, z) - # @views derivative_z!(dummy_zr2, dummy_zr1, buffer_r_1, buffer_r_2, buffer_r_3, - # buffer_r_4, z_spectral, z) - # @. 
residual[ivpa,ivperp,:,:] -= num_diss_params.electron.z_dissipation_coefficient * dummy_zr2 - #end - begin_r_z_vperp_region() - @loop_r_z_vperp ir iz ivperp begin - #@views derivative!(vpa.scratch, pdf_in[:,ivperp,iz,ir], vpa, false) - #@views derivative!(vpa.scratch2, vpa.scratch, vpa, false) - #@. residual[:,ivperp,iz,ir] -= num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch2 - @views second_derivative!(vpa.scratch, pdf_in[:,ivperp,iz,ir], vpa, vpa_spectral) - @. pdf_out[:,ivperp,iz,ir] += dt * num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch + if num_diss_params.electron.vpa_dissipation_coefficient ≤ 0.0 + return nothing + end + + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views second_derivative!(vpa.scratch, pdf_in[:,ivperp,iz], vpa, vpa_spectral) + @. pdf_out[:,ivperp,iz] += dt * num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch end - #stop() return nothing end @@ -2833,39 +2852,38 @@ end # add contribution to the residual coming from the term proporational to the pdf function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, moments, vpa, - z, dt, electron_source_settings) - vth = moments.electron.vth - ddens_dz = moments.electron.ddens_dz - dvth_dz = moments.electron.dvth_dz - dqpar_dz = moments.electron.dqpar_dz - begin_r_z_vperp_vpa_region() - @loop_r_z ir iz begin - this_dqpar_dz = dqpar_dz[iz,ir] - this_ppar = ppar[iz,ir] - this_vth = vth[iz,ir] - this_ddens_dz = ddens_dz[iz,ir] - this_dens = dens[iz,ir] - this_dvth_dz = dvth_dz[iz,ir] - this_vth = vth[iz,ir] + z, dt, electron_source_settings, ir) + vth = @view moments.electron.vth[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + begin_z_vperp_vpa_region() + @loop_z iz begin + this_dqpar_dz = dqpar_dz[iz] + this_ppar = ppar[iz] + this_vth = vth[iz] + this_ddens_dz = ddens_dz[iz] + this_dens = dens[iz] + 
this_dvth_dz = dvth_dz[iz] + this_vth = vth[iz] @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] += + pdf_out[ivpa,ivperp,iz] += dt * (-0.5 * this_dqpar_dz / this_ppar - vpa[ivpa] * this_vth * (this_ddens_dz / this_dens - this_dvth_dz / this_vth)) * - pdf_in[ivpa,ivperp,iz,ir] - #pdf_out[ivpa, ivperp, :, :] -= (-0.5 * dqpar_dz[:, :] / ppar[:, :]) * pdf_in[ivpa, ivperp, :, :] + pdf_in[ivpa,ivperp,iz] end end if electron_source_settings.active - source_density_amplitude = moments.electron.external_source_density_amplitude - source_momentum_amplitude = moments.electron.external_source_momentum_amplitude - source_pressure_amplitude = moments.electron.external_source_pressure_amplitude - @loop_r_z ir iz begin - term = dt * (1.5 * source_density_amplitude[iz,ir] / dens[iz,ir] - - (0.5 * source_pressure_amplitude[iz,ir] + - source_momentum_amplitude[iz,ir]) / ppar[iz,ir]) + source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir] + source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir] + source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir] + @loop_z iz begin + term = dt * (1.5 * source_density_amplitude[iz] / dens[iz] - + (0.5 * source_pressure_amplitude[iz] + + source_momentum_amplitude[iz]) / ppar[iz]) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= term * pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= term * pdf_in[ivpa,ivperp,iz] end end end diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 2d7968286..49072449d 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -14,39 +14,33 @@ calculate the wpa-advection term for the electron kinetic equation """ function electron_vpa_advection!(pdf_out, pdf_in, density, upar, ppar, moments, advect, vpa, spectral, scratch_dummy, dt, - electron_source_settings) - 
begin_r_z_vperp_region() + electron_source_settings, ir) + begin_z_vperp_region() # create a reference to a scratch_dummy array to store the wpa-derivative of the electron pdf - dpdf_dvpa = scratch_dummy.buffer_vpavperpzr_1 - #d2pdf_dvpa2 = scratch_dummy.buffer_vpavperpzr_2 - begin_r_z_vperp_region() + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + #d2pdf_dvpa2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + # get the updated speed along the wpa direction using the current pdf @views update_electron_speed_vpa!(advect[1], density, upar, ppar, moments, vpa.grid, - electron_source_settings) - # update adv_fac -- note that there is no factor of dt here because - # in some cases the electron kinetic equation is solved as a steady-state equation iteratively - @loop_r_z_vperp ir iz ivperp begin + electron_source_settings, ir) + # update adv_fac + @loop_z_vperp iz ivperp begin @views @. advect[1].adv_fac[:,ivperp,iz,ir] = -advect[1].speed[:,ivperp,iz,ir] end #calculate the upwind derivative of the electron pdf w.r.t. wpa - @loop_r_z_vperp ir iz ivperp begin - @views derivative!(dpdf_dvpa[:,ivperp,iz,ir], pdf_in[:,ivperp,iz,ir], vpa, + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], pdf_in[:,ivperp,iz], vpa, advect[1].adv_fac[:,ivperp,iz,ir], spectral) end - #@loop_r_z_vperp ir iz ivperp begin - # @views second_derivative!(d2pdf_dvpa2[:,ivperp,iz,ir], pdf_in[:,ivperp,iz,ir], vpa, spectral) + #@loop_z_vperp iz ivperp begin + # @views second_derivative!(d2pdf_dvpa2[:,ivperp,iz], pdf_in[:,ivperp,iz], vpa, spectral) #end # calculate the advection term - @loop_r_z_vperp ir iz ivperp begin - @. pdf_out[:,ivperp,iz,ir] += dt * advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz,ir] - #@. pdf_out[:,ivperp,iz,ir] -= advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz,ir] + 0.0001*d2pdf_dvpa2[:,ivperp,iz,ir] + @loop_z_vperp iz ivperp begin + @. 
pdf_out[:,ivperp,iz] += dt * advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + #@. pdf_out[:,ivperp,iz] -= advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + 0.0001*d2pdf_dvpa2[:,ivperp,iz] end - #@loop_vpa ivpa begin - # println("electron_vpa_advection: ", pdf_out[ivpa,1,10,1], " vpa: ", vpa.grid[ivpa], " dpdf_dvpa: ", dpdf_dvpa[ivpa,1,10,1], - # " pdf: ", pdf[ivpa,1,10,1]) - #end - #exit() return nothing end @@ -54,29 +48,27 @@ end calculate the electron advection speed in the wpa-direction at each grid point """ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, - electron_source_settings) - vth = moments.electron.vth - dppar_dz = moments.electron.dppar_dz - dqpar_dz = moments.electron.dqpar_dz - dvth_dz = moments.electron.dvth_dz + electron_source_settings, ir) + vth = @view moments.electron.vth[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] # calculate the advection speed in wpa - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - # TMP FOR TESTING - #advect.speed[ivpa,ivperp,iz,ir] = vth[iz,ir] * dppar_dz[iz,ir] / (2 * ppar[iz,ir]) - advect.speed[ivpa,ivperp,iz,ir] = ((vth[iz,ir] * dppar_dz[iz,ir] + vpa[ivpa] * dqpar_dz[iz,ir]) - / (2 * ppar[iz,ir]) - vpa[ivpa]^2 * dvth_dz[iz,ir]) + @loop_z_vperp_vpa iz ivperp ivpa begin + advect.speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz]) + / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz]) end if electron_source_settings.active - source_density_amplitude = moments.electron.external_source_density_amplitude - source_momentum_amplitude = moments.electron.external_source_momentum_amplitude - source_pressure_amplitude = moments.electron.external_source_pressure_amplitude - @loop_r_z ir iz begin - term1 = source_density_amplitude[iz,ir] * upar[iz,ir]/(density[iz,ir]*vth[iz,ir]) + source_density_amplitude = @view 
moments.electron.external_source_density_amplitude[:,ir] + source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir] + source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir] + @loop_z iz begin + term1 = source_density_amplitude[iz] * upar[iz]/(density[iz]*vth[iz]) term2_over_vpa = - -0.5 * (source_pressure_amplitude[iz,ir] + - 2.0 * upar[iz,ir] * source_momentum_amplitude[iz,ir]) / - ppar[iz,ir] + - 0.5 * source_density_amplitude[iz,ir] / density[iz,ir] + -0.5 * (source_pressure_amplitude[iz] + + 2.0 * upar[iz] * source_momentum_amplitude[iz]) / + ppar[iz] + + 0.5 * source_density_amplitude[iz] / density[iz] @loop_vperp_vpa ivperp ivpa begin advect.speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa end @@ -84,5 +76,14 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, end return nothing end +# Alternative version with loop over r is used for adaptive timestep update +function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, + electron_source_settings) + @loop_r ir begin + @views update_electron_speed_vpa!(advect, density[:,ir], upar[:,ir], ppar[:,ir], + moments, vpa, electron_source_settings, ir) + end + return nothing +end end diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index b0b8f26aa..39076384d 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -8,40 +8,43 @@ export update_electron_speed_z! using ..advection: advance_f_df_precomputed! using ..chebyshev: chebyshev_info using ..looping -using ..derivatives: derivative_z! -using ..calculus: second_derivative!, derivative! +using ..derivatives: derivative_z_pdf_vpavperpz! +using ..calculus: second_derivative! 
""" calculate the z-advection term for the electron kinetic equation = wpa * vthe * df/dz """ function electron_z_advection!(pdf_out, pdf_in, upar, vth, advect, z, vpa, spectral, - scratch_dummy, dt) - begin_r_vperp_vpa_region() + scratch_dummy, dt, ir) + begin_vperp_vpa_region() # create a pointer to a scratch_dummy array to store the z-derivative of the electron pdf - dpdf_dz = scratch_dummy.buffer_vpavperpzr_1 - d2pdf_dz2 = scratch_dummy.buffer_vpavperpzr_2 + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + d2pdf_dz2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] begin_r_vperp_vpa_region() # get the updated speed along the z direction using the current pdf - @views update_electron_speed_z!(advect[1], upar, vth, vpa) + @views update_electron_speed_z!(advect[1], upar, vth, vpa, ir) # update adv_fac -- note that there is no factor of dt here because # in some cases the electron kinetic equation is solved as a steady-state equation iteratively - @loop_r_vperp_vpa ir ivperp ivpa begin + @loop_vperp_vpa ivperp ivpa begin @views advect[1].adv_fac[:,ivpa,ivperp,ir] = -advect[1].speed[:,ivpa,ivperp,ir] end #calculate the upwind derivative - derivative_z!(dpdf_dz, pdf_in, - advect, scratch_dummy.buffer_vpavperpr_1, - scratch_dummy.buffer_vpavperpr_2, scratch_dummy.buffer_vpavperpr_3, - scratch_dummy.buffer_vpavperpr_4, scratch_dummy.buffer_vpavperpr_5, - scratch_dummy.buffer_vpavperpr_6, spectral, z) - #@loop_r_vperp_vpa ir ivperp ivpa begin - # @views second_derivative!(d2pdf_dz2[ivpa,ivperp,:,ir], pdf[ivpa,ivperp,:,ir], z, spectral) + @views derivative_z_pdf_vpavperpz!( + dpdf_dz, pdf_in, advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], spectral, z) + #@loop_vperp_vpa ivperp ivpa begin + # @views 
second_derivative!(d2pdf_dz2[ivpa,ivperp,:], pdf_in[ivpa,ivperp,:], z, spectral) #end # calculate the advection term - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz,ir] - #pdf_out[ivpa,ivperp,iz,ir] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz,ir] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz,ir] + @loop_z_vperp_vpa iz ivperp ivpa begin + pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + #pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz] end return nothing end @@ -49,11 +52,18 @@ end """ calculate the electron advection speed in the z-direction at each grid point """ -function update_electron_speed_z!(advect, upar, vth, vpa) +function update_electron_speed_z!(advect, upar, vth, vpa, ir) # the electron advection speed in z is v_par = w_par * v_the - @loop_r_vperp_vpa ir ivperp ivpa begin - #@. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth[:,ir] - @. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth[:,ir] + upar[:,ir] + @loop_vperp_vpa ivperp ivpa begin + #@. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + @. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + upar + end + return nothing +end +# Alternative version with loop over r is used for adaptive timestep update +function update_electron_speed_z!(advect, upar, vth, vpa) + @loop_r ir begin + @views update_electron_speed_z!(advect, upar[:,ir], vth[:,ir], vpa, ir) end return nothing end diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 98fe4169c..3a5840b56 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -752,15 +752,18 @@ end vpa, dt) Add external source term to the electron kinetic equation. 
+ +Note that this function operates on a single point in `r`, given by `ir`, and `pdf_out`, +`pdf_in`, `electron_density`, and `electron_upar` should have no r-dimension. """ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_source_settings, vperp, - vpa, dt) - begin_r_z_vperp_region() + vpa, dt, ir) + begin_z_vperp_region() me_over_mi = composition.me_over_mi - source_amplitude = moments.electron.external_source_amplitude + source_amplitude = @view moments.electron.external_source_amplitude[:,ir] source_T = electron_source_settings.source_T if vperp.n == 1 vth_factor = 1.0 / sqrt(source_T / me_over_mi) @@ -770,18 +773,18 @@ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_u vpa_grid = vpa.grid vperp_grid = vperp.grid - vth = moments.electron.vth - @loop_r_z ir iz begin - this_vth = vth[iz,ir] - this_upar = electron_upar[iz,ir] - this_prefactor = dt * this_vth / electron_density[iz,ir] * vth_factor * - source_amplitude[iz,ir] + vth = @view moments.electron.vth[:,ir] + @loop_z iz begin + this_vth = vth[iz] + this_upar = electron_upar[iz] + this_prefactor = dt * this_vth / electron_density[iz] * vth_factor * + source_amplitude[iz] @loop_vperp_vpa ivperp ivpa begin # Factor of 1/sqrt(π) (for 1V) or 1/π^(3/2) (for 2V/3V) is absorbed by the # normalisation of F vperp_unnorm = vperp_grid[ivperp] * this_vth vpa_unnorm = vpa_grid[ivpa] * this_vth + this_upar - pdf_out[ivpa,ivperp,iz,ir] += + pdf_out[ivpa,ivperp,iz] += this_prefactor * exp(-(vperp_unnorm^2 + vpa_unnorm^2) * me_over_mi / source_T) end @@ -789,9 +792,9 @@ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_u if electron_source_settings.source_type == "energy" # Take particles out of pdf so source does not change density - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * source_amplitude[iz,ir] * - pdf_in[ivpa,ivperp,iz,ir] + @loop_z_vperp_vpa iz ivperp ivpa 
begin + pdf_out[ivpa,ivperp,iz] -= dt * source_amplitude[iz] * + pdf_in[ivpa,ivperp,iz] end end diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index 1880610e9..72f41d9f6 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -262,10 +262,13 @@ end """ Add Krook collision operator for electrons + +Note that this function operates on a single point in `r`, so `pdf_out`, `pdf_in`, +`dens_in`, `upar_in`, `upar_ion_in`, and `vth_in` should have no r-dimension. """ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_in, vth_in, collisions, vperp, vpa, dt) - begin_r_z_region() + begin_z_region() # For now, electrons are always fully moment-kinetic evolve_density = true @@ -281,9 +284,9 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ if evolve_ppar && evolve_upar # Compared to evolve_upar version, grid is already normalized by vth, and multiply # through by vth, remembering pdf is already multiplied by vth - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @@ -292,13 +295,13 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. - @. vpa.scratch = vpa.grid + (upar_ion_in[iz,ir,1] - upar_in[iz,ir]) / vth + @. 
vpa.scratch = vpa.grid + (upar_ion_in[iz,1] - upar_in[iz]) / vth @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) - + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ei * (pdf_in[ivpa,ivperp,iz] - exp(-vpa.scratch[ivpa]^2 - vperp.grid[ivperp]^2)) ) end @@ -306,33 +309,33 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ elseif evolve_ppar # Compared to full-f collision operater, multiply through by vth, remembering pdf # is already multiplied by vth, and grid is already normalized by vth - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] - - exp(-((vpa.grid[ivpa] - upar_in[iz,ir])/vth)^2 + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] + - exp(-((vpa.grid[ivpa] - upar_in[iz])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) # e-i collisions push electrons towards a Maxwellian drifting at the ion # parallel flow, so need a corresponding normalised parallel velocity # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. 
- + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] - - exp(-((vpa.grid[ivpa] - upar_ion_in[iz,ir,1])/vth)^2 + + nu_ei * (pdf_in[ivpa,ivperp,iz] + - exp(-((vpa.grid[ivpa] - upar_ion_in[iz,1])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) ) end end elseif evolve_upar # Compared to evolve_density version, grid is already shifted by upar - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @@ -341,14 +344,14 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. - @. vpa.scratch = vpa.grid + (upar_ion_in[iz,ir,1] - upar_in[iz,ir]) + @. vpa.scratch = vpa.grid + (upar_ion_in[iz,1] - upar_in[iz]) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] - 1.0 / vth * exp(-(vpa.grid[ivpa] / vth)^2 - (vperp.grid[ivperp] / vth)^2)) - + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ei * (pdf_in[ivpa,ivperp,iz] - 1.0 / vth * exp(-(vpa.scratch[ivpa] / vth)^2 - (vperp.grid[ivperp] / vth)^2)) ) @@ -357,33 +360,33 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ elseif evolve_density # Compared to full-f collision operater, divide through by density, remembering # that pdf is already normalized by density - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * 
(pdf_in[ivpa,ivperp,iz] - 1.0 / vth - * exp(-((vpa.grid[ivpa] - upar_in[iz,ir]) / vth)^2 + * exp(-((vpa.grid[ivpa] - upar_in[iz]) / vth)^2 - (vperp.grid[ivperp]/vth)^2)) # e-i collisions push electrons towards a Maxwellian drifting at the ion # parallel flow, so need a corresponding normalised parallel velocity # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. - + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ei * (pdf_in[ivpa,ivperp,iz] - 1.0 / vth - * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,ir,1]) / vth)^2 + * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,1]) / vth)^2 - (vperp.grid[ivperp]/vth)^2)) ) end end else - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] if vperp.n == 1 vth_prefactor = 1.0 / vth else @@ -392,19 +395,19 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] - n * vth_prefactor - * exp(-((vpa.grid[ivpa] - upar_in[iz,ir])/vth)^2 + * exp(-((vpa.grid[ivpa] - upar_in[iz])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) # e-i collisions push electrons towards a Maxwellian drifting at the ion # parallel flow, so need a corresponding normalised parallel velocity # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. 
- + nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ee * (pdf_in[ivpa,ivperp,iz] - n * vth_prefactor - * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,ir,1])/vth)^2 + * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,1])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) ) end diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index f4c887b84..154791890 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -4434,8 +4434,10 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t begin_serial_region() # Only need some struct with a 'speed' variable advect = (speed=@view(speed[:,:,:,:,it]),) - @views update_electron_speed_z!(advect, upar[:,:,it], vth[:,:,it], - run_info.vpa.grid) + for ir ∈ 1:run_info.r.n + @views update_electron_speed_z!(advect, upar[:,ir,it], vth[:,ir,it], + run_info.vpa.grid, ir) + end end # Horrible hack so that we can get the speed back without rearranging the @@ -4512,9 +4514,12 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t external_source_density_amplitude=external_source_density_amplitude[:,:,it], external_source_momentum_amplitude=external_source_momentum_amplitude[:,:,it], external_source_pressure_amplitude=external_source_pressure_amplitude[:,:,it]),) - @views update_electron_speed_vpa!(advect, density[:,:,it], upar[:,:,it], - ppar[:,:,it], moments, run_info.vpa.grid, - run_info.external_source_settings.electron) + for ir ∈ 1:run_info.r.n + @views update_electron_speed_vpa!(advect, density[:,ir,it], upar[:,ir,it], + ppar[:,ir,it], moments, run_info.vpa.grid, + run_info.external_source_settings.electron, + ir) + end end variable = speed diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 4595a6b96..c898f32ad 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -104,7 +104,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); 
default_rtol V .= 0.0 elseif electron_ppar_pdf_solve H = allocate_shared_float(linear_restart + 1, linear_restart) - V_ppar = allocate_shared_float(coords.z.n, coords.r.n, linear_restart+1) + V_ppar = allocate_shared_float(coords.z.n, linear_restart+1) V_pdf = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) begin_serial_region() @@ -409,10 +409,10 @@ function get_distributed_norm(coords, rtol, atol, x) this_norm = distributed_norm_z elseif dims == (:vpa,) this_norm = distributed_norm_vpa - elseif dims == (:r, :z, :vperp, :vpa) + elseif dims == (:z, :vperp, :vpa) # Intended for implicit solve combining electron_ppar and pdf_electron, so will # not work for a single variable. - this_norm = distributed_norm_r_z_vperp_vpa + this_norm = distributed_norm_z_vperp_vpa elseif dims == (:s, :r, :z, :vperp, :vpa) this_norm = distributed_norm_s_r_z_vperp_vpa else @@ -473,34 +473,28 @@ function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol return residual_norm end -function distributed_norm_r_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}; - coords, rtol, atol, x) +function distributed_norm_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}; + coords, rtol, atol, x) ppar_residual, pdf_residual = residual x_ppar, x_pdf = x - r = coords.r z = coords.z vperp = coords.vperp vpa = coords.vpa - if r.irank < r.nrank - 1 - rend = r.n - else - rend = r.n + 1 - end if z.irank < z.nrank - 1 zend = z.n else zend = z.n + 1 end - begin_r_z_region() + begin_z_region() ppar_local_norm_square = 0.0 - @loop_r_z ir iz begin - if ir == rend || iz == zend + @loop_z iz begin + if iz == zend continue end - ppar_local_norm_square += (ppar_residual[iz,ir] / (rtol * abs(x_ppar[iz,ir]) + atol))^2 + ppar_local_norm_square += (ppar_residual[iz] / (rtol * abs(x_ppar[iz]) + atol))^2 end _block_synchronize() @@ -508,20 +502,20 @@ function 
distributed_norm_r_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, if block_rank[] == 0 ppar_global_norm_square = MPI.Allreduce(ppar_block_norm_square, +, comm_inter_block[]) - ppar_global_norm_square = ppar_global_norm_square / (r.n_global * z.n_global) + ppar_global_norm_square = ppar_global_norm_square / z.n_global else ppar_global_norm_square = nothing end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() pdf_local_norm_square = 0.0 - @loop_r_z ir iz begin - if ir == rend || iz == zend + @loop_z iz begin + if iz == zend continue end @loop_vperp_vpa ivperp ivpa begin - pdf_local_norm_square += (pdf_residual[ivpa,ivperp,iz,ir] / (rtol * abs(x_pdf[ivpa,ivperp,iz,ir]) + atol))^2 + pdf_local_norm_square += (pdf_residual[ivpa,ivperp,iz] / (rtol * abs(x_pdf[ivpa,ivperp,iz]) + atol))^2 end end @@ -530,7 +524,7 @@ function distributed_norm_r_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, if block_rank[] == 0 pdf_global_norm_square = MPI.Allreduce(pdf_block_norm_square, +, comm_inter_block[]) - pdf_global_norm_square = pdf_global_norm_square / (r.n_global * z.n_global * vperp.n_global * vpa.n_global) + pdf_global_norm_square = pdf_global_norm_square / (z.n_global * vperp.n_global * vpa.n_global) global_norm = sqrt(mean((ppar_global_norm_square, pdf_global_norm_square))) else @@ -598,10 +592,10 @@ function get_distributed_dot(coords, rtol, atol, x) this_dot = distributed_dot_z elseif dims == (:vpa,) this_dot = distributed_dot_vpa - elseif dims == (:r, :z, :vperp, :vpa) + elseif dims == (:z, :vperp, :vpa) # Intended for implicit solve combining electron_ppar and pdf_electron, so will # not work for a single variable. 
- this_dot = distributed_dot_r_z_vperp_vpa + this_dot = distributed_dot_z_vperp_vpa elseif dims == (:s, :r, :z, :vperp, :vpa) this_dot = distributed_dot_s_r_z_vperp_vpa else @@ -663,37 +657,31 @@ function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_ return local_dot end -function distributed_dot_r_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, - w::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}; - coords, atol, rtol, x) +function distributed_dot_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + w::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}; + coords, atol, rtol, x) v_ppar, v_pdf = v w_ppar, w_pdf = w x_ppar, x_pdf = x - r = coords.r z = coords.z vperp = coords.vperp vpa = coords.vpa - if r.irank < r.nrank - 1 - rend = r.n - else - rend = r.n + 1 - end if z.irank < z.nrank - 1 zend = z.n else zend = z.n + 1 end - begin_r_z_region() + begin_z_region() ppar_local_dot = 0.0 - @loop_r_z ir iz begin - if ir == rend || iz == zend + @loop_z iz begin + if iz == zend continue end - ppar_local_dot += v_ppar[iz,ir] * w_ppar[iz,ir] / (rtol * abs(x_ppar[iz,ir]) + atol)^2 + ppar_local_dot += v_ppar[iz] * w_ppar[iz] / (rtol * abs(x_ppar[iz]) + atol)^2 end _block_synchronize() @@ -701,19 +689,19 @@ function distributed_dot_r_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 2},Abstr if block_rank[] == 0 ppar_global_dot = MPI.Allreduce(ppar_block_dot, +, comm_inter_block[]) - ppar_global_dot = ppar_global_dot / (r.n_global * z.n_global) + ppar_global_dot = ppar_global_dot / z.n_global else ppar_global_dot = nothing end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() pdf_local_dot = 0.0 - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - if ir == rend || iz == zend + @loop_z_vperp_vpa iz ivperp ivpa begin + if iz == zend continue end - pdf_local_dot += v_pdf[ivpa,ivperp,iz,ir] * w_pdf[ivpa,ivperp,iz,ir] / (rtol * abs(x_pdf[ivpa,ivperp,iz,ir]) + atol)^2 + 
pdf_local_dot += v_pdf[ivpa,ivperp,iz] * w_pdf[ivpa,ivperp,iz] / (rtol * abs(x_pdf[ivpa,ivperp,iz]) + atol)^2 end _block_synchronize() @@ -721,7 +709,7 @@ function distributed_dot_r_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 2},Abstr if block_rank[] == 0 pdf_global_dot = MPI.Allreduce(pdf_block_dot, +, comm_inter_block[]) - pdf_global_dot = pdf_global_dot / (r.n_global * z.n_global * vperp.n_global * vpa.n_global) + pdf_global_dot = pdf_global_dot / (z.n_global * vperp.n_global * vpa.n_global) global_dot = mean((ppar_global_dot, pdf_global_dot)) else @@ -786,10 +774,10 @@ function get_parallel_map(coords) return parallel_map_z elseif dims == (:vpa,) return parallel_map_vpa - elseif dims == (:r, :z, :vperp, :vpa) + elseif dims == (:z, :vperp, :vpa) # Intended for implicit solve combining electron_ppar and pdf_electron, so will # not work for a single variable. - return parallel_map_r_z_vperp_vpa + return parallel_map_z_vperp_vpa elseif dims == (:s, :r, :z, :vperp, :vpa) return parallel_map_s_r_z_vperp_vpa else @@ -857,59 +845,59 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1, x2) return nothing end -function parallel_map_r_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}) +function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}) result_ppar, result_pdf = result - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin - result_ppar[iz,ir] = func() + @loop_z iz begin + result_ppar[iz] = func() end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz,ir] = func() + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func() end return nothing end -function parallel_map_r_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, x1) +function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 
1},AbstractArray{mk_float, 3}}, x1) result_ppar, result_pdf = result x1_ppar, x1_pdf = x1 - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin - result_ppar[iz,ir] = func(x1_ppar[iz,ir]) + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz]) end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz,ir] = func(x1_pdf[ivpa,ivperp,iz,ir]) + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz]) end return nothing end -function parallel_map_r_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, x1, x2) +function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1, x2) result_ppar, result_pdf = result x1_ppar, x1_pdf = x1 x2_ppar, x2_pdf = x2 - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin - result_ppar[iz,ir] = func(x1_ppar[iz,ir], x2_ppar[iz,ir]) + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz]) end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz,ir] = func(x1_pdf[ivpa,ivperp,iz,ir], x2_pdf[ivpa,ivperp,iz,ir]) + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz]) end return nothing @@ -958,10 +946,10 @@ function get_parallel_delta_x_calc(coords) return parallel_delta_x_calc_z elseif dims == (:vpa,) return parallel_delta_x_calc_vpa - elseif dims == (:r, :z, :vperp, :vpa) + elseif dims == (:z, :vperp, :vpa) # Intended for implicit solve combining electron_ppar and pdf_electron, so will # not work for a single variable. 
- return parallel_delta_x_calc_r_z_vperp_vpa + return parallel_delta_x_calc_z_vperp_vpa elseif dims == (:s, :r, :z, :vperp, :vpa) return parallel_delta_x_calc_s_r_z_vperp_vpa else @@ -996,26 +984,26 @@ function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) return nothing end -function parallel_delta_x_calc_r_z_vperp_vpa(delta_x::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, V, y) +function parallel_delta_x_calc_z_vperp_vpa(delta_x::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, V, y) delta_x_ppar, delta_x_pdf = delta_x V_ppar, V_pdf = V ny = length(y) - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin + @loop_z iz begin for iy ∈ 1:ny - delta_x_ppar[iz,ir] += y[iy] * V_ppar[iz,ir,iy] + delta_x_ppar[iz] += y[iy] * V_ppar[iz,iy] end end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + @loop_z_vperp_vpa iz ivperp ivpa begin for iy ∈ 1:ny - delta_x_pdf[ivpa,ivperp,iz,ir] += y[iy] * V_pdf[ivpa,ivperp,iz,ir,iy] + delta_x_pdf[ivpa,ivperp,iz] += y[iy] * V_pdf[ivpa,ivperp,iz,iy] end end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 146e834b6..a1db148c6 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -154,19 +154,19 @@ struct scratch_dummy_arrays buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} # buffers to hold moment quantities for implicit solves - implicit_buffer_zr_1::MPISharedArray{mk_float,2} - implicit_buffer_zr_2::MPISharedArray{mk_float,2} - implicit_buffer_zr_3::MPISharedArray{mk_float,2} - implicit_buffer_zr_4::MPISharedArray{mk_float,2} - implicit_buffer_zr_5::MPISharedArray{mk_float,2} - implicit_buffer_zr_6::MPISharedArray{mk_float,2} + implicit_buffer_z_1::MPISharedArray{mk_float,1} + implicit_buffer_z_2::MPISharedArray{mk_float,1} + implicit_buffer_z_3::MPISharedArray{mk_float,1} + 
implicit_buffer_z_4::MPISharedArray{mk_float,1} + implicit_buffer_z_5::MPISharedArray{mk_float,1} + implicit_buffer_z_6::MPISharedArray{mk_float,1} # buffers to hold electron for implicit solves - implicit_buffer_vpavperpzr_1::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_2::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_3::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_4::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_5::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_6::MPISharedArray{mk_float,4} + implicit_buffer_vpavperpz_1::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_2::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_3::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_4::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_5::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_6::MPISharedArray{mk_float,3} # buffers to hold ion pdf for implicit solves implicit_buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} implicit_buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} @@ -668,8 +668,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop kinetic_electrons_with_temperature_equation) nl_solver_electron_advance_params = setup_nonlinear_solve(input_dict, - (r=r, z=z, vperp=vperp, vpa=vpa), - (); + (z=z, vperp=vperp, vpa=vpa), + (r,); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, @@ -1501,33 +1501,33 @@ function setup_dummy_and_buffer_arrays(nr, nz, nvpa, nvperp, nvz, nvr, nvzeta, buffer_vpavperpr_6 = allocate_shared_float(nvpa,nvperp,nr) if t_params.implicit_electron_advance || true - implicit_buffer_zr_1 = allocate_shared_float(nz,nr) - implicit_buffer_zr_2 = allocate_shared_float(nz,nr) - implicit_buffer_zr_3 = allocate_shared_float(nz,nr) - implicit_buffer_zr_4 = allocate_shared_float(nz,nr) - implicit_buffer_zr_5 = allocate_shared_float(nz,nr) - implicit_buffer_zr_6 = allocate_shared_float(nz,nr) - - 
implicit_buffer_vpavperpzr_1 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_2 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_3 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_4 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_5 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_6 = allocate_shared_float(nvpa,nvperp,nz,nr) + implicit_buffer_z_1 = allocate_shared_float(nz) + implicit_buffer_z_2 = allocate_shared_float(nz) + implicit_buffer_z_3 = allocate_shared_float(nz) + implicit_buffer_z_4 = allocate_shared_float(nz) + implicit_buffer_z_5 = allocate_shared_float(nz) + implicit_buffer_z_6 = allocate_shared_float(nz) + + implicit_buffer_vpavperpz_1 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_2 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_3 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_4 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_5 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_6 = allocate_shared_float(nvpa,nvperp,nz) else - implicit_buffer_zr_1 = allocate_shared_float(0,0) - implicit_buffer_zr_2 = allocate_shared_float(0,0) - implicit_buffer_zr_3 = allocate_shared_float(0,0) - implicit_buffer_zr_4 = allocate_shared_float(0,0) - implicit_buffer_zr_5 = allocate_shared_float(0,0) - implicit_buffer_zr_6 = allocate_shared_float(0,0) + implicit_buffer_z_1 = allocate_shared_float(0) + implicit_buffer_z_2 = allocate_shared_float(0) + implicit_buffer_z_3 = allocate_shared_float(0) + implicit_buffer_z_4 = allocate_shared_float(0) + implicit_buffer_z_5 = allocate_shared_float(0) + implicit_buffer_z_6 = allocate_shared_float(0) - implicit_buffer_vpavperpzr_1 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_2 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_3 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_4 = 
allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_5 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_6 = allocate_shared_float(0,0,0,0) + implicit_buffer_vpavperpz_1 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_2 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_3 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_4 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_5 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_6 = allocate_shared_float(0,0,0) end if t_params.implicit_ion_advance @@ -1581,8 +1581,8 @@ function setup_dummy_and_buffer_arrays(nr, nz, nvpa, nvperp, nvz, nvr, nvzeta, buffer_vpavperpzs_1,buffer_vpavperpzs_2,buffer_vpavperpzs_3,buffer_vpavperpzs_4,buffer_vpavperpzs_5,buffer_vpavperpzs_6, buffer_vpavperprs_1,buffer_vpavperprs_2,buffer_vpavperprs_3,buffer_vpavperprs_4,buffer_vpavperprs_5,buffer_vpavperprs_6, buffer_vpavperpzrs_1,buffer_vpavperpzrs_2, - implicit_buffer_zr_1,implicit_buffer_zr_2,implicit_buffer_zr_3,implicit_buffer_zr_4,implicit_buffer_zr_5,implicit_buffer_zr_6, - implicit_buffer_vpavperpzr_1,implicit_buffer_vpavperpzr_2,implicit_buffer_vpavperpzr_3,implicit_buffer_vpavperpzr_4,implicit_buffer_vpavperpzr_5,implicit_buffer_vpavperpzr_6, + implicit_buffer_z_1,implicit_buffer_z_2,implicit_buffer_z_3,implicit_buffer_z_4,implicit_buffer_z_5,implicit_buffer_z_6, + implicit_buffer_vpavperpz_1,implicit_buffer_vpavperpz_2,implicit_buffer_vpavperpz_3,implicit_buffer_vpavperpz_4,implicit_buffer_vpavperpz_5,implicit_buffer_vpavperpz_6, implicit_buffer_vpavperpzrs_1,implicit_buffer_vpavperpzrs_2,implicit_buffer_vpavperpzrs_3,implicit_buffer_vpavperpzrs_4,implicit_buffer_vpavperpzrs_5,implicit_buffer_vpavperpzrs_6, buffer_vzvrvzetazsn_1,buffer_vzvrvzetazsn_2,buffer_vzvrvzetazsn_3,buffer_vzvrvzetazsn_4,buffer_vzvrvzetazsn_5,buffer_vzvrvzetazsn_6, buffer_vzvrvzetarsn_1,buffer_vzvrvzetarsn_2,buffer_vzvrvzetarsn_3,buffer_vzvrvzetarsn_4,buffer_vzvrvzetarsn_5,buffer_vzvrvzetarsn_6, @@ 
-3451,8 +3451,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, fvec_in.upar, fvec_in.ppar, fvec_in.density_neutral, fvec_in.uz_neutral, fvec_in.pz_neutral, moments.electron, collisions, dt, composition, - external_source_settings.electron, num_diss_params, z; - conduction=advance.electron_conduction) + external_source_settings.electron, num_diss_params, r, + z; conduction=advance.electron_conduction) elseif advance.electron_conduction # Explicit version of the implicit part of the IMEX timestep, need to evaluate # only the conduction term. diff --git a/moment_kinetics/src/velocity_moments.jl b/moment_kinetics/src/velocity_moments.jl index 29684c26a..103d50a71 100644 --- a/moment_kinetics/src/velocity_moments.jl +++ b/moment_kinetics/src/velocity_moments.jl @@ -1008,6 +1008,60 @@ function calculate_electron_moment_derivatives!(moments, scratch, scratch_dummy, buffer_r_2, buffer_r_3, buffer_r_4, z_spectral, z) end +""" +Calculate spatial derivatives of the electron moments. + +This version, for use in implicit solvers for electrons, works with a single point in `r`, +given by `ir`. 
+""" +function calculate_electron_moment_derivatives_no_r!(moments, scratch, scratch_dummy, z, + z_spectral, electron_mom_diss_coeff, + ir) + begin_serial_region() + + dens = @view scratch.electron_density[:,ir] + upar = @view scratch.electron_upar[:,ir] + ppar = @view scratch.electron_ppar[:,ir] + qpar = @view moments.electron.qpar[:,ir] + vth = @view moments.electron.vth[:,ir] + dummy_z = @view scratch_dummy.dummy_zrs[:,ir,1] + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + @views derivative_z!(moments.electron.dupar_dz[:,ir], upar, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + # centred second derivative for dissipation + if electron_mom_diss_coeff > 0.0 + derivative_z!(dummy_z, ppar, buffer_1, buffer_2, buffer_3, buffer_4, + z_spectral, z) + @views derivative_z!(moments.electron.d2ppar_dz2[:,ir], dummy_z, buffer_1, + buffer_2, buffer_3, buffer_4, z_spectral, z) + end + + @views derivative_z!(moments.electron.ddens_dz[:,ir], dens, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dppar_dz[:,ir], ppar, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dqpar_dz[:,ir], qpar, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dvth_dz[:,ir], vth, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + # calculate the zed derivative of the electron temperature + begin_z_region() + @loop_z iz begin + # store the temperature in dummy_zr + dummy_z[iz] = 2*ppar[iz,ir]/dens[iz,ir] + end + begin_serial_region() + @views derivative_z!(moments.electron.dT_dz[:,ir], dummy_z, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dvth_dz[:,ir], moments.electron.vth[:,ir], + buffer_1, buffer_2, buffer_3, buffer_4, z_spectral, 
z) +end + """ update velocity moments of the evolved neutral pdf """ From 986d933569502dde9889f7e6e5954661095a6401 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 18:25:04 +0100 Subject: [PATCH 017/107] Prevent ion timestep increase when electrons took too long to converge --- .../src/electron_kinetic_equation.jl | 15 +++++ moment_kinetics/src/input_structs.jl | 2 + moment_kinetics/src/time_advance.jl | 64 ++++++++++++++----- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 8dac6847e..b1334e108 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1127,6 +1127,21 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end end + + if r.n > 1 + error("Limits on iteration count and simtime assume 1D simulations. " + * "Need to fix handling of t_params.t[] and t_params.step_counter[], " + * "and also t_params.max_step_count_this_ion_step[] and " + * "t_params.max_t_increment_this_ion_step[]") + else + t_params.max_step_count_this_ion_step[] = + max(t_params.step_counter[] - initial_step_counter, + t_params.max_step_count_this_ion_step[]) + t_params.max_t_increment_this_ion_step[] = + max(t_params.t[] - initial_time, + t_params.max_t_increment_this_ion_step[]) + end + if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] # Reset dt in case it was reduced to be less than 0.5*ion_dt begin_serial_region() diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 30bc39cb2..f1505b0ca 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -55,6 +55,8 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero write_moments_output::MPISharedArray{Bool,1} write_dfns_output::MPISharedArray{Bool,1} step_counter::Ref{mk_int} + 
max_step_count_this_ion_step::Ref{mk_int} + max_t_increment_this_ion_step::Ref{mk_float} moments_output_counter::Ref{mk_int} dfns_output_counter::Ref{mk_int} failure_counter::Ref{mk_int} diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index a1db148c6..556dce9e3 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -441,12 +441,13 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, previous_dt_shared, next_output_time, dt_before_output, dt_before_last_fail, CFL_prefactor, step_to_moments_output, step_to_dfns_output, write_moments_output, write_dfns_output, Ref(0), - Ref(0), Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"], - t_input["nwrite_dfns"], moments_output_times, dfns_output_times, - t_input["type"], rk_coefs, rk_coefs_implicit, - implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, - low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"], - t_input["step_update_prefactor"], t_input["max_increase_factor"], + Ref(0), Ref{mk_float}(0.0), Ref(0), Ref(0), Ref(0), mk_int[], + mk_int[], t_input["nwrite"], t_input["nwrite_dfns"], + moments_output_times, dfns_output_times, t_input["type"], rk_coefs, + rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, + rk_order, adaptive, low_storage, t_input["rtol"], t_input["atol"], + t_input["atol_upar"], t_input["step_update_prefactor"], + t_input["max_increase_factor"], t_input["max_increase_factor_near_last_fail"], t_input["last_fail_proximity_factor"], t_input["minimum_dt"], t_input["maximum_dt"], @@ -2019,7 +2020,7 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param scratch[t_params.n_rk_stages+1], pdf, moments, fields, nothing, nothing, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, nl_solver_params, advance, scratch_dummy, false; + 
t_params, nl_solver_params, advance, scratch_dummy, false, 0, 0.0; pdf_bc_constraints=false, update_electrons=false) end @@ -2311,8 +2312,9 @@ function apply_all_bcs_constraints_update_moments!( this_scratch, pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, nl_solver_params, advance, scratch_dummy, diagnostic_moments; - pdf_bc_constraints=true, update_electrons=true) + t_params, nl_solver_params, advance, scratch_dummy, diagnostic_moments, + max_electron_pdf_iterations, max_electron_sim_time; pdf_bc_constraints=true, + update_electrons=true) begin_s_r_z_region() @@ -2374,8 +2376,6 @@ function apply_all_bcs_constraints_update_moments!( composition.electron_physics) if composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) - max_electron_pdf_iterations = 1000 - max_electron_sim_time = 1.0e-3 # Copy ion and electron moments from `scratch` into `moments` to be used in # electron kinetic equation update @@ -2638,7 +2638,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, t_params, nl_solver_params, advance, - scratch_dummy, false; update_electrons=false) + scratch_dummy, false, 0, 0.0; update_electrons=false) # Re-calculate moment derivatives in the `moments` struct, in case they were changed # by the previous call @@ -2647,7 +2647,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, t_params, nl_solver_params, advance, - scratch_dummy, false; 
pdf_bc_constraints=false, update_electrons=false) + scratch_dummy, false, 0, 0.0; pdf_bc_constraints=false, update_electrons=false) # Calculate the timstep error estimates if z.bc == "wall" && (moments.evolve_upar || moments.evolve_ppar) @@ -2936,6 +2936,9 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, n_rk_stages = t_params.n_rk_stages + max_electron_pdf_iterations = 1000 + max_electron_sim_time = 1.0e-3 + first_scratch = scratch[1] @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin first_scratch.pdf[ivpa,ivperp,iz,ir,is] = pdf.ion.norm[ivpa,ivperp,iz,ir,is] @@ -3034,7 +3037,8 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, nl_solver_params, advance, scratch_dummy, false) + t_params, nl_solver_params, advance, scratch_dummy, false, + max_electron_pdf_iterations, max_electron_sim_time) if success != "" # Break out of the istage loop, as passing `success != ""` to the # adaptive timestep update function will signal a failed timestep, so @@ -3081,8 +3085,9 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, t_params, nl_solver_params, - advance, scratch_dummy, diagnostic_moments; - pdf_bc_constraints=apply_bc_constraints, update_electrons=update_electrons) + advance, scratch_dummy, diagnostic_moments, max_electron_pdf_iterations, + max_electron_sim_time; pdf_bc_constraints=apply_bc_constraints, + update_electrons=update_electrons) if success != "" # Break out of the istage loop, as passing `success != ""` to the # adaptive timestep update function will signal a failed timestep, so @@ -3093,7 
+3098,27 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, if t_params.adaptive nl_max_its_fraction = 0.0 - for p ∈ nl_solver_params + if t_params.implicit_electron_advance + params_to_check = (nl_solver_params.ion_advance, + nl_solver_params.vpa_advection, + nl_solver_params.electron_conduction, + nl_solver_params.electron_advance) + else + # nl_solver_params.electron_advance is used for the backward-Euler timestep in + # electron timestepping, so its iteration count is not relevant here. Instead, + # check the number of electron pseudo-timesteps or pseudo-time increment + # compared to their maximum values + params_to_check = (nl_solver_params.ion_advance, + nl_solver_params.vpa_advection, + nl_solver_params.electron_conduction) + if t_params.electron !== nothing + electron_time_advance_fraction = + min(t_params.electron.max_step_count_this_ion_step[] / max_electron_pdf_iterations, + t_params.electron.max_t_increment_this_ion_step[] / max_electron_sim_time) + nl_max_its_fraction = max(electron_time_advance_fraction, nl_max_its_fraction) + end + end + for p ∈ params_to_check if p !== nothing nl_max_its_fraction = max(p.max_nonlinear_iterations_this_step[] / p.nonlinear_max_iterations, @@ -3118,6 +3143,11 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, reset_nonlinear_per_stage_counters!(nl_solver_params.ion_advance) reset_nonlinear_per_stage_counters!(nl_solver_params.vpa_advection) reset_nonlinear_per_stage_counters!(nl_solver_params.electron_conduction) + if !t_params.implicit_electron_advance && t_params.electron !== nothing + t_params.electron.max_step_count_this_ion_step[] = 0 + t_params.electron.max_t_increment_this_ion_step[] = 0.0 + end + if t_params.previous_dt[] > 0.0 istage = n_rk_stages+1 From 8357a1eb32a6825cd7b03052bf5ed13184033186 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 20:42:18 +0100 Subject: [PATCH 018/107] Fix initialisation of vth and electron moment 
derivatives in electron_backward_euler!() --- .../src/electron_kinetic_equation.jl | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index b1334e108..357f338aa 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -651,10 +651,10 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # Use forward-Euler step (with `ion_dt` as the timestep) as initial guess for # updated electron_ppar - electron_energy_equation!(scratch[t_params.n_rk_stages+1].electron_ppar, - moments.electron.ppar, moments.electron.dens, - moments.electron.upar, moments.ion.dens, - moments.ion.upar, moments.ion.ppar, + ppar_guess = scratch[t_params.n_rk_stages+1].electron_ppar + electron_energy_equation!(ppar_guess, moments.electron.ppar, + moments.electron.dens, moments.electron.upar, + moments.ion.dens, moments.ion.upar, moments.ion.ppar, moments.neutral.dens, moments.neutral.uz, moments.neutral.pz, moments.electron, collisions, ion_dt, composition, external_source_settings.electron, @@ -667,6 +667,21 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end reduced_by_ion_dt = true end + + begin_r_z_region() + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron parallel pressure + moments.electron.vth[iz,ir] = sqrt(abs(2.0 * ppar_guess[iz,ir] / + (moments.electron.dens[iz,ir] * + composition.me_over_mi))) + end + calculate_electron_moment_derivatives!(moments, + (electron_density=moments.electron.dens, + electron_upar=moments.electron.upar, + electron_ppar=ppar_guess), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, + composition.electron_physics) end if !evolve_ppar From ad2636ccc33e1b29bf709c32df7d9c22055203ba Mon Sep 17 00:00:00 2001 From: John Omotani Date: 
Tue, 13 Aug 2024 22:05:35 +0100 Subject: [PATCH 019/107] Create 'global' strong-form differentiation matrix in gauss_legendre Actually only global within a block - if the dimension is parallelised with distributed MPI, it is not actually the global matrix. --- moment_kinetics/src/gauss_legendre.jl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index f39a7ace5..0026dbeec 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -94,6 +94,8 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info K_matrix::TSparse # global (1D) weak Laplacian derivative matrix L_matrix::TSparse + # global (1D) strong first derivative matrix + D_matrix::TSparse # global (1D) LU object mass_matrix_lu::TLU # dummy matrix for local operators @@ -113,14 +115,16 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, mass_matrix = allocate_float(coord.n,coord.n) K_matrix = allocate_float(coord.n,coord.n) L_matrix = allocate_float(coord.n,coord.n) + D_matrix = allocate_float(coord.n,coord.n) setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; dirichlet_bc=dirichlet_bc) setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc) setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; dirichlet_bc=dirichlet_bc) mass_matrix_lu = lu(sparse(mass_matrix)) Qmat = allocate_float(coord.ngrid,coord.ngrid) - return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),mass_matrix_lu,Qmat) + return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),sparse(D_matrix),mass_matrix_lu,Qmat) end function 
setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_dim=true) @@ -933,6 +937,8 @@ function get_QQ_local!(QQ::Array{mk_float,2},ielement, get_LL_local!(QQ,ielement,lobatto,radau,coord) elseif option == "L_with_BC_terms" get_LL_local!(QQ,ielement,lobatto,radau,coord,explicit_BC_terms=true) + elseif option == "D" + get_DD_local!(QQ,ielement,lobatto,radau,coord) end return nothing end @@ -1100,6 +1106,18 @@ function get_LL_local!(QQ,ielement, return nothing end +# Strong-form differentiation matrix +function get_DD_local!(QQ, ielement, lobatto::gausslegendre_base_info, + radau::gausslegendre_base_info, coord) + scale_factor = coord.element_scale[ielement] + if coord.name == "vperp" && ielement == 1 && coord.irank == 0 + @. QQ = radau.Dmat / scale_factor + else + @. QQ = lobatto.Dmat / scale_factor + end + return nothing +end + # mass matrix without vperp factor (matrix N) # only useful for the vperp coordinate function get_MN_local!(QQ,ielement, From 768aca7b7b20bbcd1ca5806ad51e55e82195daad Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 22:46:42 +0100 Subject: [PATCH 020/107] Remove 'nu_ei' from examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml Only really meant to include Krook collisions - not quite sure what 'nu_ei' does... 
--- .../periodic_split3_kinetic-implicit-electron_ppar-loworder.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml index f82a5c2f3..689fa44a3 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -44,7 +44,6 @@ vpa_IC_temperature_phase2 = 0.0 charge_exchange_frequency = 0.75 ionization_frequency = 0.0 constant_ionization_rate = false -nu_ei = 1000.0 r_ngrid = 1 r_nelement = 1 z_ngrid = 5 From 9931c953f9edea05dfd52e8e1e2e16ff6fbe3c9c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 13 Aug 2024 22:47:53 +0100 Subject: [PATCH 021/107] Preconditioner for electron_ppar in electron_backward_euler!() --- .../src/electron_kinetic_equation.jl | 150 ++++++++++++++++-- moment_kinetics/src/nonlinear_solvers.jl | 16 +- moment_kinetics/src/time_advance.jl | 2 +- 3 files changed, 155 insertions(+), 13 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 357f338aa..e1d463de8 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2,6 +2,7 @@ module electron_kinetic_equation using LinearAlgebra using MPI +using SparseArrays export get_electron_critical_velocities @@ -12,6 +13,8 @@ using ..boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition! using ..calculus: derivative!, second_derivative!, integral using ..communication +using ..gauss_legendre: gausslegendre_info +using ..input_structs using ..interpolation: interpolate_to_grid_1d! 
using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float @@ -660,14 +663,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos ion_dt, composition, external_source_settings.electron, num_diss_params, r, z) - if t_params.dt[] > 0.5 * ion_dt - begin_serial_region() - @serial_region begin - t_params.dt[] = 0.5 * ion_dt - end - reduced_by_ion_dt = true - end - begin_r_z_region() @loop_r_z ir iz begin # update the electron thermal speed using the updated electron parallel pressure @@ -744,6 +739,10 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # initialise the electron pdf convergence flag to false electron_pdf_converged = false + # Reset nl_solver_params.stage_counter[] so that the preconditioner is re-computed at + # the first step + nl_solver_params.stage_counter[] = 0 + first_step = true # evolve (artificially) in time until the residual is less than the tolerance while (!electron_pdf_converged @@ -784,6 +783,137 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos composition, external_source_settings, num_diss_params, t_params.dt[], ir) + if nl_solver_params.preconditioner_type == "electron_split_lu" + if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + if z.irank == 0 + ppar_matrix = allocate_float(z.n, z.n) + ppar_matrix .= 0.0 + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + + dt = t_params.dt[] + vth = @view moments.electron.vth[:,ir] + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + 
dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + @views third_moment = @. 0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth + + # Note that as + # qpar = 2 * ppar * vth * third_moment + # = 2 * ppar^(3/2) / dens^(1/2) * third_moment + # we have that + # d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) * d(third_moment)/dz + # - ppar^(3/2) / dens^(3/2) * third_moment * d(dens)/dz + # + 3 * ppar^(1/2) / dens^(1/2) * third_moment * d(ppar)/dz + # so for the Jacobian + # d[d(qpar)/dz)]/d[ppar] + # = 3 * ppar^(1/2) / dens^(1/2) * d(third_moment)/dz + # - 3/2 * ppar^(1/2) / dens^(3/2) * third_moment * d(dens)/dz + # + 3/2 / ppar^(1/2) / dens^(1/2) * third_moment * d(ppar)/dz + # + 3 * ppar^(1/2) / dens^(1/2) * third_moment * d(.)/dz + dthird_moment_dz = z.scratch2 + derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + # Diagonal terms + for row ∈ 1:z.n + ppar_matrix[row,row] = 1.0 + + # 3*ppar*dupar_dz + ppar_matrix[row,row] += 3.0 * dt * dupar_dz[row] + + # terms from d(qpar)/dz + ppar_matrix[row,row] += + dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row]) * dthird_moment_dz[row] + - 1.5 * sqrt(electron_ppar_new[row]) / dens[row]^1.5 * third_moment[row] * ddens_dz[row] + + 1.5 / sqrt(electron_ppar_new[row] / dens[row]) * third_moment[row] * dppar_dz[row]) + end + if ion_dt !== nothing + # Backward-Euler forcing term + for row ∈ 1:z.n + ppar_matrix[row,row] += dt / ion_dt + end + end + + + # d(.)/dz terms + # Note that the z-derivative matrix is local to this block, and + # for the preconditioner we do not include any distributed-MPI + # communication (we rely on the JFNK iteration to sort out the + # coupling between blocks). 
+ if !isa(z_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral coordinate type is " + * "supported by electron_backward_euler!() " + * "preconditioner because we need differentiation" + * "matrices.") + end + z_deriv_matrix = z_spectral.D_matrix + for row ∈ 1:z.n + @. ppar_matrix[row,:] += + dt * (upar[row] + + 3.0 * sqrt(electron_ppar_new[row] / dens[row]) * third_moment[row]) * + z_deriv_matrix[row,:] + end + + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + + nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix)) + else + ppar_matrix = allocate_float(0, 0) + ppar_matrix[] = 1.0 + end + end + + function split_precon!(x) + precon_ppar, precon_f = x + + begin_z_region() + ppar_precon_matrix = nl_solver_params.preconditioners.ppar[ir] + @loop_z iz begin + z.scratch[iz] = precon_ppar[iz] + end + + begin_serial_region() + @serial_region begin + ldiv!(precon_ppar, ppar_precon_matrix, z.scratch) + end + end + + left_preconditioner = identity + right_preconditioner = split_precon! 
+ elseif nl_solver_params.preconditioner_type == "none" + left_preconditioner = identity + right_preconditioner = identity + else + error("preconditioner_type=$(nl_solver_params.preconditioner_type) is not " + * "supported by electron_backward_euler!().") + end + # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the # electron parallel pressure. function residual_func!(residual, new_variables) @@ -941,8 +1071,8 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos newton_success = newton_solve!((electron_ppar_new, f_electron_new), residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; - left_preconditioner=identity, - right_preconditioner=identity, + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, coords=(z=z, vperp=vperp, vpa=vpa)) if newton_success #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index c898f32ad..725ff1e36 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -62,6 +62,7 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} serial_solve::Bool max_nonlinear_iterations_this_step::Ref{mk_int} max_linear_iterations_this_step::Ref{mk_int} + preconditioner_type::String preconditioner_update_interval::mk_int preconditioners::Tprecon end @@ -77,7 +78,7 @@ for example a preconditioner object for each point in that outer loop. 
""" function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol=1.0e-5, default_atol=1.0e-12, serial_solve=false, - electron_ppar_pdf_solve=false, preconditioner_type=nothing) + electron_ppar_pdf_solve=false, preconditioner_type="none") nl_solver_section = set_defaults_and_check_section!( input_dict, "nonlinear_solver"; rtol=default_rtol, @@ -131,8 +132,18 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol # These will be calculated properly within the time loop. preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)), reverse(outer_coord_sizes)) - else + elseif preconditioner_type == "electron_split_lu" + preconditioners = (z=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), + tuple(coords.vpa.n, reverse(outer_coord_sizes)...)), + vpa=fill(lu(sparse(1.0*I, coords.vpa.n, coords.vpa.n)), + tuple(coords.z.n, reverse(outer_coord_sizes)...)), + ppar=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), + reverse(outer_coord_sizes)), + ) + elseif preconditioner_type == "none" preconditioners = nothing + else + error("Unrecognised preconditioner_type=$preconditioner_type") end linear_initial_guess = zeros(linear_restart) @@ -143,6 +154,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol linear_restart, nl_solver_input.linear_max_restarts, H, V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), serial_solve, Ref(0), Ref(0), + preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 556dce9e3..25df9930b 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -674,7 +674,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - preconditioner_type="lu") + 
preconditioner_type="electron_split_lu") else nl_solver_electron_advance_params = nothing end From 334ec08343158c308faef1a56b5785413127bc93 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 14 Aug 2024 17:47:18 +0100 Subject: [PATCH 022/107] Attempt to add preconditioning for z-advection + diagonal terms in electron_backward_euler!() ...does not seem to actually reduce iteration counts significantly. --- .../src/electron_kinetic_equation.jl | 141 +++++++++++++++--- 1 file changed, 120 insertions(+), 21 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index e1d463de8..ff987395c 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -31,7 +31,8 @@ using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_v using ..em_fields: update_phi! using ..external_sources: external_electron_source! using ..file_io: get_electron_io_info, write_electron_state, finish_electron_io -using ..krook_collisions: electron_krook_collisions! +using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee, + get_collision_frequency_ei using ..moment_constraints: hard_force_moment_constraints!, moment_constraints_on_residual! 
using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct @@ -785,6 +786,103 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos if nl_solver_params.preconditioner_type == "electron_split_lu" if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + dt = t_params.dt[] + vth = @view moments.electron.vth[:,ir] + me = composition.me_over_mi + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = electron_ppar_new + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + source_amplitude = moments.electron.external_source_amplitude + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + + # Note the region(s) used here must be the same as the region(s) used + # when the matrices are used in `split_precon!()`, so that the + # parallelisation is the same and each matrix is used on the same + # process that created it. 
+ + # z-advection preconditioner + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + z_matrix = allocate_float(z.n, z.n) + z_matrix .= 0.0 + + z_speed = @view z_advect[1].speed[:,ivpa,ivperp,ir] + for ielement ∈ 1:z.nelement_local + imin = z.imin[ielement] - (ielement != 1) + imax = z.imax[ielement] + if ielement == 1 + z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + else + if z_speed[imin] < 0.0 + z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + elseif z_speed[imin] > 0.0 + # Do nothing + else + z_matrix[imin,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + end + end + z_matrix[imin+1:imax-1,imin:imax] .+= z_spectral.lobatto.Dmat[2:end-1,:] ./ z.element_scale[ielement] + if ielement == z.nelement_local + z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + else + if z_speed[imax] < 0.0 + # Do nothing + elseif z_speed[imax] > 0.0 + z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + else + z_matrix[imax,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + end + end + end + # Multiply by advection speed + for row ∈ 1:z.n + z_matrix[row,:] .*= dt * z_speed[row] + end + + # Diagonal entries + for row ∈ 1:z.n + z_matrix[row,row] += 1.0 + + # Terms from `add_contribution_from_pdf_term!()` + z_matrix[row,row] += dt * (0.5 * dqpar_dz[row] / ppar[row] + + vpa.grid[ivpa] * vth[row] * (ddens_dz[row] / dens[row] + - dvth_dz[row] / vth[row])) + end + if external_source_settings.electron.active + for row ∈ 1:z.n + # Source terms from `add_contribution_from_pdf_term!()` + z_matrix[row,row] += dt * (1.5 * source_density_amplitude[row] / dens[row] + - (0.5 * source_pressure_amplitude[row] + + source_momentum_amplitude[row]) / ppar[row] + ) + end + if 
external_source_settings.electron.source_type == "energy" + for row ∈ 1:z.n + # Contribution from `external_electron_source!()` + z_matrix[row,row] += dt * source_amplitude[row] + end + end + end + if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 + for row ∈ 1:z.n + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[row], vth[row]) + nu_ei = get_collision_frequency_ei(collisions, dens[row], vth[row]) + z_matrix[row,row] += dt * (nu_ee + nu_ei) + end + end + + nl_solver_params.preconditioners.z[ivpa,ivperp,ir] = lu(sparse(z_matrix)) + end + if z.irank == 0 ppar_matrix = allocate_float(z.n, z.n) ppar_matrix .= 0.0 @@ -797,30 +895,22 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos * "in electron_backward_euler!() preconditioner.") end - dt = t_params.dt[] - vth = @view moments.electron.vth[:,ir] - dens = @view moments.electron.dens[:,ir] - upar = @view moments.electron.upar[:,ir] - ddens_dz = @view moments.electron.ddens_dz[:,ir] - dupar_dz = @view moments.electron.dupar_dz[:,ir] - dppar_dz = @view moments.electron.dppar_dz[:,ir] - # Reconstruct w_∥^3 moment of g_e from already-calculated qpar @views third_moment = @. 
0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth # Note that as # qpar = 2 * ppar * vth * third_moment - # = 2 * ppar^(3/2) / dens^(1/2) * third_moment + # = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * third_moment # we have that - # d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) * d(third_moment)/dz - # - ppar^(3/2) / dens^(3/2) * third_moment * d(dens)/dz - # + 3 * ppar^(1/2) / dens^(1/2) * third_moment * d(ppar)/dz + # d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz + # - ppar^(3/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz + # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz # so for the Jacobian # d[d(qpar)/dz)]/d[ppar] - # = 3 * ppar^(1/2) / dens^(1/2) * d(third_moment)/dz - # - 3/2 * ppar^(1/2) / dens^(3/2) * third_moment * d(dens)/dz - # + 3/2 / ppar^(1/2) / dens^(1/2) * third_moment * d(ppar)/dz - # + 3 * ppar^(1/2) / dens^(1/2) * third_moment * d(.)/dz + # = 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz + # - 3/2 * ppar^(1/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz + # + 3/2 / ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz + # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(.)/dz dthird_moment_dz = z.scratch2 derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2, buffer_3, buffer_4, z_spectral, z) @@ -834,9 +924,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # terms from d(qpar)/dz ppar_matrix[row,row] += - dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row]) * dthird_moment_dz[row] - - 1.5 * sqrt(electron_ppar_new[row]) / dens[row]^1.5 * third_moment[row] * ddens_dz[row] - + 1.5 / sqrt(electron_ppar_new[row] / dens[row]) * third_moment[row] * dppar_dz[row]) + dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * dthird_moment_dz[row] + - 1.5 * sqrt(electron_ppar_new[row] / me) / dens[row]^1.5 * third_moment[row] * ddens_dz[row] + + 1.5 / sqrt(electron_ppar_new[row] / dens[row] / me) * 
third_moment[row] * dppar_dz[row]) end if ion_dt !== nothing # Backward-Euler forcing term @@ -861,7 +951,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos for row ∈ 1:z.n @. ppar_matrix[row,:] += dt * (upar[row] - + 3.0 * sqrt(electron_ppar_new[row] / dens[row]) * third_moment[row]) * + + 3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row]) * z_deriv_matrix[row,:] end @@ -892,6 +982,15 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos function split_precon!(x) precon_ppar, precon_f = x + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + z_precon_matrix = nl_solver_params.preconditioners.z[ivpa,ivperp,ir] + f_slice = @view precon_f[ivpa,ivperp,:] + @views z.scratch .= f_slice + ldiv!(z.scratch2, z_precon_matrix, z.scratch) + f_slice .= z.scratch2 + end + begin_z_region() ppar_precon_matrix = nl_solver_params.preconditioners.ppar[ir] @loop_z iz begin From f798dcafc1ea38ea48bd8b36fb3576664363270b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 16 Aug 2024 12:09:36 +0100 Subject: [PATCH 023/107] Handle *_preconditioner default args outside loop in newton_solve!() Should be marginally more efficient.
--- moment_kinetics/src/nonlinear_solvers.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 725ff1e36..6aa1296ed 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -276,6 +276,13 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, parallel_map = get_parallel_map(coords) parallel_delta_x_calc = get_parallel_delta_x_calc(coords) + if left_preconditioner === nothing + left_preconditioner = identity + end + if right_preconditioner === nothing + right_preconditioner = identity + end + residual_func!(residual, x) residual_norm = distributed_norm(residual) counter = 0 @@ -291,13 +298,6 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, counter += 1 #println("\nNewton ", counter) - if left_preconditioner === nothing - left_preconditioner = identity - end - if right_preconditioner === nothing - right_preconditioner = identity - end - # Solve (approximately?): # J δx = -RHS(x) parallel_map(()->0.0, delta_x) From 766a2d52e472ae0e9490ed8f92672a5ac408c692 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 16 Aug 2024 15:57:37 +0100 Subject: [PATCH 024/107] Disable handling periodic bc in gauss_legendre, but add flag to enable The handling of periodic bc only works when not using distributed-MPI, so is not very useful. 
It is now disabled by default, but can be enabled by passing a flag to `setup_gausslegendre_pseudospectral()` --- moment_kinetics/src/gauss_legendre.jl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 0026dbeec..3e64d7f86 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -102,13 +102,13 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info Qmat::Array{mk_float,2} end -function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, dirichlet_bc=false) +function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, dirichlet_bc=false, handle_global_periodic=false) lobatto = setup_gausslegendre_pseudospectral_lobatto(coord,collision_operator_dim=collision_operator_dim) radau = setup_gausslegendre_pseudospectral_radau(coord,collision_operator_dim=collision_operator_dim) if collision_operator_dim S_matrix = allocate_float(coord.n,coord.n) - setup_global_weak_form_matrix!(S_matrix, lobatto, radau, coord, "S") + setup_global_weak_form_matrix!(S_matrix, lobatto, radau, coord, "S", handle_periodic=handle_global_periodic) else S_matrix = allocate_float(0, 0) end @@ -117,10 +117,10 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, L_matrix = allocate_float(coord.n,coord.n) D_matrix = allocate_float(coord.n,coord.n) - setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; dirichlet_bc=dirichlet_bc) - setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc) - setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc) - setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; 
dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) + setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) + setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) + setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) mass_matrix_lu = lu(sparse(mass_matrix)) Qmat = allocate_float(coord.ngrid,coord.ngrid) @@ -839,7 +839,7 @@ where M is the mass matrix and K is the stiffness matrix. function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, lobatto::gausslegendre_base_info, radau::gausslegendre_base_info, - coord,option; dirichlet_bc=false) + coord,option; dirichlet_bc=false, handle_periodic=true) QQ_j = allocate_float(coord.ngrid,coord.ngrid) QQ_jp1 = allocate_float(coord.ngrid,coord.ngrid) @@ -853,10 +853,10 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, # N.B. 
QQ varies with ielement for vperp, but not vpa # a radau element is used for the vperp grid (see get_QQ_local!()) get_QQ_local!(QQ_j,j,lobatto,radau,coord,option) - if coord.bc == "periodic" && coord.nrank != 1 + if handle_periodic && coord.bc == "periodic" && coord.nrank != 1 error("periodic boundary conditions not supported when dimension is distributed") end - if coord.bc == "periodic" && coord.nrank == 1 + if handle_periodic && coord.bc == "periodic" && coord.nrank == 1 QQ_global[imax[end], imin[j]:imax[j]] .+= QQ_j[1,:] ./ 2.0 QQ_global[1,1] += 1.0 QQ_global[1,end] += -1.0 @@ -881,7 +881,7 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, end # upper boundary assembly on element if j == coord.nelement_local - if coord.bc == "periodic" && coord.nrank == 1 + if handle_periodic && coord.bc == "periodic" && coord.nrank == 1 QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] / 2.0 else QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] @@ -891,7 +891,7 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, end end - if dirichlet_bc && !coord.bc == "periodic" + if dirichlet_bc && !(handle_periodic && coord.bc == "periodic") # Make matrix diagonal for first/last grid points so it does not change the values # there if !(coord.name == "vperp") From 33b540b4e5b1aff32052a2d30f7410d0f435a310 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 16 Aug 2024 18:23:01 +0100 Subject: [PATCH 025/107] Add 'dense' second derivative matrix to gauss_legendre For calculating preconditioners, it can be useful to have an explicitly calculated matrix ``` dense_second_deriv_matrix = inv(mass_matrix) * K_matrix ``` which includes the inverted mass-matrix already. Because of the matrix inverse, dense_second_deriv_matrix is a dense matrix, so (for efficiency) should not be used unless absolutely necessary. 
--- moment_kinetics/src/gauss_legendre.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 3e64d7f86..de95dae35 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -96,6 +96,9 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info L_matrix::TSparse # global (1D) strong first derivative matrix D_matrix::TSparse + # global (1D) weak second derivative matrix, with inverse mass matrix included (so + # matrix is dense) + dense_second_deriv_matrix::AbstractArray{mk_float,2} # global (1D) LU object mass_matrix_lu::TLU # dummy matrix for local operators @@ -121,10 +124,11 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; dirichlet_bc=dirichlet_bc, handle_periodic=handle_global_periodic) + dense_second_deriv_matrix = inv(mass_matrix) * K_matrix mass_matrix_lu = lu(sparse(mass_matrix)) Qmat = allocate_float(coord.ngrid,coord.ngrid) - return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),sparse(D_matrix),mass_matrix_lu,Qmat) + return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),sparse(D_matrix),dense_second_deriv_matrix,mass_matrix_lu,Qmat) end function setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_dim=true) From 771a7f2eb0086a953e7fbf08d7619ce1076fe589 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 18 Aug 2024 18:16:42 +0100 Subject: [PATCH 026/107] Re-calculate qpar in more places in 
electron_backward_euler!() ...to ensure that the value of qpar and dqpar/dz is always consistent with the current distribution function, ppar, and vth --- .../src/electron_kinetic_equation.jl | 54 ++++++++++++++----- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index ff987395c..5a2151271 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -641,6 +641,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos composition.me_over_mi))) scratch[t_params.n_rk_stages+1].electron_ppar[iz,ir] = moments.electron.ppar[iz,ir] end + calculate_electron_qpar_from_pdf!(moments.electron.qpar, moments.electron.ppar, + moments.electron.vth, + scratch[t_params.n_rk_stages+1].pdf_electron, vpa) calculate_electron_moment_derivatives!(moments, (electron_density=moments.electron.dens, electron_upar=moments.electron.upar, @@ -671,6 +674,10 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos (moments.electron.dens[iz,ir] * composition.me_over_mi))) end + calculate_electron_qpar_from_pdf!(moments.electron.qpar, ppar_guess, + moments.electron.vth, + scratch[t_params.n_rk_stages+1].pdf_electron, + vpa) calculate_electron_moment_derivatives!(moments, (electron_density=moments.electron.dens, electron_upar=moments.electron.upar, @@ -783,6 +790,18 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos scratch_dummy, collisions, composition, external_source_settings, num_diss_params, t_params.dt[], ir) + # Calculate heat flux and derivatives using updated f_electron + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) + @views calculate_electron_moment_derivatives_no_r!( + moments, + 
(electron_density=moments.electron.dens[:,ir], + electron_upar=moments.electron.upar[:,ir], + electron_ppar=electron_ppar_new), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) if nl_solver_params.preconditioner_type == "electron_split_lu" if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 @@ -1024,12 +1043,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos vpa_spectral, vpa_advect, num_diss_params, composition, ir) - # Calculate heat flux and derivatives using new_variables - calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar, - electron_ppar_newvar, - moments.electron.vth, - f_electron_newvar, vpa, ir) - if evolve_ppar this_dens = moments.electron.dens this_upar = moments.electron.upar @@ -1042,6 +1055,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos (this_dens[iz,ir] * composition.me_over_mi))) end + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, + ir) + calculate_electron_moment_derivatives_no_r!( moments, (electron_density=this_dens, @@ -1050,6 +1070,12 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos scratch_dummy, z, z_spectral, num_diss_params.electron.moment_dissipation_coefficient, ir) else + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, + ir) # compute the z-derivative of the parallel electron heat flux @views derivative_z!(moments.electron.dqpar_dz[:,ir], moments.electron.qpar[:,ir], buffer_1, buffer_2, @@ -1255,10 +1281,10 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos if !evolve_ppar 
# update the electron heat flux moments.electron.qpar_updated[] = false - calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar, - electron_ppar_new, - moments.electron.vth, - f_electron_new, vpa, ir) + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) # compute the z-derivative of the parallel electron heat flux @views derivative_z!(moments.electron.dqpar_dz[:,ir], @@ -1460,10 +1486,10 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo num_diss_params, composition, ir) # Calculate heat flux and derivatives using new_variables - calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar, - electron_ppar_new, - moments.electron.vth, - f_electron_new, vpa, ir) + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) this_dens = moments.electron.dens this_upar = moments.electron.upar From d486225b830cd95b7c7d0d1009bf790a3c261b08 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 14 Aug 2024 18:40:15 +0100 Subject: [PATCH 027/107] Non-split LU preconditioner for electron_backward_euler!() --- .../src/electron_kinetic_equation.jl | 486 +++++++++++++++++- moment_kinetics/src/nonlinear_solvers.jl | 8 + moment_kinetics/src/time_advance.jl | 3 +- 3 files changed, 495 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 5a2151271..51a84f5f9 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -8,7 +8,7 @@ export get_electron_critical_velocities using ..looping using ..analysis: steady_state_residuals -using ..derivatives: derivative_z! +using ..derivatives: derivative_z!, derivative_z_pdf_vpavperpz! 
using ..boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition! using ..calculus: derivative!, second_derivative!, integral @@ -1024,6 +1024,490 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos left_preconditioner = identity right_preconditioner = split_precon! + elseif nl_solver_params.preconditioner_type == "electron_lu" + if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + dt = t_params.dt[] + f = f_electron_new + vth = @view moments.electron.vth[:,ir] + me = composition.me_over_mi + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = electron_ppar_new + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient + source_amplitude = moments.electron.external_source_amplitude + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + + @loop_vperp_vpa ivperp ivpa begin + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_advect[1].speed[:,ivpa,ivperp,ir] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!( + dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + 
scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], z_spectral, z) + + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, + vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], + f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], + vpa_spectral) + end + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + if !isa(z_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral z-coordinate type is " + * "supported by electron_backward_euler!() " + * "preconditioner because we need differentiation" + * "matrices.") + end + z_deriv_matrix = z_spectral.D_matrix + + if !isa(vpa_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral vpa-coordinate type is " + * "supported by electron_backward_euler!() " + * "preconditioner because we need differentiation" + * "matrices.") + end + vpa_deriv_matrix = vpa_spectral.D_matrix + vpa_dense_second_deriv_matrix = vpa_spectral.dense_second_deriv_matrix + + _, precon_matrix, input_buffer, output_buffer = nl_solver_params.preconditioners[ir] + + pdf_size = z.n * vperp.n * vpa.n + z_size = z.n + v_size = vperp.n * vpa.n + ppar_size = z.n + total_size = pdf_size + ppar_size + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to 
pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + v_remainder = (ivperp - 1) * vpa.n + ivpa + + precon_matrix[row,:] .= 0.0 + precon_matrix[row,row] += 1.0 + + if z.bc == "wall" && (iz == 1 || iz == z.n) + error("Need to do something about wall boundary condition in preconditioner matrix") + end + if ivpa == 1 || ivpa == vpa.n || (vperp.n > 1 && ivperp == vperp.n) + # Leave matrix as identity for these rows to impose Dirichlet + # boundary condition. + continue + end + + ielement_z = z.ielement[iz] + igrid_z = z.igrid[iz] + icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1) + icolumn_max_z = z.imax[ielement_z] + + ielement_vpa = vpa.ielement[ivpa] + igrid_vpa = vpa.igrid[ivpa] + icolumn_min_vpa = vpa.imin[ielement_vpa] - (ielement_vpa != 1) + icolumn_max_vpa = vpa.imax[ielement_vpa] + + z_speed = z_advect[1].speed[iz,ivpa,ivperp,ir] + + # Contributions from (w_∥*vth + upar)*dg/dz + if ielement_z == 1 && igrid_z == 1 + precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z] + elseif ielement_z == z.nelement_local && igrid_z == z.ngrid + precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] + elseif igrid_z == z.ngrid + # Note igrid_z is only ever 1 when ielement_z==1, because + # of the way element boundaries are counted. 
+ icolumn_min_z_next = z.imin[ielement_z+1] - 1 + icolumn_max_z_next = z.imax[ielement_z+1] + if z_speed < 0.0 + precon_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= + dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1] + elseif z_speed > 0.0 + precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] + else + precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] + precon_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= + dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1] + end + else + precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_spectral.lobatto.Dmat[igrid_z,:] ./ z.element_scale[ielement_z] + end + # vth = sqrt(2*p/n/me) + # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth + # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz + precon_matrix[row,pdf_size+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz] + + vpa_speed = vpa_advect[1].speed[ivpa,ivperp,iz,ir] + + # Contributions from + # (1/2*vth/p*dp/dz + 1/2*w_∥/p*dq/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/n/vth + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥ + if ielement_vpa == 1 && igrid_vpa == 1 + precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa] + elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid + 
precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] + elseif igrid_vpa == vpa.ngrid + # Note igrid_vpa is only ever 1 when ielement_vpa==1, because + # of the way element boundaries are counted. + icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 + icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] + if vpa_speed < 0.0 + precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1] + elseif vpa_speed > 0.0 + precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] + else + precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] + precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1] + end + else + precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_spectral.lobatto.Dmat[igrid_vpa,:] ./ vpa.element_scale[ielement_vpa] + end + # q = 2*p*vth*∫dw_∥ w_∥^3 g + # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g + # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + 2*p*vth*∫dw_∥ w_∥^3 dg/dz + # w_∥*0.5/p*dq/dz = w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + w_∥*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz + # = 
w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + w_∥*vth*∫dw_∥ w_∥^3 dg/dz + # d(w_∥*0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) = + # w_∥*(1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + # + w_∥*vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # d(w_∥*0.5/p*dq/dz[irowz])/d(p[icolz]) = + # (-w_∥*3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - w_∥*1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + w_∥*1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) + # + w_∥*(1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * + vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] + - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * + vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + end + precon_matrix[row,pdf_size+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * + (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] + - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz]) + for icolz ∈ 1:z.n + col = pdf_size + icolz + precon_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_matrix[iz,icolz] + end + # (1/2*vth/p*dp/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/n/vth + # - 
w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n) + # = (1/2*sqrt(2/p/n)*dp/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/sqrt(2*p*n) + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n) + # + # dvth/dz = d/dz(sqrt(2*p/n/me)) + # = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz + # = 1/sqrt(2*p*n*me)*dp/dz - 1/2*sqrt(2*p/n/me)/n*dn/dz + # d(dvth/dz[irowz])/d(ppar[icolz]) = + # (-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz)[irowz] * delta(irowz,icolz) + # +1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] + # + # ⇒ d((1/2*vth/p*dp/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/n/vth + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n)[irowz]/d(ppar[icolz]) + # = (-1/4*sqrt(2/n/me)/p^(3/2)*dp/dz + # - w_∥^2*(-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz) + # - 1/2*source_density_amplitude*u/sqrt(2*n)/p^(3/2) + # + w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p^2)[irowz] * delta(irowz,icolz) + # + (1/2*sqrt(2/p/n/me) - w_∥^2/sqrt(2*p*n*me))[irowz] * z_deriv_matrix[irowz,icolz] + precon_matrix[row,pdf_size+iz] += dt * ( + -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz] + - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]) + ) * dpdf_dvpa[ivpa,ivperp,iz] + if external_source_settings.electron.active + precon_matrix[row,pdf_size+iz] += dt * ( + -0.5*source_density_amplitude[iz]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 + + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz] + 2.0*upar[iz]*source_momentum_amplitude[iz])/ppar[iz]^2 + ) * dpdf_dvpa[ivpa,ivperp,iz] + end + for icolz ∈ 1:z.n + col = pdf_size + icolz + precon_matrix[row,col] += dt * ( + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me) + - 
vpa.grid[ivpa]^2/sqrt(2.0*ppar[iz]*dens[iz]*me) + ) * dpdf_dvpa[ivpa,ivperp,iz] * z_deriv_matrix[iz,icolz] + end + + # Terms from `add_contribution_from_pdf_term!()` + # (0.5/p*dq/dz + w_∥*vth*(1/n*dn/dz - 1/vth*dvth/dz))*g + # + # q = 2*p*vth*∫dw_∥ w_∥^3 g + # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g + # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + 2*p*vth*∫dw_∥ w_∥^3 dg/dz + # 0.5/p*dq/dz = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz + # = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + vth*∫dw_∥ w_∥^3 dg/dz + # d(0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) = + # (1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + # + vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # d(0.5/p*dq/dz[irowz])/d(p[icolz]) = + # (-3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - 1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + 1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) + # + (1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + # + # dvth/dz = d/dz(sqrt(2*p/n/me)) + # = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz + # = 1/n/me/vth*dp/dz - p/n^2/me/vth*dn/dz + # = 1/n/me/vth*dp/dz - 1/2*vth/n*dn/dz + # ⇒ vth*(1/n*dn/dz - 1/vth*dvth/dz) + # = (vth/n*dn/dz - dvth/dz) + # = (vth/n*dn/dz - 1/n/me/vth*dp/dz + 1/2*vth/n*dn/dz) + # = (3/2*vth/n*dn/dz - 1/n/me/vth*dp/dz) + # = (3/2*sqrt(2*p/me)/n^(3/2)*dn/dz - 1/sqrt(2*p*n*me)*dp/dz) + # d(vth*(1/n*dn/dz - 1/vth*dvth/dz)[irowz])/d(ppar[icolz]) = + # (3/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz + 1/2/sqrt(2*n*me)/p^(3/2)*dp/dz)[irowz] * delta(irowz,icolz) + # -1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] + # + precon_matrix[row,row] += dt * (0.5 * 
dqpar_dz[iz] / ppar[iz] + + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz] + - dvth_dz[iz] / vth[iz])) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] += + dt * f[ivpa,ivperp,iz] * + (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] += + dt * f[ivpa,ivperp,iz] * vth[iz] * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + end + precon_matrix[row,pdf_size+iz] += + dt * f[ivpa,ivperp,iz] * + (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] + - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz] + + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz] + + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz])) + for icolz ∈ 1:z.n + col = pdf_size + icolz + precon_matrix[row,col] += dt * f[ivpa,ivperp,iz] * + (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] + - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_matrix[iz,icolz] + end + + # Terms from add_dissipation_term!() + if vpa_dissipation_coefficient > 0.0 + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa] + end + end + + if external_source_settings.electron.active + # Source terms from `add_contribution_from_pdf_term!()` + precon_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz] / dens[iz] + - (0.5 * source_pressure_amplitude[iz] + + source_momentum_amplitude[iz]) / ppar[iz] + ) + if external_source_settings.electron.source_type == "energy" 
+ # Contribution from `external_electron_source!()` + precon_matrix[row,row] += dt * source_amplitude[iz] + end + end + + if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz]) + nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz]) + precon_matrix[row,row] += dt * (nu_ee + nu_ei) + end + end + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + precon_matrix[row,:] .= 0.0 + precon_matrix[row,row] += 1.0 + + # Note that as + # q = 2 * p * vth * ∫dw_∥ w_∥^3 g + # = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g + # we have that + # d(q)/dz = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz + # - p^(3/2) * sqrt(2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz + # + 3 * p^(1/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz 
+ # so for the Jacobian + # d(d(q)/dz)[irowz])/d(p[icolz]) + # = (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz + # - 3/2 * sqrt(2) * p^(1/2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz + # + 3/2 * sqrt(2) / p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz)[irowz] * delta[irowz,icolz] + # + (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + # d(d(q)/dz)[irowz])/d(g[icolvpa,icolvperp,icolz]) + # = (2 * sqrt(2) * p^(3/2) / n^(1/2) / me^(1/2))[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz] + + # upar*dppar_dz + for icolz ∈ 1:z.n + col = pdf_size + icolz + precon_matrix[row,col] += + dt * upar[iz] * z_deriv_matrix[iz,icolz] + end + + # 3*ppar*dupar_dz + precon_matrix[row,row] += 3.0 * dt * dupar_dz[iz] + + # terms from d(qpar)/dz + precon_matrix[row,row] += + dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz] + - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz] + + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) + for icolz ∈ 1:z.n + col = pdf_size + icolz + precon_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_matrix[iz,icolz] + end + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz] + + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + precon_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * + 
vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + end + + if ion_dt !== nothing + # Backward-Euler forcing term + precon_matrix[row,row] += dt / ion_dt + end + end + + nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, output_buffer) + end + + + function lu_precon!(x) + precon_ppar, precon_f = x + + precon_lu, _, input_buffer, output_buffer = nl_solver_params.preconditioners[ir] + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + input_buffer[counter] = precon_f[ivpa,ivperp,iz] + counter += 1 + end + @loop_z iz begin + input_buffer[counter] = precon_ppar[iz] + counter += 1 + end + + begin_serial_region() + @serial_region begin + ldiv!(output_buffer, precon_lu, input_buffer) + end + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + precon_f[ivpa,ivperp,iz] = output_buffer[counter] + counter += 1 + end + @loop_z iz begin + precon_ppar[iz] = output_buffer[counter] + counter += 1 + end + + return nothing + end + + left_preconditioner = identity + right_preconditioner = lu_precon! 
elseif nl_solver_params.preconditioner_type == "none" left_preconditioner = identity right_preconditioner = identity diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 6aa1296ed..4f547445f 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -140,6 +140,14 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol ppar=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), reverse(outer_coord_sizes)), ) + elseif preconditioner_type == "electron_lu" + pdf_plus_ppar_size = total_size_coords + coords.z.n + preconditioners = fill((lu(sparse(1.0*I, pdf_plus_ppar_size, pdf_plus_ppar_size)), + allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size), + allocate_shared_float(pdf_plus_ppar_size), + allocate_shared_float(pdf_plus_ppar_size), + ), + reverse(outer_coord_sizes)) elseif preconditioner_type == "none" preconditioners = nothing else diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 25df9930b..418a9da97 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -674,7 +674,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - preconditioner_type="electron_split_lu") + #preconditioner_type="electron_split_lu") + preconditioner_type="electron_lu") else nl_solver_electron_advance_params = nothing end From 5cc9fc69a65fe6ef7a44f711458dc11e6bf648d7 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 20 Aug 2024 18:11:49 +0100 Subject: [PATCH 028/107] Split Jacobian matrix (for electrons) calculation into separate funcs Also includes some fixes for the Jacobian matrix calculations, and extends them to handle a few more options. 
--- moment_kinetics/src/boundary_conditions.jl | 27 + .../src/electron_fluid_equations.jl | 101 +++ .../src/electron_kinetic_equation.jl | 723 +++++++----------- moment_kinetics/src/electron_vpa_advection.jl | 176 +++++ moment_kinetics/src/electron_z_advection.jl | 96 +++ moment_kinetics/src/external_sources.jl | 75 +- moment_kinetics/src/krook_collisions.jl | 56 +- moment_kinetics/src/moment_kinetics.jl | 2 +- 8 files changed, 809 insertions(+), 447 deletions(-) diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 36ef5916e..3f63f459e 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -1029,4 +1029,31 @@ function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,3}, bc, vpe end end +""" + skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + +This function returns `true` when the grid point specified by `iz`, `ivperp`, `ivpa` would +be set by the boundary conditions on the electron distribution function. When this +happens, the corresponding row should be skipped when adding contributions to the Jacobian +matrix, so that the row remains the same as a row of the identity matrix, so that the +Jacobian matrix does not modify those points. Returns `false` otherwise. 
+""" +function skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + # z boundary condition + if z.bc == "wall" && (iz == 1 || iz == z.n) + error("Need to do something about wall boundary condition in preconditioner matrix") + end + + # vperp boundary condition + if vperp.n > 1 && ivperp == vperp.n + return true + end + + if ivpa == 1 || ivpa == vpa.n + return true + end + + return false +end + end # boundary_conditions diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 7a86bf182..07f84849d 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -4,6 +4,8 @@ export calculate_electron_density! export calculate_electron_upar_from_charge_conservation! export calculate_electron_moments! export electron_energy_equation! +export electron_energy_equation_no_r! +export add_electron_energy_equation_to_Jacobian! export calculate_electron_qpar! export calculate_electron_parallel_friction_force! export calculate_electron_qpar_from_pdf! @@ -336,6 +338,105 @@ function electron_energy_equation_no_r!(ppar_out, ppar_in, electron_density, return nothing end +function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, + vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, + composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir; f_offset=0, + ppar_offset=0) + if f_offset == ppar_offset + error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " + * "cannot be in same place in state vector.") + end + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + + me = composition.me_over_mi + z_deriv_matrix = z_spectral.D_matrix + v_size = vperp.n * vpa.n + + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = ppar_offset + iz + + # Note that as + # q = 2 * p * vth * ∫dw_∥ w_∥^3 g + # = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g + # we have that + # d(q)/dz = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz + # - p^(3/2) * sqrt(2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz + # + 3 * p^(1/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz + # so for the Jacobian + # d(d(q)/dz)[irowz])/d(p[icolz]) + # = (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 
d(g)/dz + # - 3/2 * sqrt(2) * p^(1/2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz + # + 3/2 * sqrt(2) / p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz)[irowz] * delta[irowz,icolz] + # + (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + # d(d(q)/dz)[irowz])/d(g[icolvpa,icolvperp,icolz]) + # = (2 * sqrt(2) * p^(3/2) / n^(1/2) / me^(1/2))[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz] + + # upar*dppar_dz + for icolz ∈ 1:z.n + col = ppar_offset + icolz + jacobian_matrix[row,col] += + dt * upar[iz] * z_deriv_matrix[iz,icolz] + end + + # 3*ppar*dupar_dz + jacobian_matrix[row,row] += 3.0 * dt * dupar_dz[iz] + + # terms from d(qpar)/dz + jacobian_matrix[row,row] += + dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz] + - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz] + + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) + for icolz ∈ 1:z.n + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_matrix[iz,icolz] + end + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz] + + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + end + end + + return 
nothing +end + """ electron_energy_residual!(residual, electron_ppar_out, electron_ppar, in, fvec_in, moments, collisions, composition, diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 51a84f5f9..eb269397c 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -10,7 +10,8 @@ using ..looping using ..analysis: steady_state_residuals using ..derivatives: derivative_z!, derivative_z_pdf_vpavperpz! using ..boundary_conditions: enforce_v_boundary_condition_local!, - enforce_vperp_boundary_condition! + enforce_vperp_boundary_condition!, + skip_f_electron_bc_points_in_Jacobian using ..calculus: derivative!, second_derivative!, integral using ..communication using ..gauss_legendre: gausslegendre_info @@ -25,14 +26,19 @@ using ..electron_fluid_equations: calculate_electron_moments!, calculate_electron_parallel_friction_force! using ..electron_fluid_equations: electron_energy_equation!, electron_energy_equation_no_r!, + add_electron_energy_equation_to_Jacobian!, electron_energy_residual! -using ..electron_z_advection: electron_z_advection!, update_electron_speed_z! -using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa! +using ..electron_z_advection: electron_z_advection!, update_electron_speed_z!, + add_electron_z_advection_to_Jacobian! +using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa!, + add_electron_vpa_advection_to_Jacobian! using ..em_fields: update_phi! -using ..external_sources: external_electron_source! +using ..external_sources: external_electron_source!, + add_external_electron_source_to_Jacobian! using ..file_io: get_electron_io_info, write_electron_state, finish_electron_io using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee, - get_collision_frequency_ei + get_collision_frequency_ei, + add_electron_krook_collisions_to_Jacobian! 
using ..moment_constraints: hard_force_moment_constraints!, moment_constraints_on_residual! using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct @@ -1026,448 +1032,21 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos right_preconditioner = split_precon! elseif nl_solver_params.preconditioner_type == "electron_lu" if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 - dt = t_params.dt[] - f = f_electron_new - vth = @view moments.electron.vth[:,ir] - me = composition.me_over_mi - dens = @view moments.electron.dens[:,ir] - upar = @view moments.electron.upar[:,ir] - ppar = electron_ppar_new - qpar = @view moments.electron.qpar[:,ir] - ddens_dz = @view moments.electron.ddens_dz[:,ir] - dupar_dz = @view moments.electron.dupar_dz[:,ir] - dppar_dz = @view moments.electron.dppar_dz[:,ir] - dvth_dz = @view moments.electron.dvth_dz[:,ir] - dqpar_dz = @view moments.electron.dqpar_dz[:,ir] - vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient - source_amplitude = moments.electron.external_source_amplitude - source_density_amplitude = moments.electron.external_source_density_amplitude - source_momentum_amplitude = moments.electron.external_source_momentum_amplitude - source_pressure_amplitude = moments.electron.external_source_pressure_amplitude - - dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] - dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] - - begin_vperp_vpa_region() - update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) - - @loop_vperp_vpa ivperp ivpa begin - @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_advect[1].speed[:,ivpa,ivperp,ir] - end - #calculate the upwind derivative - @views derivative_z_pdf_vpavperpz!( - dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], - scratch_dummy.buffer_vpavperpr_1[:,:,ir], - scratch_dummy.buffer_vpavperpr_2[:,:,ir], - scratch_dummy.buffer_vpavperpr_3[:,:,ir], - 
scratch_dummy.buffer_vpavperpr_4[:,:,ir], - scratch_dummy.buffer_vpavperpr_5[:,:,ir], - scratch_dummy.buffer_vpavperpr_6[:,:,ir], z_spectral, z) - - begin_z_vperp_region() - update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, - vpa.grid, - external_source_settings.electron, ir) - @loop_z_vperp iz ivperp begin - @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] - end - #calculate the upwind derivative of the electron pdf w.r.t. wpa - @loop_z_vperp iz ivperp begin - @views derivative!(dpdf_dvpa[:,ivperp,iz], - f[:,ivperp,iz], vpa, - vpa_advect[1].adv_fac[:,ivperp,iz,ir], - vpa_spectral) - end - - # Reconstruct w_∥^3 moment of g_e from already-calculated qpar - third_moment = scratch_dummy.buffer_z_1 - dthird_moment_dz = scratch_dummy.buffer_z_2 - begin_z_region() - @loop_z iz begin - third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] - end - derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, - buffer_3, buffer_4, z_spectral, z) - - if !isa(z_spectral, gausslegendre_info) - error("Only gausslegendre_pseudospectral z-coordinate type is " - * "supported by electron_backward_euler!() " - * "preconditioner because we need differentiation" - * "matrices.") - end - z_deriv_matrix = z_spectral.D_matrix - - if !isa(vpa_spectral, gausslegendre_info) - error("Only gausslegendre_pseudospectral vpa-coordinate type is " - * "supported by electron_backward_euler!() " - * "preconditioner because we need differentiation" - * "matrices.") - end - vpa_deriv_matrix = vpa_spectral.D_matrix - vpa_dense_second_deriv_matrix = vpa_spectral.dense_second_deriv_matrix - - _, precon_matrix, input_buffer, output_buffer = nl_solver_params.preconditioners[ir] - - pdf_size = z.n * vperp.n * vpa.n - z_size = z.n - v_size = vperp.n * vpa.n - ppar_size = z.n - total_size = pdf_size + ppar_size - begin_z_vperp_vpa_region() - @loop_z_vperp_vpa iz ivperp ivpa begin - # Rows corresponding to pdf_electron - row = (iz - 1) * v_size + 
(ivperp - 1) * vpa.n + ivpa - v_remainder = (ivperp - 1) * vpa.n + ivpa - - precon_matrix[row,:] .= 0.0 - precon_matrix[row,row] += 1.0 - - if z.bc == "wall" && (iz == 1 || iz == z.n) - error("Need to do something about wall boundary condition in preconditioner matrix") - end - if ivpa == 1 || ivpa == vpa.n || (vperp.n > 1 && ivperp == vperp.n) - # Leave matrix as identity for these rows to impose Dirichlet - # boundary condition. - continue - end + orig_lu, precon_matrix, input_buffer, output_buffer = nl_solver_params.preconditioners[ir] - ielement_z = z.ielement[iz] - igrid_z = z.igrid[iz] - icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1) - icolumn_max_z = z.imax[ielement_z] - - ielement_vpa = vpa.ielement[ivpa] - igrid_vpa = vpa.igrid[ivpa] - icolumn_min_vpa = vpa.imin[ielement_vpa] - (ielement_vpa != 1) - icolumn_max_vpa = vpa.imax[ielement_vpa] - - z_speed = z_advect[1].speed[iz,ivpa,ivperp,ir] - - # Contributions from (w_∥*vth + upar)*dg/dz - if ielement_z == 1 && igrid_z == 1 - precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z] - elseif ielement_z == z.nelement_local && igrid_z == z.ngrid - precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] - elseif igrid_z == z.ngrid - # Note igrid_z is only ever 1 when ielement_z==1, because - # of the way element boundaries are counted. 
- icolumn_min_z_next = z.imin[ielement_z+1] - 1 - icolumn_max_z_next = z.imax[ielement_z+1] - if z_speed < 0.0 - precon_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1] - elseif z_speed > 0.0 - precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] - else - precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] - precon_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= - dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1] - end - else - precon_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[igrid_z,:] ./ z.element_scale[ielement_z] - end - # vth = sqrt(2*p/n/me) - # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth - # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz - precon_matrix[row,pdf_size+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz] - - vpa_speed = vpa_advect[1].speed[ivpa,ivperp,iz,ir] - - # Contributions from - # (1/2*vth/p*dp/dz + 1/2*w_∥/p*dq/dz - w_∥^2*dvth/dz - # + source_density_amplitude*u/n/vth - # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p - # + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥ - if ielement_vpa == 1 && igrid_vpa == 1 - precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa] - elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid - 
precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] - elseif igrid_vpa == vpa.ngrid - # Note igrid_vpa is only ever 1 when ielement_vpa==1, because - # of the way element boundaries are counted. - icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 - icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] - if vpa_speed < 0.0 - precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1] - elseif vpa_speed > 0.0 - precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] - else - precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] - precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= - dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1] - end - else - precon_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[igrid_vpa,:] ./ vpa.element_scale[ielement_vpa] - end - # q = 2*p*vth*∫dw_∥ w_∥^3 g - # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g - # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz - # - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz - # + 2*p*vth*∫dw_∥ w_∥^3 dg/dz - # w_∥*0.5/p*dq/dz = w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz - # - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz - # + w_∥*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz - # = 
w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz - # - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz - # + w_∥*vth*∫dw_∥ w_∥^3 dg/dz - # d(w_∥*0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) = - # w_∥*(1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 - # + w_∥*vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] - # d(w_∥*0.5/p*dq/dz[irowz])/d(p[icolz]) = - # (-w_∥*3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - w_∥*1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + w_∥*1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) - # + w_∥*(1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * - vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 - end - for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * - vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] - end - precon_matrix[row,pdf_size+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * - (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] - - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] - + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz]) - for icolz ∈ 1:z.n - col = pdf_size + icolz - precon_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_matrix[iz,icolz] - end - # (1/2*vth/p*dp/dz - w_∥^2*dvth/dz - # + source_density_amplitude*u/n/vth - # - 
w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p - # + w_∥*1/2*source_density_amplitude/n) - # = (1/2*sqrt(2/p/n)*dp/dz - w_∥^2*dvth/dz - # + source_density_amplitude*u/sqrt(2*p*n) - # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p - # + w_∥*1/2*source_density_amplitude/n) - # - # dvth/dz = d/dz(sqrt(2*p/n/me)) - # = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz - # = 1/sqrt(2*p*n*me)*dp/dz - 1/2*sqrt(2*p/n/me)/n*dn/dz - # d(dvth/dz[irowz])/d(ppar[icolz]) = - # (-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz)[irowz] * delta(irowz,icolz) - # +1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] - # - # ⇒ d((1/2*vth/p*dp/dz - w_∥^2*dvth/dz - # + source_density_amplitude*u/n/vth - # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p - # + w_∥*1/2*source_density_amplitude/n)[irowz]/d(ppar[icolz]) - # = (-1/4*sqrt(2/n/me)/p^(3/2)*dp/dz - # - w_∥^2*(-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz) - # - 1/2*source_density_amplitude*u/sqrt(2*n)/p^(3/2) - # + w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p^2)[irowz] * delta(irowz,icolz) - # + (1/2*sqrt(2/p/n/me) - w_∥^2/sqrt(2*p*n*me))[irowz] * z_deriv_matrix[irowz,icolz] - precon_matrix[row,pdf_size+iz] += dt * ( - -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz] - - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]) - ) * dpdf_dvpa[ivpa,ivperp,iz] - if external_source_settings.electron.active - precon_matrix[row,pdf_size+iz] += dt * ( - -0.5*source_density_amplitude[iz]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 - + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz] + 2.0*upar[iz]*source_momentum_amplitude[iz])/ppar[iz]^2 - ) * dpdf_dvpa[ivpa,ivperp,iz] - end - for icolz ∈ 1:z.n - col = pdf_size + icolz - precon_matrix[row,col] += dt * ( - 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me) - - 
vpa.grid[ivpa]^2/sqrt(2.0*ppar[iz]*dens[iz]*me) - ) * dpdf_dvpa[ivpa,ivperp,iz] * z_deriv_matrix[iz,icolz] - end - - # Terms from `add_contribution_from_pdf_term!()` - # (0.5/p*dq/dz + w_∥*vth*(1/n*dn/dz - 1/vth*dvth/dz))*g - # - # q = 2*p*vth*∫dw_∥ w_∥^3 g - # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g - # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz - # - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz - # + 2*p*vth*∫dw_∥ w_∥^3 dg/dz - # 0.5/p*dq/dz = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz - # - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz - # + sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz - # = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz - # - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz - # + vth*∫dw_∥ w_∥^3 dg/dz - # d(0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) = - # (1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 - # + vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] - # d(0.5/p*dq/dz[irowz])/d(p[icolz]) = - # (-3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - 1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + 1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) - # + (1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] - # - # dvth/dz = d/dz(sqrt(2*p/n/me)) - # = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz - # = 1/n/me/vth*dp/dz - p/n^2/me/vth*dn/dz - # = 1/n/me/vth*dp/dz - 1/2*vth/n*dn/dz - # ⇒ vth*(1/n*dn/dz - 1/vth*dvth/dz) - # = (vth/n*dn/dz - dvth/dz) - # = (vth/n*dn/dz - 1/n/me/vth*dp/dz + 1/2*vth/n*dn/dz) - # = (3/2*vth/n*dn/dz - 1/n/me/vth*dp/dz) - # = (3/2*sqrt(2*p/me)/n^(3/2)*dn/dz - 1/sqrt(2*p*n*me)*dp/dz) - # d(vth*(1/n*dn/dz - 1/vth*dvth/dz)[irowz])/d(ppar[icolz]) = - # (3/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz + 1/2/sqrt(2*n*me)/p^(3/2)*dp/dz)[irowz] * delta(irowz,icolz) - # -1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] - # - precon_matrix[row,row] += dt * (0.5 * 
dqpar_dz[iz] / ppar[iz] - + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz] - - dvth_dz[iz] / vth[iz])) - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] += - dt * f[ivpa,ivperp,iz] * - (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 - end - for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] += - dt * f[ivpa,ivperp,iz] * vth[iz] * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] - end - precon_matrix[row,pdf_size+iz] += - dt * f[ivpa,ivperp,iz] * - (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] - - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] - + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz] - + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz] - + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz])) - for icolz ∈ 1:z.n - col = pdf_size + icolz - precon_matrix[row,col] += dt * f[ivpa,ivperp,iz] * - (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] - - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_matrix[iz,icolz] - end - - # Terms from add_dissipation_term!() - if vpa_dissipation_coefficient > 0.0 - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa] - end - end - - if external_source_settings.electron.active - # Source terms from `add_contribution_from_pdf_term!()` - precon_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz] / dens[iz] - - (0.5 * source_pressure_amplitude[iz] - + source_momentum_amplitude[iz]) / ppar[iz] - ) - if external_source_settings.electron.source_type == "energy" 
- # Contribution from `external_electron_source!()` - precon_matrix[row,row] += dt * source_amplitude[iz] - end - end - - if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 - # Contribution from electron_krook_collisions!() - nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz]) - nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz]) - precon_matrix[row,row] += dt * (nu_ee + nu_ei) - end - end - - if composition.electron_physics == kinetic_electrons_with_temperature_equation - error("kinetic_electrons_with_temperature_equation not " - * "supported yet in preconditioner") - elseif composition.electron_physics != kinetic_electrons - error("Unsupported electron_physics=$(composition.electron_physics) " - * "in electron_backward_euler!() preconditioner.") - end - if num_diss_params.electron.moment_dissipation_coefficient > 0.0 - error("z-diffusion of electron_ppar not yet supported in " - * "preconditioner") - end - if collisions.nu_ei > 0.0 - error("electron-ion collision terms for electron_ppar not yet " - * "supported in preconditioner") - end - if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 - error("electron 'charge exchange' terms for electron_ppar not yet " - * "supported in preconditioner") - end - if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 - error("electron ionization terms for electron_ppar not yet " - * "supported in preconditioner") - end - begin_z_region() - @loop_z iz begin - # Rows corresponding to electron_ppar - row = pdf_size + iz - - precon_matrix[row,:] .= 0.0 - precon_matrix[row,row] += 1.0 - - # Note that as - # q = 2 * p * vth * ∫dw_∥ w_∥^3 g - # = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g - # we have that - # d(q)/dz = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz - # - p^(3/2) * sqrt(2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz - # + 3 * p^(1/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz 
- # so for the Jacobian - # d(d(q)/dz)[irowz])/d(p[icolz]) - # = (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz - # - 3/2 * sqrt(2) * p^(1/2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz - # + 3/2 * sqrt(2) / p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz)[irowz] * delta[irowz,icolz] - # + (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] - # d(d(q)/dz)[irowz])/d(g[icolvpa,icolvperp,icolz]) - # = (2 * sqrt(2) * p^(3/2) / n^(1/2) / me^(1/2))[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] - # + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz] - - # upar*dppar_dz - for icolz ∈ 1:z.n - col = pdf_size + icolz - precon_matrix[row,col] += - dt * upar[iz] * z_deriv_matrix[iz,icolz] - end + fill_electron_kinetic_equation_Jacobian!( + precon_matrix, f_electron_new, electron_ppar_new, moments, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params.dt[], ion_dt, + ir, evolve_ppar) - # 3*ppar*dupar_dz - precon_matrix[row,row] += 3.0 * dt * dupar_dz[iz] - - # terms from d(qpar)/dz - precon_matrix[row,row] += - dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz] - - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz] - + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) - for icolz ∈ 1:z.n - col = pdf_size + icolz - precon_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_matrix[iz,icolz] - end - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz] - + 
3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 - end - for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa - precon_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] - end - - if ion_dt !== nothing - # Backward-Euler forcing term - precon_matrix[row,row] += dt / ion_dt - end + begin_serial_region() + if block_rank[] == 0 + nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, output_buffer) + else + nl_solver_params.preconditioners[ir] = (orig_lu, precon_matrix, input_buffer, output_buffer) end - - nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, output_buffer) end @@ -3240,6 +2819,107 @@ function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, return nothing end +""" + fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, moments, collisions, + composition, z, vperp, vpa, z_spectral, + vperp_specral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, dt, ion_dt, + ir, evolve_ppar) + +Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if +`evolve_ppar=true`) the electron energy equation. 
+"""
+function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, moments,
+                                                  collisions, composition, z, vperp, vpa,
+                                                  z_spectral, vperp_spectral,
+                                                  vpa_spectral, z_advect, vpa_advect,
+                                                  scratch_dummy, external_source_settings,
+                                                  num_diss_params, dt, ion_dt, ir,
+                                                  evolve_ppar)
+    # State-vector layout used for rows/columns of `jacobian_matrix` in this function:
+    # first the z.n*vperp.n*vpa.n entries of the electron pdf `f` (vpa fastest, then
+    # vperp, then z), followed by the z.n entries of the electron parallel pressure
+    # `ppar` (hence `ppar_offset=pdf_size` in the calls below).
+
+    # Scratch buffers needed by derivative_z!()
+    buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
+    buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1]
+    buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1]
+    buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
+
+    # Electron moments and their z-derivatives at this r-index
+    vth = @view moments.electron.vth[:,ir]
+    me = composition.me_over_mi
+    dens = @view moments.electron.dens[:,ir]
+    upar = @view moments.electron.upar[:,ir]
+    qpar = @view moments.electron.qpar[:,ir]
+    ddens_dz = @view moments.electron.ddens_dz[:,ir]
+    dupar_dz = @view moments.electron.dupar_dz[:,ir]
+    dppar_dz = @view moments.electron.dppar_dz[:,ir]
+    dvth_dz = @view moments.electron.dvth_dz[:,ir]
+    dqpar_dz = @view moments.electron.dqpar_dz[:,ir]
+
+    upar_ion = @view moments.ion.upar[:,ir,1]
+
+    # Electron z-advection speed at this r-index, needed by
+    # add_external_electron_source_to_Jacobian!(). `z_speed` was previously passed to
+    # that function without ever being defined here, which is an UndefVarError.
+    # This is a lazy view, so it sees any update of z_advect[1].speed made before the
+    # external-source call below.
+    # NOTE(review): assumes the callee indexes z_speed as [iz,ivpa,ivperp], i.e. the
+    # layout of z_advect[1].speed with the r-index fixed - confirm against
+    # external_sources.jl.
+    z_speed = @view z_advect[1].speed[:,:,:,ir]
+
+    # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
+    third_moment = scratch_dummy.buffer_z_1
+    dthird_moment_dz = scratch_dummy.buffer_z_2
+    begin_z_region()
+    @loop_z iz begin
+        third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz]
+    end
+    derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2,
+                  buffer_3, buffer_4, z_spectral, z)
+
+    pdf_size = z.n * vperp.n * vpa.n
+    v_size = vperp.n * vpa.n
+
+    # Initialise jacobian_matrix to the identity
+    begin_z_vperp_vpa_region()
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa
+
+        jacobian_matrix[row,:] .= 0.0
+        jacobian_matrix[row,row] += 1.0
+    end
+    begin_z_region()
+    @loop_z iz begin
+        # Rows corresponding to electron_ppar
+        row = pdf_size + iz
+
+        jacobian_matrix[row,:] .= 0.0
+        jacobian_matrix[row,row] += 1.0
+    end
+
+    # Accumulate the contribution of each term of the kinetic equation in turn
+    add_electron_z_advection_to_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral,
+        z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+    add_electron_vpa_advection_to_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
+        vpa_advect, scratch_dummy, external_source_settings, dt, ir; ppar_offset=pdf_size)
+    add_contribution_from_electron_pdf_term_to_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+    add_electron_dissipation_term_to_Jacobian!(
+        jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, dt, ir)
+    add_electron_krook_collisions_to_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa,
+        dt, ir; ppar_offset=pdf_size)
+    add_external_electron_source_to_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, external_source_settings, z, vperp, vpa,
+        dt, ir; ppar_offset=pdf_size)
+    if evolve_ppar
+        # Rows for electron_ppar only get more than the identity when ppar is evolved
+        add_electron_energy_equation_to_Jacobian!(
+            jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz,
+            dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+            num_diss_params, dt, ir; ppar_offset=pdf_size)
+    end
+    if ion_dt !== nothing
+        add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
+            jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size)
+    end
+
+    return nothing
+end
+
 #"""
 #electron_kinetic_equation_residual!
calculates the residual of the (time-independent) electron kinetic equation
 #INPUTS:
@@ -3422,6 +3102,40 @@ function add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, vp
     return nothing
 end
 
+"""
+    add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params,
+                                               z, vperp, vpa, vpa_spectral, dt, ir;
+                                               f_offset=0)
+
+Add the Jacobian contribution of the vpa-dissipation term (see `add_dissipation_term!()`)
+to `jacobian_matrix`.
+
+For each electron-pdf row that is not skipped by
+`skip_f_electron_bc_points_in_Jacobian()`, subtracts
+`dt * vpa_dissipation_coefficient * vpa_spectral.dense_second_deriv_matrix[ivpa,icolvpa]`
+from the columns at the same `iz`. Returns immediately, leaving `jacobian_matrix`
+unchanged, when `num_diss_params.electron.vpa_dissipation_coefficient ≤ 0`.
+
+`f_offset` is the offset of the electron pdf within the state vector.
+"""
+function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params,
+                                                    z, vperp, vpa, vpa_spectral, dt, ir;
+                                                    f_offset=0)
+    # `@boundscheck cond` on its own only evaluates `cond` and discards the result, so
+    # an explicit `|| error(...)` is needed for the check to have any effect.
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("jacobian_matrix must be square, got size $(size(jacobian_matrix))")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("jacobian_matrix is too small to hold f with f_offset=$f_offset")
+
+    vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient
+
+    if vpa_dissipation_coefficient ≤ 0.0
+        return nothing
+    end
+
+    v_size = vperp.n * vpa.n
+    vpa_dense_second_deriv_matrix = vpa_spectral.dense_second_deriv_matrix
+
+    begin_z_vperp_vpa_region()
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        # Terms from add_dissipation_term!()
+        # NOTE(review): the column loop runs over every icolvperp, not only
+        # icolvperp == ivperp. This is equivalent when vperp.n == 1; confirm it is
+        # intended for vperp.n > 1.
+        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            jacobian_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa]
+        end
+    end
+
+    return nothing
+end
+
 """
 update_electron_pdf! iterates to find a solution for the electron pdf from the electron kinetic equation:
@@ -3660,6 +3374,127 @@ function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, mome
     return nothing
 end
 
+"""
+    add_contribution_from_electron_pdf_term_to_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0)
+
+Add to `jacobian_matrix` the contribution of the term handled by
+`add_contribution_from_pdf_term!()`,
+`(0.5/p*dq/dz + w_∥*vth*(1/n*dn/dz - 1/vth*dvth/dz))*g`, plus the matching external
+source terms when `external_source_settings.electron.active`.
+
+Only electron-pdf rows not skipped by `skip_f_electron_bc_points_in_Jacobian()` are
+modified. Both the pdf columns (starting at `f_offset`) and the `ppar` columns (starting
+at `ppar_offset`) of those rows receive entries. `f_offset` and `ppar_offset` must
+differ.
+"""
+function add_contribution_from_electron_pdf_term_to_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0)
+
+    if f_offset == ppar_offset
+        error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
+              * "cannot be in same place in state vector.")
+    end
+    # `@boundscheck cond` on its own only evaluates `cond` and discards the result, so
+    # an explicit `|| error(...)` is needed for the check to have any effect.
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("jacobian_matrix must be square, got size $(size(jacobian_matrix))")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("jacobian_matrix is too small to hold f with f_offset=$f_offset")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("jacobian_matrix is too small to hold ppar with ppar_offset=$ppar_offset")
+
+    # Take the slice at this r-index, as add_electron_vpa_advection_to_Jacobian!() does.
+    # Indexing the full arrays with a single index `[iz]` would always read the first
+    # r-column.
+    source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir]
+    source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir]
+    source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir]
+    z_deriv_matrix = z_spectral.D_matrix
+    v_size = vperp.n * vpa.n
+
+    begin_z_vperp_vpa_region()
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        # Terms from `add_contribution_from_pdf_term!()`
+        # (0.5/p*dq/dz + w_∥*vth*(1/n*dn/dz - 1/vth*dvth/dz))*g
+        #
+        # q = 2*p*vth*∫dw_∥ w_∥^3 g
+        #   = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g
+        # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz
+        #         - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz
+        #         + 2*p*vth*∫dw_∥ w_∥^3 dg/dz
+        # 0.5/p*dq/dz = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz
+        #               - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz
+        #               + sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz
+        #             = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz
+        #               - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz
+        #               + vth*∫dw_∥ w_∥^3 dg/dz
+        # d(0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) =
+        #   (1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        #   + vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz]
+        # d(0.5/p*dq/dz[irowz])/d(p[icolz]) =
+        #   (-3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - 1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + 1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz)
+        #   + (1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz]
+        #
+        # dvth/dz = d/dz(sqrt(2*p/n/me))
+        #         = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz
+        #         = 1/n/me/vth*dp/dz - p/n^2/me/vth*dn/dz
+        #         = 1/n/me/vth*dp/dz - 1/2*vth/n*dn/dz
+        # ⇒ vth*(1/n*dn/dz - 1/vth*dvth/dz)
+        #   = (vth/n*dn/dz - dvth/dz)
+        #   = (vth/n*dn/dz - 1/n/me/vth*dp/dz + 1/2*vth/n*dn/dz)
+        #   = (3/2*vth/n*dn/dz - 1/n/me/vth*dp/dz)
+        #   = (3/2*sqrt(2*p/me)/n^(3/2)*dn/dz - 1/sqrt(2*p*n*me)*dp/dz)
+        # d(vth*(1/n*dn/dz - 1/vth*dvth/dz)[irowz])/d(ppar[icolz]) =
+        #   (3/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz + 1/2/sqrt(2*n*me)/p^(3/2)*dp/dz)[irowz] * delta(irowz,icolz)
+        #   -1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz]
+        #
+        jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz]
+                                          + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz]
+                                                                        - dvth_dz[iz] / vth[iz]))
+        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            jacobian_matrix[row,col] +=
+                dt * f[ivpa,ivperp,iz] *
+                (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) *
+                vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        end
+        for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            jacobian_matrix[row,col] +=
+                dt * f[ivpa,ivperp,iz] * vth[iz] *
+                vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz]
+        end
+        if external_source_settings.electron.active
+            # Source terms from `add_contribution_from_pdf_term!()`
+            jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz] / dens[iz]
+                                              - (0.5 * source_pressure_amplitude[iz]
+                                                 + source_momentum_amplitude[iz]) / ppar[iz]
+                                             )
+        end
+        jacobian_matrix[row,ppar_offset+iz] +=
+            dt * f[ivpa,ivperp,iz] *
+            (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz]
+             - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz]
+             + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz]
+             + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]
+                                 + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz]))
+        for icolz ∈ 1:z.n
+            col = ppar_offset + icolz
+            jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] *
+                (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz]
+                 - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_matrix[iz,icolz]
+        end
+    end
+
+    return nothing
+end
+
+"""
+    add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt,
+                                                     ir; ppar_offset=0)
+
+Add the backward-Euler forcing term `dt / ion_dt` to the diagonal entry of every
+`electron_ppar` row (rows `ppar_offset + 1` to `ppar_offset + z.n`) of
+`jacobian_matrix`.
+"""
+function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt,
+                                                          ir; ppar_offset=0)
+    # `@boundscheck cond` on its own only evaluates `cond` and discards the result, so
+    # an explicit `|| error(...)` is needed for the check to have any effect.
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("jacobian_matrix must be square, got size $(size(jacobian_matrix))")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("jacobian_matrix is too small to hold ppar with ppar_offset=$ppar_offset")
+
+    begin_z_region()
+    @loop_z iz begin
+        # Rows corresponding to electron_ppar
+        row = ppar_offset + iz
+
+        # Backward-Euler forcing term
+        jacobian_matrix[row,row] += dt / ion_dt
+    end
+
+    return nothing
+end
+
 # function check_electron_pdf_convergence!(electron_pdf_converged, pdf_new, pdf)
 #     # check to see if the electron pdf has converged to within the specified tolerance
 #     # NB: the convergence criterion is based on the average relative difference between the
diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl
index 49072449d..ab083ae05 100644
--- a/moment_kinetics/src/electron_vpa_advection.jl
+++ b/moment_kinetics/src/electron_vpa_advection.jl
@@ -4,9 +4,12 @@ module electron_vpa_advection
 
 export electron_vpa_advection!
 export update_electron_speed_vpa!
+export add_electron_vpa_advection_to_Jacobian!
 
 using ..looping
+using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian
 using ..calculus: derivative!, second_derivative!
+using ..gauss_legendre: gausslegendre_info
 
 """
 calculate the wpa-advection term for the electron kinetic equation
@@ -86,4 +89,199 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa,
     return nothing
 end
 
+"""
+    add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth,
+        third_moment, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa,
+        z_spectral, vpa_spectral, vpa_advect, scratch_dummy, external_source_settings,
+        dt, ir; f_offset=0, ppar_offset=0)
+
+Add the contribution of the electron w_∥-advection term, multiplied by `dt`, to
+`jacobian_matrix`.
+
+Only rows belonging to the electron distribution function are modified. The row for
+grid point `(ivpa, ivperp, iz)` is
+`(iz - 1) * vperp.n * vpa.n + (ivperp - 1) * vpa.n + ivpa + f_offset`; columns for the
+distribution function use the same flattening, and the column for `ppar[iz]` is
+`ppar_offset + iz`. Points for which `skip_f_electron_bc_points_in_Jacobian()` returns
+`true` are skipped.
+
+`vpa_advect[1].adv_fac[:,:,:,ir]` is overwritten, and
+`scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]` is used to hold dg/dw_∥. Throws an error
+if `f_offset == ppar_offset` or if `vpa_spectral` is not a `gausslegendre_info`.
+"""
+function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar,
+                                                 vth, third_moment, ddens_dz, dppar_dz,
+                                                 dthird_moment_dz, moments, me, z, vperp,
+                                                 vpa, z_spectral, vpa_spectral,
+                                                 vpa_advect, scratch_dummy,
+                                                 external_source_settings, dt, ir;
+                                                 f_offset=0, ppar_offset=0)
+    if f_offset == ppar_offset
+        error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
+              * "cannot be in same place in state vector.")
+    end
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+
+    v_size = vperp.n * vpa.n
+    source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir]
+    source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir]
+    source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir]
+
+    dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
+    begin_z_vperp_region()
+    update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid,
+                               external_source_settings.electron, ir)
+    @loop_z_vperp iz ivperp begin
+        @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
+    end
+    #calculate the upwind derivative of the electron pdf w.r.t. wpa
+    @loop_z_vperp iz ivperp begin
+        @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa,
+                           vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
+    end
+
+    if !isa(vpa_spectral, gausslegendre_info)
+        error("Only gausslegendre_pseudospectral vpa-coordinate type is supported by "
+              * "add_electron_vpa_advection_to_Jacobian!() preconditioner because we "
+              * "need differentiation matrices.")
+    end
+
+    z_deriv_matrix = z_spectral.D_matrix
+
+    begin_z_vperp_vpa_region()
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        ielement_vpa = vpa.ielement[ivpa]
+        igrid_vpa = vpa.igrid[ivpa]
+        icolumn_min_vpa = vpa.imin[ielement_vpa] - (ielement_vpa != 1) + f_offset
+        icolumn_max_vpa = vpa.imax[ielement_vpa] + f_offset
+
+        vpa_speed = vpa_advect[1].speed[ivpa,ivperp,iz,ir]
+
+        # Contributions from
+        #   (1/2*vth/p*dp/dz + 1/2*w_∥/p*dq/dz - w_∥^2*dvth/dz
+        #    + source_density_amplitude*u/n/vth
+        #    - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p
+        #    + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥
+        if ielement_vpa == 1 && igrid_vpa == 1
+            jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa]
+        elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid
+            jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa]
+        elseif igrid_vpa == vpa.ngrid
+            # Note igrid_vpa is only ever 1 when ielement_vpa==1, because
+            # of the way element boundaries are counted.
+            icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 + f_offset
+            icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] + f_offset
+            if vpa_speed < 0.0
+                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
+                    dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1]
+            elseif vpa_speed > 0.0
+                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                    dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa]
+            else
+                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                    dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa]
+                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
+                    dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1]
+            end
+        else
+            jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                dt * vpa_speed * vpa_spectral.lobatto.Dmat[igrid_vpa,:] ./ vpa.element_scale[ielement_vpa]
+        end
+        # q = 2*p*vth*∫dw_∥ w_∥^3 g
+        #   = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g
+        # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz
+        #         - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz
+        #         + 2*p*vth*∫dw_∥ w_∥^3 dg/dz
+        # w_∥*0.5/p*dq/dz = w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz
+        #                   - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz
+        #                   + w_∥*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz
+        #                 = w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz
+        #                   - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz
+        #                   + w_∥*vth*∫dw_∥ w_∥^3 dg/dz
+        # d(w_∥*0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) =
+        #   w_∥*(1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        #   + w_∥*vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz]
+        # d(w_∥*0.5/p*dq/dz[irowz])/d(p[icolz]) =
+        #   (-w_∥*3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - w_∥*1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + w_∥*1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz)
+        #   + w_∥*(1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz]
+        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
+                vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz]
+                                  - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) *
+                vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        end
+        for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
+                vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz]
+        end
+        jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] *
+            (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz]
+             - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz]
+             + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz])
+        for icolz ∈ 1:z.n
+            col = ppar_offset + icolz
+            jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_matrix[iz,icolz]
+        end
+        #   (1/2*vth/p*dp/dz - w_∥^2*dvth/dz
+        #    + source_density_amplitude*u/n/vth
+        #    - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p
+        #    + w_∥*1/2*source_density_amplitude/n)
+        # = (1/2*sqrt(2/p/n)*dp/dz - w_∥^2*dvth/dz
+        #    + source_density_amplitude*u/sqrt(2*p*n)
+        #    - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p
+        #    + w_∥*1/2*source_density_amplitude/n)
+        #
+        # dvth/dz = d/dz(sqrt(2*p/n/me))
+        #         = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz
+        #         = 1/sqrt(2*p*n*me)*dp/dz - 1/2*sqrt(2*p/n/me)/n*dn/dz
+        # d(dvth/dz[irowz])/d(ppar[icolz]) =
+        #   (-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz)[irowz] * delta(irowz,icolz)
+        #   +1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz]
+        #
+        # ⇒ d((1/2*vth/p*dp/dz - w_∥^2*dvth/dz
+        #      + source_density_amplitude*u/n/vth
+        #      - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p
+        #      + w_∥*1/2*source_density_amplitude/n)[irowz]/d(ppar[icolz])
+        #   = (-1/4*sqrt(2/n/me)/p^(3/2)*dp/dz
+        #      - w_∥^2*(-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz)
+        #      - 1/2*source_density_amplitude*u/sqrt(2*n)/p^(3/2)
+        #      + w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p^2)[irowz] * delta(irowz,icolz)
+        #     + (1/2*sqrt(2/p/n/me) - w_∥^2/sqrt(2*p*n*me))[irowz] * z_deriv_matrix[irowz,icolz]
+        jacobian_matrix[row,ppar_offset+iz] += dt * (
+            -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz]
+            - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz])
+           ) * dpdf_dvpa[ivpa,ivperp,iz]
+        # NOTE(review): with vth = sqrt(2*p/n/me), u/n/vth = u*sqrt(me/(2*p*n)), so the
+        # source_density_amplitude term below may be missing a factor sqrt(me) - confirm.
+        if external_source_settings.electron.active
+            jacobian_matrix[row,ppar_offset+iz] += dt * (
+                -0.5*source_density_amplitude[iz]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5
+                + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz] + 2.0*upar[iz]*source_momentum_amplitude[iz])/ppar[iz]^2
+               ) * dpdf_dvpa[ivpa,ivperp,iz]
+        end
+        for icolz ∈ 1:z.n
+            col = ppar_offset + icolz
+            jacobian_matrix[row,col] += dt * (
+                0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)
+                - vpa.grid[ivpa]^2/sqrt(2.0*ppar[iz]*dens[iz]*me)
+               ) * dpdf_dvpa[ivpa,ivperp,iz] * z_deriv_matrix[iz,icolz]
+        end
+    end
+
+    return nothing
+end
+
+end
diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl
index 39076384d..3d2bd4da0 100644
---
a/moment_kinetics/src/electron_z_advection.jl
+++ b/moment_kinetics/src/electron_z_advection.jl
@@ -4,9 +4,12 @@ module electron_z_advection
 
 export electron_z_advection!
 export update_electron_speed_z!
+export add_electron_z_advection_to_Jacobian!
 
 using ..advection: advance_f_df_precomputed!
+using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian
 using ..chebyshev: chebyshev_info
+using ..gauss_legendre: gausslegendre_info
 using ..looping
 using ..derivatives: derivative_z_pdf_vpavperpz!
 using ..calculus: second_derivative!
@@ -68,4 +71,114 @@ function update_electron_speed_z!(advect, upar, vth, vpa)
     return nothing
 end
 
+"""
+    add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth, me,
+        z, vperp, vpa, z_spectral, z_advect, scratch_dummy, dt, ir; f_offset=0,
+        ppar_offset=0)
+
+Add the contribution of the electron z-advection term, (w_∥*vth + upar)*dg/dz, multiplied
+by `dt`, to `jacobian_matrix`.
+
+Only rows belonging to the electron distribution function are modified; rows and
+columns are flattened as `(iz - 1) * vperp.n * vpa.n + (ivperp - 1) * vpa.n + ivpa +
+f_offset`, and the column for `ppar[iz]` is `ppar_offset + iz`. Points for which
+`skip_f_electron_bc_points_in_Jacobian()` returns `true` are skipped.
+
+`z_advect[1].adv_fac[:,:,:,ir]` is overwritten and `scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]`
+is used to hold dg/dz. Throws an error if `f_offset == ppar_offset` or if `z_spectral`
+is not a `gausslegendre_info`.
+"""
+function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth,
+                                               me, z, vperp, vpa, z_spectral, z_advect,
+                                               scratch_dummy, dt, ir; f_offset=0,
+                                               ppar_offset=0)
+    if f_offset == ppar_offset
+        error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
+              * "cannot be in same place in state vector.")
+    end
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+
+    v_size = vperp.n * vpa.n
+
+    dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
+
+    begin_vperp_vpa_region()
+    update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+
+    @loop_vperp_vpa ivperp ivpa begin
+        @views @. z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_advect[1].speed[:,ivpa,ivperp,ir]
+    end
+    #calculate the upwind derivative
+    @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir],
+                                       scratch_dummy.buffer_vpavperpr_1[:,:,ir],
+                                       scratch_dummy.buffer_vpavperpr_2[:,:,ir],
+                                       scratch_dummy.buffer_vpavperpr_3[:,:,ir],
+                                       scratch_dummy.buffer_vpavperpr_4[:,:,ir],
+                                       scratch_dummy.buffer_vpavperpr_5[:,:,ir],
+                                       scratch_dummy.buffer_vpavperpr_6[:,:,ir],
+                                       z_spectral, z)
+
+    if !isa(z_spectral, gausslegendre_info)
+        error("Only gausslegendre_pseudospectral z-coordinate type is supported by "
+              * "add_electron_z_advection_to_Jacobian!() preconditioner because we need "
+              * "differentiation matrices.")
+    end
+    z_deriv_matrix = z_spectral.D_matrix
+
+    begin_z_vperp_vpa_region()
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+        v_remainder = (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        ielement_z = z.ielement[iz]
+        igrid_z = z.igrid[iz]
+        icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1)
+        icolumn_max_z = z.imax[ielement_z]
+
+        z_speed = z_advect[1].speed[iz,ivpa,ivperp,ir]
+
+        # Contributions from (w_∥*vth + upar)*dg/dz
+        if ielement_z == 1 && igrid_z == 1
+            jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z]
+        elseif ielement_z == z.nelement_local && igrid_z == z.ngrid
+            jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z]
+        elseif igrid_z == z.ngrid
+            # Note igrid_z is only ever 1 when ielement_z==1, because
+            # of the way element boundaries are counted.
+            icolumn_min_z_next = z.imin[ielement_z+1] - 1
+            icolumn_max_z_next = z.imax[ielement_z+1]
+            if z_speed < 0.0
+                jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+=
+                    dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1]
+            elseif z_speed > 0.0
+                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                    dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z]
+            else
+                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                    dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z]
+                jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+=
+                    dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1]
+            end
+        else
+            jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                dt * z_speed * z_spectral.lobatto.Dmat[igrid_z,:] ./ z.element_scale[ielement_z]
+        end
+        # vth = sqrt(2*p/n/me)
+        # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth
+        # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz
+        jacobian_matrix[row,ppar_offset+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz]
+    end
+
+    return nothing
+end
+
+end
diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl
index 3a5840b56..bc5a98e6a 100644
--- a/moment_kinetics/src/external_sources.jl
+++ b/moment_kinetics/src/external_sources.jl
@@ -14,9 +14,11 @@ module external_sources
 export setup_external_sources!, external_ion_source!, external_neutral_source!,
        external_ion_source_controller!, external_neutral_source_controller!,
        initialize_external_source_amplitude!,
-       initialize_external_source_controller_integral!
+       initialize_external_source_controller_integral!,
+       add_external_electron_source_to_Jacobian!
 
 using ..array_allocation: allocate_float, allocate_shared_float
+using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian
 using ..calculus
 using ..communication
 using ..coordinates
@@ -801,6 +803,91 @@ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_u
     return nothing
 end
 
+"""
+    add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me, z_speed,
+        external_source_settings, z, vperp, vpa, dt, ir; f_offset=0, ppar_offset=0)
+
+Add the contribution of the external electron source, multiplied by `dt`, to
+`jacobian_matrix`. Returns immediately if `external_source_settings.electron.active` is
+false.
+
+Only rows belonging to the electron distribution function are modified, flattened as
+`(iz - 1) * vperp.n * vpa.n + (ivperp - 1) * vpa.n + ivpa + f_offset`; the column for
+`ppar[iz]` is `ppar_offset + iz`. Points for which
+`skip_f_electron_bc_points_in_Jacobian()` returns `true` are skipped. Moment arrays are
+read from `moments.electron` at radial index `ir`.
+"""
+function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments,
+                                                   me, z_speed, external_source_settings,
+                                                   z, vperp, vpa, dt, ir; f_offset=0,
+                                                   ppar_offset=0)
+    if f_offset == ppar_offset
+        error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
+              * "cannot be in same place in state vector.")
+    end
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+
+    if !external_source_settings.electron.active
+        return nothing
+    end
+
+    source_amplitude = @view moments.electron.external_source_amplitude[:,ir]
+    source_T = external_source_settings.electron.source_T
+    dens = @view moments.electron.dens[:,ir]
+    upar = @view moments.electron.upar[:,ir]
+    ppar = @view moments.electron.ppar[:,ir]
+    vth = @view moments.electron.vth[:,ir]
+    if vperp.n == 1
+        vth_factor = 1.0 / sqrt(source_T / me)
+    else
+        vth_factor = 1.0 / sqrt(source_T / me)^1.5
+    end
+    vperp_grid = vperp.grid
+    vpa_grid = vpa.grid
+    v_size = vperp.n * vpa.n
+
+    begin_z_vperp_vpa_region()
+    if external_source_settings.electron.source_type == "energy"
+        @loop_z_vperp_vpa iz ivperp ivpa begin
+            if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
+                                                     z_speed)
+                continue
+            end
+
+            # Rows corresponding to pdf_electron
+            row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+            # Contribution from `external_electron_source!()`
+            jacobian_matrix[row,row] += dt * source_amplitude[iz]
+        end
+    end
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        # Contributions from
+        #   -vth/n*vth_factor*source_amplitude*exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)
+        # Using
+        #   d(vth[irowz])/d(ppar[icolz]) = 1/2*vth/ppar * delta(irowz,icolz)
+        #
+        #   d(exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)[irowz])/d(ppar[icolz])
+        #     = -2*(w_⟂^2*vth+(w_∥*vth+u)*w_∥)*me/source_T * 1/2*vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
+        #     = -(w_⟂^2*vth+(w_∥*vth+u)*w_∥)*me/source_T * vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
+        jacobian_matrix[row,ppar_offset+iz] +=
+            -dt * vth[iz] / dens[iz] * vth_factor * source_amplitude[iz] *
+            (0.5/ppar[iz] - (vperp_grid[ivperp]^2*vth[iz] + (vpa_grid[ivpa]*vth[iz] + upar[iz])*vpa_grid[ivpa])*me/source_T*vth[iz]/ppar[iz]) *
+            exp(-((vperp_grid[ivperp]*vth[iz])^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])^2) * me / source_T)
+    end
+
+    return nothing
+end
+
 """
     external_neutral_source!(pdf, fvec, moments, neutral_source_settings, vzeta, vr, vz,
                              dt)
diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl
index 72f41d9f6..2226a7462 100644
--- a/moment_kinetics/src/krook_collisions.jl
+++ b/moment_kinetics/src/krook_collisions.jl
@@ -3,9 +3,11 @@ module krook_collisions
 
 export setup_krook_collisions_input, get_collision_frequency_ii,
        get_collision_frequency_ee,
-       get_collision_frequency_ei, krook_collisions!, electron_krook_collisions!
+       get_collision_frequency_ei, krook_collisions!, electron_krook_collisions!,
+       add_electron_krook_collisions_to_Jacobian!
 
 using ..looping
+using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian
 using ..input_structs: krook_collisions_input, set_defaults_and_check_section!
using ..reference_parameters: get_reference_collision_frequency_ii, get_reference_collision_frequency_ee, @@ -417,4 +419,56 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ return nothing end +function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, + vth, upar_ion, collisions, z, vperp, + vpa, z_speed, dt, ir; f_offset=0, + ppar_offset) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + + if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0 + return nothing + end + + v_size = vperp.n * vpa.n + + using_reference_parameters = (collisions.krook.frequency_option == "reference_parameters") + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz]) + nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz]) + jacobian_matrix[row,row] += dt * (nu_ee + nu_ei) + + fM_i = exp(-(vpa.grid[ivpa] + (upar_ion[iz] - upar[iz])/vth[iz])^2 - vperp.grid[ivperp]^2) + # d(f_M(u_i)[irowz])/d(ppar[icolz]) + # = -2*(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)*(-1/2/vth/ppar)*f_M(u_i) * delta(irow,icolz) + # = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*f_M(u_i) * delta(irow,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion[iz]-upar[iz])/vth[iz])*(upar_ion[iz]-upar[iz])/vth[iz]/ppar[iz]*fM_i + + if using_reference_parameters + # Both collision frequencies are proportional to n/vth^3=n^(5/2)*(me/2/p)^3/2, + # so + # d(nu[irowz])/d(ppar[icolz]) = -3/2*nu/ppar * delta(irowz,icolz) + # 
d(-(vpa.grid+(upar_ion-upar)/vth)^2[irowz])/d(ppar[icoliz] + # = -(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar * delta(irow,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * 1.5 / ppar[iz] * + (nu_ee * (f[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) + + nu_ei * (f[ivpa,ivperp,iz] - fM_i)) + end + end + + return nothing +end + end # krook_collisions diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 3712ba994..1d32c1d1c 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -39,6 +39,7 @@ include("geo.jl") include("gyroaverages.jl") include("velocity_moments.jl") include("velocity_grid_transforms.jl") +include("boundary_conditions.jl") include("electron_fluid_equations.jl") include("em_fields.jl") include("bgk.jl") @@ -48,7 +49,6 @@ include("moment_constraints.jl") include("fokker_planck_test.jl") include("fokker_planck_calculus.jl") include("fokker_planck.jl") -include("boundary_conditions.jl") include("advection.jl") include("vpa_advection.jl") include("z_advection.jl") From 78af080fdc88e29a1ada17adb6176027d73c72fa Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 20 Aug 2024 21:45:08 +0100 Subject: [PATCH 029/107] Fix 'constant' bc, make it compatible with moment-kinetic mode (?) 'constant' boundary condition may still not be consistent with moment constraints, but does now use the 'unnormalised' speed to set the incoming Maxwellian distribution, and the incoming distribution is normalised by n/vth. Also fix normalisation - the 1/sqrt(pi) is now included in the normalisation of f (and in the integration routines) so does not need to be divided out here. 
--- moment_kinetics/src/boundary_conditions.jl | 78 +++++++++++++++++----- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 3f63f459e..4cf90e35d 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -146,16 +146,40 @@ function enforce_z_boundary_condition!(pdf, density, upar, ppar, moments, bc::St density_offset = 1.0 vwidth = 1.0 if z.irank == 0 - @loop_s_r_vperp_vpa is ir ivperp ivpa begin - if adv[is].speed[ivpa,1,ir] > 0.0 - pdf[ivpa,ivperp,1,ir,is] = density_offset * exp(-(vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/vwidth^2) / sqrt(pi) + @loop_s is begin + speed = adv[is].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + prefactor /= density[1,ir,is] + end + if moments.evolve_ppar + prefactor *= moments.ion.vth[1,ir,is] + end + @loop_vperp_vpa ivperp ivpa begin + if speed[1,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,1,ir,is] = prefactor * exp(-(speed[1,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end end end end if z.irank == z.nrank - 1 - @loop_s_r_vperp_vpa is ir ivperp ivpa begin - if adv[is].speed[ivpa,end,ir] > 0.0 - pdf[ivpa,ivperp,end,ir,is] = density_offset * exp(-(vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/vwidth^2) / sqrt(pi) + @loop_s is begin + speed = adv[is].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + prefactor /= density[end,ir,is] + end + if moments.evolve_ppar + prefactor *= moments.ion.vth[end,ir,is] + end + @loop_vperp_vpa ivperp ivpa begin + if speed[end,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,end,ir,is] = prefactor * exp(-(speed[end,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end end end end @@ -327,20 +351,42 @@ function enforce_neutral_z_boundary_condition!(pdf, density, uz, pz, moments, de density_offset = 1.0 vwidth = 1.0 if z.irank == 0 - @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr 
ivz begin - if adv[isn].speed[ivz,ivr,ivzeta,1,ir] > 0.0 - pdf[ivz,ivr,ivzeta,1,ir,is] = density_offset * - exp(-(vzeta.grid[ivzeta]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) / - sqrt(pi) + @loop_sn isn begin + speed = adv[isn].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + density_offset /= density[1,ir,isn] + end + if moments.evolve_ppar + density_offset *= moments.neutral.vth[1,ir,isn] + end + @loop_vzeta_vr_vz ivzeta ivr ivz begin + if speed[1,ivz,ivr,ivzeta,ir] > 0.0 + pdf[ivz,ivr,ivzeta,1,ir,isn] = prefactor * + exp(-(speed[1,ivz,ivr,ivzeta,ir]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) + end + end end end end if z.irank == z.nrank - 1 - @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr ivz begin - if adv[isn].speed[ivz,ivr,ivzeta,end,ir] > 0.0 - pdf[ivz,ivr,ivzeta,end,ir,is] = density_offset * - exp(-(vzeta.grid[ivzeta]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) / - sqrt(pi) + @loop_sn isn begin + speed = adv[isn].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + density_offset /= density[end,ir,isn] + end + if moments.evolve_ppar + density_offset *= moments.neutral.vth[end,ir,isn] + end + @loop_vzeta_vr_vz ivzeta ivr ivz begin + if speed[end,ivz,ivr,ivzeta,ir] > 0.0 + pdf[ivz,ivr,ivzeta,end,ir,isn] = prefactor * + exp(-(speed[end,ivz,ivr,ivzeta,ir][ivzeta]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) + end + end end end end From da9e415b643388c09b0ca85115c150dfda5d1de9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 20 Aug 2024 21:52:46 +0100 Subject: [PATCH 030/107] Implement "constant" z-boundary condition for electrons --- .../src/electron_kinetic_equation.jl | 56 +++++++++++++------ 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index eb269397c..0f1583971 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ 
-1202,14 +1202,14 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_electron_newvar, f_old, and residual - # should all be zero at exactly the same set of grid points, so it is - # reasonable to zero-out `residual` to impose the boundary condition. We - # impose this after subtracting f_old in case rounding errors, etc. mean - # that at some point f_old had a different boundary condition cut-off - # index. + if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. begin_vperp_vpa_region() v_unnorm = vpa.scratch zero = 1.0e-14 @@ -1615,14 +1615,14 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, vperp_adv, vperp_diffusion) end - if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual should all - # be zero at exactly the same set of grid points, so it is reasonable - # to zero-out `residual` to impose the boundary condition. We impose - # this after subtracting f_old in case rounding errors, etc. 
mean that - # at some point f_old had a different boundary condition cut-off - # index. + if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note that + # as density, upar, ppar do not change in this implicit step, f_new, + # f_old, and residual should all be zero at exactly the same set of grid + # points, so it is reasonable to zero-out `residual` to impose the + # boundary condition. We impose this after subtracting f_old in case + # rounding errors, etc. mean that at some point f_old had a different + # boundary condition cut-off index. begin_vperp_vpa_region() v_unnorm = vpa.scratch zero = 1.0e-14 @@ -1845,6 +1845,28 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp if z.bc == "periodic" # Nothing more to do for z-periodic boundary conditions return nothing + elseif z.bc == "constant" + begin_r_vperp_vpa_region() + density_offset = 1.0 + vwidth = 1.0/sqrt(composition.me_over_mi) + dens = moments.electron.dens + if z.irank == 0 + speed = z_adv[1].speed + @loop_r_vperp_vpa ir ivperp ivpa begin + if speed[1,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,1,ir,is] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed[1,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end + end + if z.irank == z.nrank - 1 + speed = z_adv[is].speed + @loop_r_vperp_vpa ir ivperp ivpa begin + if speed[end,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,end,ir,is] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed[end,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end + end + return nothing end # first enforce the boundary condition at z_min. 
@@ -1888,7 +1910,7 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp if z.irank == 0 if z.bc != "wall" - error("Options other than wall or z-periodic bc not implemented yet for electrons") + error("Options other than wall, constant or z-periodic bc not implemented yet for electrons") end @loop_r ir begin # Impose sheath-edge boundary condition, while also imposing moment From f9bc69b2898ae135cbc26c8d3f8183f94d0bc611 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 21 Aug 2024 09:54:55 +0100 Subject: [PATCH 031/107] Add argument to skip electron solve when calling setup_moment_kinetics() This can be useful for setting up tests. --- moment_kinetics/src/initial_conditions.jl | 52 +++++++++++++---------- moment_kinetics/src/moment_kinetics.jl | 6 ++- moment_kinetics/src/time_advance.jl | 6 ++- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 73afb2bdf..0e13826d6 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -266,7 +266,7 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z external_source_settings, scratch_dummy, scratch, scratch_electron, nl_solver_params, t_params, t_input, num_diss_params, advection_structs, io_input, input_dict; - restart_electron_physics) + restart_electron_physics, skip_electron_solve=false) moments.electron.dens_updated[] = false # initialise the electron density profile @@ -474,7 +474,8 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z advection_structs.electron_vpa_advect, scratch_dummy, collisions, composition, geometry, external_source_settings, num_diss_params, gyroavs, nl_solver_params, t_params, - t_input["electron_t_input"], io_input, input_dict) + t_input["electron_t_input"], io_input, input_dict; + skip_electron_solve=skip_electron_solve) return nothing end @@ -571,7 
+572,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field scratch_dummy, collisions, composition, geometry, external_source_settings, num_diss_params, gyroavs, nl_solver_params, t_params, t_input, io_input, - input_dict) + input_dict; skip_electron_solve) # now that the initial electron pdf is given, the electron parallel heat flux should be updated # if using kinetic electrons @@ -705,25 +706,28 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field previous_runs_info, "initial_electron") - # Can't let this counter stay set to 0 - t_params.electron.dfns_output_counter[] = max(t_params.electron.dfns_output_counter[], 1) - success = - @views update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, - fields.phi, r, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, t_params.electron, - collisions, composition, - external_source_settings, num_diss_params, - nl_solver_params.electron_advance, - max_electron_pdf_iterations, - max_electron_sim_time; - io_electron=io_initial_electron, - initial_time=code_time, - residual_tolerance=t_input["initialization_residual_value"], - evolve_ppar=true) - if success != "" - error("!!!max number of iterations for electron pdf update exceeded!!!\n" - * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") + if !skip_electron_solve + # Can't let this counter stay set to 0 + t_params.electron.dfns_output_counter[] = max(t_params.electron.dfns_output_counter[], 1) + success = + @views update_electron_pdf!(scratch_electron, pdf.electron.norm, + moments, fields.phi, r, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + t_params.electron, collisions, + composition, external_source_settings, + num_diss_params, + nl_solver_params.electron_advance, + max_electron_pdf_iterations, + max_electron_sim_time; + io_electron=io_initial_electron, + initial_time=code_time, + 
residual_tolerance=t_input["initialization_residual_value"], + evolve_ppar=true) + if success != "" + error("!!!max number of iterations for electron pdf update exceeded!!!\n" + * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") + end end # Now run without evolve_ppar=true to get pdf_electron fully to steady state, @@ -731,7 +735,9 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field if global_rank[] == 0 println("Initializing electrons - evolving pdf_electron only to steady state") end - if t_params.implicit_electron_advance + if skip_electron_solve + success = "" + elseif t_params.implicit_electron_advance # Create new nl_solver_info ojbect with higher maximum iterations for # initialisation. initialisation_nl_solver_params = diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 1d32c1d1c..9be3fd932 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -213,7 +213,8 @@ parallel loop ranges, and are only used by the tests in `debug_test/`. 
function setup_moment_kinetics(input_dict::AbstractDict; restart::Union{Bool,AbstractString}=false, restart_time_index::mk_int=-1, debug_loop_type::Union{Nothing,NTuple{N,Symbol} where N}=nothing, - debug_loop_parallel_dims::Union{Nothing,NTuple{N,Symbol} where N}=nothing) + debug_loop_parallel_dims::Union{Nothing,NTuple{N,Symbol} where N}=nothing, + skip_electron_solve::Bool=false) setup_start_time = now() @@ -348,7 +349,8 @@ function setup_moment_kinetics(input_dict::AbstractDict; dt_before_last_fail, electron_dt, electron_dt_before_last_fail, collisions, species, geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, advection_structs, io_input, - restarting, restart_electron_physics, input_dict) + restarting, restart_electron_physics, input_dict; + skip_electron_solve=skip_electron_solve) # This is the closest we can get to the end time of the setup before writing it to the # output file diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 418a9da97..d18cef207 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -479,7 +479,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, advection_structs, io_input, restarting, - restart_electron_physics, input_dict) + restart_electron_physics, input_dict; + skip_electron_solve=false) # define some local variables for convenience/tidiness n_ion_species = composition.n_ion_species n_neutral_species = composition.n_neutral_species @@ -789,7 +790,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop external_source_settings, scratch_dummy, scratch, scratch_electron, nl_solver_params, t_params, t_input, num_diss_params, advection_structs, io_input, input_dict; - restart_electron_physics=restart_electron_physics) + 
restart_electron_physics=restart_electron_physics, + skip_electron_solve=skip_electron_solve) end # update the derivatives of the electron moments as these may be needed when From 41e32688b63211da0d2d227b1ae74f2aefc2a34e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 21 Aug 2024 15:47:45 +0100 Subject: [PATCH 032/107] Handle absolute path in get_default_restart_filename() Needs special handling because `glob()` errors when there is a single argument which is an absolute path. --- moment_kinetics/src/utils.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/utils.jl b/moment_kinetics/src/utils.jl index 2118c0407..8b934a1f9 100644 --- a/moment_kinetics/src/utils.jl +++ b/moment_kinetics/src/utils.jl @@ -226,7 +226,14 @@ function get_default_restart_filename(io_input, prefix; error_if_no_file_found=t error("Unrecognized binary_format '$binary_format'") end restart_filename_pattern = joinpath(io_input.output_dir, io_input.run_name * ".$prefix*." * ext) - restart_filename_glob = glob(restart_filename_pattern) + if isabspath(restart_filename_pattern) + # Special handling for absolute paths, as these give an error when `glob()` is + # called normally + restart_filename_glob = glob(basename(restart_filename_pattern), + dirname(restart_filename_pattern)) + else + restart_filename_glob = glob(restart_filename_pattern) + end if length(restart_filename_glob) == 0 if error_if_no_file_found error("No '$prefix' output file to restart from found matching the pattern " From 269f7499f5a8f6fd464d18d53439003cd379e4b2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 21 Aug 2024 18:29:48 +0100 Subject: [PATCH 033/107] Handle z-bc in skip_f_electron_bc_points_in_Jacobian() Add handling of "wall" and "constant" z-boundary-conditions in skip_f_electron_bc_points_in_Jacobian(). 
Just skips points with 'incoming' velocities, does not do anything to handle boundary condition modifications of 'outgoing' velocities (which will happen due to moment constraint enforcement). --- moment_kinetics/src/boundary_conditions.jl | 11 +++++++--- .../src/electron_kinetic_equation.jl | 21 +++++++++++-------- moment_kinetics/src/electron_vpa_advection.jl | 4 ++-- moment_kinetics/src/electron_z_advection.jl | 8 ++++--- moment_kinetics/src/krook_collisions.jl | 2 +- 5 files changed, 28 insertions(+), 18 deletions(-) diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 4cf90e35d..3a1db780d 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -1084,10 +1084,15 @@ happens, the corresponding row should be skipped when adding contributions to th matrix, so that the row remains the same as a row of the identity matrix, so that the Jacobian matrix does not modify those points. Returns `false` otherwise. 
""" -function skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) +function skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) # z boundary condition - if z.bc == "wall" && (iz == 1 || iz == z.n) - error("Need to do something about wall boundary condition in preconditioner matrix") + if z.bc ∈ ("wall", "constant") + if z.irank == 0 && iz == 1 && z_speed[iz,ivpa,ivperp] ≥ 0.0 + return true + end + if z.irank == z.nrank - 1 && iz == z.n && z_speed[iz,ivpa,ivperp] ≤ 0.0 + return true + end end # vperp boundary condition diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 0f1583971..e2f5484f4 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2909,22 +2909,25 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome jacobian_matrix[row,row] += 1.0 end + z_speed = @view z_advect[1].speed[:,:,:,ir] + add_electron_z_advection_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size) add_electron_vpa_advection_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, - vpa_advect, scratch_dummy, external_source_settings, dt, ir; ppar_offset=pdf_size) + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir; + ppar_offset=pdf_size) add_contribution_from_electron_pdf_term_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, - vperp, vpa, z_spectral, scratch_dummy, dt, ir; ppar_offset=pdf_size) + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) add_electron_dissipation_term_to_Jacobian!( - jacobian_matrix, f, num_diss_params, 
z, vperp, vpa, vpa_spectral, dt, ir) + jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir) add_electron_krook_collisions_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, - dt, ir; ppar_offset=pdf_size) + z_speed, dt, ir; ppar_offset=pdf_size) add_external_electron_source_to_Jacobian!( jacobian_matrix, f, moments, me, z_speed, external_source_settings, z, vperp, vpa, dt, ir; ppar_offset=pdf_size) @@ -3125,8 +3128,8 @@ function add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, vp end function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params, - z, vperp, vpa, vpa_spectral, dt, ir; - f_offset=0) + z, vperp, vpa, vpa_spectral, z_speed, + dt, ir; f_offset=0) @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n @@ -3141,7 +3144,7 @@ function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin - if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) continue end @@ -3399,7 +3402,7 @@ end function add_contribution_from_electron_pdf_term_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, - vperp, vpa, z_spectral, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0) + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " @@ -3417,7 +3420,7 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin - if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) continue end diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index ab083ae05..5aaab5eb7 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -93,7 +93,7 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, vth, third_moment, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, - vpa_advect, scratch_dummy, + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir; f_offset=0, ppar_offset=0) if f_offset == ppar_offset @@ -132,7 +132,7 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin - if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) continue end diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index 3d2bd4da0..5c5cb0b1d 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -89,9 +89,10 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p begin_vperp_vpa_region() update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed_array = @view z_advect[1].speed[:,:,:,1] @loop_vperp_vpa ivperp ivpa begin - @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_advect[1].speed[:,ivpa,ivperp,ir] + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed_array[:,ivpa,ivperp] end #calculate the upwind 
derivative @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], @@ -112,7 +113,8 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin - if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, + z_speed_array) continue end @@ -125,7 +127,7 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1) icolumn_max_z = z.imax[ielement_z] - z_speed = z_advect[1].speed[iz,ivpa,ivperp,ir] + z_speed = z_speed_array[iz,ivpa,ivperp] # Contributions from (w_∥*vth + upar)*dg/dz if ielement_z == 1 && igrid_z == 1 diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index 2226a7462..805c928bb 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -436,7 +436,7 @@ function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, up begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin - if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa) + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) continue end From d651d327ab2847dbedfa8cfdbc3f1c120d2157d9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 22 Aug 2024 10:51:31 +0100 Subject: [PATCH 034/107] Add `elementwise_isapprox()` for tests Like `isapprox()`, but tests element-by-element, so relatively small values do not get ignored. 
--- moment_kinetics/test/setup.jl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/test/setup.jl b/moment_kinetics/test/setup.jl index 555824d00..8a157ac73 100644 --- a/moment_kinetics/test/setup.jl +++ b/moment_kinetics/test/setup.jl @@ -13,7 +13,7 @@ using moment_kinetics module MKTestUtilities export use_verbose, force_optional_dependencies, @long, quietoutput, get_MPI_tempdir, - global_rank, global_size, maxabs_norm, @testset_skip + global_rank, global_size, maxabs_norm, elementwise_isapprox, @testset_skip using moment_kinetics.communication: comm_world, global_rank, global_size using moment_kinetics.command_line_options: get_options @@ -83,6 +83,18 @@ between two arrays. """ maxabs_norm(x) = maximum(abs.(x)) +""" + elementwise_isapprox(args...; kwargs...) + +Calls `isapprox()` but forces the comparison to be done element-by-element, rather than +testing `norm(x-y)<δ`. +""" +function elementwise_isapprox(args...; kwargs...) + return isapprox(args...; norm=x->NaN, kwargs...) +end + """ Get a single temporary directory that is the same on all MPI ranks """ From bf31e91d5f886f126d8d96d7fe5c147654147d24 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 21 Aug 2024 15:56:31 +0100 Subject: [PATCH 035/107] Automated tests for electron Jacobian matrix construction --- moment_kinetics/test/jacobian_matrix_tests.jl | 2126 +++++++++++++++++ moment_kinetics/test/runtests.jl | 1 + 2 files changed, 2127 insertions(+) create mode 100644 moment_kinetics/test/jacobian_matrix_tests.jl diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl new file mode 100644 index 000000000..988806687 --- /dev/null +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -0,0 +1,2126 @@ +module JacobianMatrixTests + +# Tests for construction of Jacobian matrices used for preconditioning + +include("setup.jl") + +using moment_kinetics.analysis: vpagrid_to_dzdt +using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!, + enforce_vperp_boundary_condition! 
+using moment_kinetics.derivatives: derivative_z! +using moment_kinetics.electron_fluid_equations: calculate_electron_qpar_from_pdf_no_r!, + electron_energy_equation_no_r!, + add_electron_energy_equation_to_Jacobian! +using moment_kinetics.electron_kinetic_equation: add_contribution_from_pdf_term!, + add_contribution_from_electron_pdf_term_to_Jacobian!, + add_dissipation_term!, + add_electron_dissipation_term_to_Jacobian!, + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!, + electron_kinetic_equation_euler_update!, + fill_electron_kinetic_equation_Jacobian! +using moment_kinetics.electron_vpa_advection: electron_vpa_advection!, + add_electron_vpa_advection_to_Jacobian! +using moment_kinetics.electron_z_advection: electron_z_advection!, + update_electron_speed_z!, + add_electron_z_advection_to_Jacobian! +using moment_kinetics.external_sources: external_electron_source!, + add_external_electron_source_to_Jacobian! +using moment_kinetics.krook_collisions: electron_krook_collisions!, + add_electron_krook_collisions_to_Jacobian! +using moment_kinetics.looping +using moment_kinetics.type_definitions: mk_float +using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r! + +using StatsBase + +# Small parameter used to create perturbations to test Jacobian against +epsilon = 1.0e-6 +test_wavenumber = 2.0 +dt = 1.0 +ion_dt = 1.0e-6 +ir = 1 +zero = 1.0e-14 + +# Test input uses `z_bc = "constant"`, which is not a very physically useful option, but +# is useful for testing because: +# * `z_bc = "wall"` would introduce discontinuities in the distribution function which +# might reduce accuracy and so make it harder to see whether errors are due to a mistake +# in the matrix construction or just due to discretisation error +# * For `z_bc = "periodic"`, the Jacobian matrices (by design) do not account for the +# periodicity. 
This should be fine when they are used as preconditioners, but does +# introduce errors at the periodic boundaries which would complicate testing. +test_input = Dict("run_name" => "jacobian_matrix", + "n_ion_species" => 1, + "n_neutral_species" => 1, + "electron_physics" => "kinetic_electrons", + "evolve_moments_density" => true, + "evolve_moments_parallel_flow" => true, + "evolve_moments_parallel_pressure" => true, + "evolve_moments_conservation" => true, + "recycling_fraction" => 0.5, + "T_e" => 1.0, + "T_wall" => 0.1, + "initial_density1" => 1.0, + "initial_temperature1" => 1.0, + "z_IC_option1" => "sinusoid", + "z_IC_density_amplitude1" => 0.1, + "z_IC_density_phase1" => mk_float(π), + "z_IC_upar_amplitude1" => 0.1, + "z_IC_upar_phase1" => mk_float(π), + "z_IC_temperature_amplitude1" => 0.1, + "z_IC_temperature_phase1" => mk_float(π), + "vpa_IC_option1" => "gaussian", + "vpa_IC_density_amplitude1" => 1.0, + "vpa_IC_density_phase1" => 0.0, + "vpa_IC_upar_amplitude1" => 0.0, + "vpa_IC_upar_phase1" => 0.0, + "vpa_IC_temperature_amplitude1" => 0.0, + "vpa_IC_temperature_phase1" => 0.0, + "initial_density2" => 1.0, + "initial_temperature2" => 1.0, + "z_IC_option2" => "sinusoid", + "z_IC_density_amplitude2" => 0.001, + "z_IC_density_phase2" => mk_float(π), + "z_IC_upar_amplitude2" => 0.0, + "z_IC_upar_phase2" => mk_float(π), + "z_IC_temperature_amplitude2" => 0.0, + "z_IC_temperature_phase2" => mk_float(π), + "vpa_IC_option2" => "gaussian", + "vpa_IC_density_amplitude2" => 1.0, + "vpa_IC_density_phase2" => 0.0, + "vpa_IC_upar_amplitude2" => 0.0, + "vpa_IC_upar_phase2" => 0.0, + "vpa_IC_temperature_amplitude2" => 0.0, + "vpa_IC_temperature_phase2" => 0.0, + "charge_exchange_frequency" => 0.75, + "ionization_frequency" => 0.0, + "constant_ionization_rate" => false, + "r_ngrid" => 1, + "r_nelement" => 1, + "z_ngrid" => 9, + "z_nelement" => 16, + "z_bc" => "constant", + "z_discretization" => "gausslegendre_pseudospectral", + "vpa_ngrid" => 6, + "vpa_nelement" => 31, + 
"vpa_L" => 12.0, + "vpa_bc" => "zero", + "vpa_discretization" => "gausslegendre_pseudospectral", + "vpa_element_spacing_option" => "coarse_tails", + "vz_ngrid" => 6, + "vz_nelement" => 31, + "vz_L" => 12.0, + "vz_bc" => "zero", + "vz_discretization" => "gausslegendre_pseudospectral", + "vz_element_spacing_option" => "coarse_tails", + "timestepping" => Dict{String,Any}("type" => "KennedyCarpenterARK324", + "implicit_electron_advance" => false, + "implicit_electron_ppar" => true, + "implicit_ion_advance" => false, + "implicit_vpa_advection" => false, + "nstep" => 1, + "dt" => ion_dt, + "minimum_dt" => 1.0e-7, + "rtol" => 1.0e-4, + "max_increase_factor_near_last_fail" => 1.001, + "last_fail_proximity_factor" => 1.1, + "max_increase_factor" => 1.05, + "nwrite" => 10000, + "nwrite_dfns" => 10000, + "steady_state_residual" => true, + "converged_residual_value" => 1.0e-3, + ), + "electron_timestepping" => Dict{String,Any}("nstep" => 1, + "dt" => dt, + "maximum_dt" => 1.0, + "nwrite" => 10000, + "nwrite_dfns" => 100000, + "type" => "Fekete4(3)", + "rtol" => 1.0e-6, + "atol" => 1.0e-14, + "minimum_dt" => 1.0e-10, + "initialization_residual_value" => 2.5, + "converged_residual_value" => 1.0e-2, + ), + "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100, + "rtol" => 1.0e-5, + "atol" => 1.0e-15, + "preconditioner_update_interval" => 1, + ), + "ion_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 1.0e0, + "force_minimum_pdf_value" => 0.0, + ), + "electron_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 2.0, + "force_minimum_pdf_value" => 0.0, + ), + "neutral_numerical_dissipation" => Dict{String,Any}("vz_dissipation_coefficient" => 1.0e-1, + "force_minimum_pdf_value" => 0.0, + ), + "ion_source" => Dict{String,Any}("active" => true, + "z_profile" => "gaussian", + "z_width" => 0.125, + "source_strength" => 0.1, + "source_T" => 2.0, + ), + "krook_collisions" => Dict{String,Any}("use_krook" => true), + ) + 
+"""
+    get_mk_state(test_input)
+
+Call `moment_kinetics.setup_moment_kinetics()` on `test_input` with
+`skip_electron_solve=true`, wrapped in `quietoutput()`, and return whatever that call
+returns. Callers in this file destructure the result into the individual state objects.
+"""
+function get_mk_state(test_input)
+    mk_state = nothing
+    quietoutput() do
+        mk_state = moment_kinetics.setup_moment_kinetics(test_input;
+                                                         skip_electron_solve=true)
+    end
+    return mk_state
+end
+
+"""
+    generate_norm_factor(perturbed_residual)
+
+Build a normalisation factor for comparing residuals element-by-element.
+
+3-D method: take the mean of `abs.(perturbed_residual)` over the third dimension, then
+smooth the result along the first dimension with a moving average of half-width 3.
+Near the ends of the first dimension the averaging window is truncated to the points
+that exist. The smoothing is applied independently to every index of the second
+dimension, so every entry of the returned array is defined. The result has size
+`(size(perturbed_residual, 1), size(perturbed_residual, 2), 1)` and can be broadcast
+against `perturbed_residual`.
+
+1-D method: return the mean of `abs.(perturbed_residual)` as a 1-element array (no
+smoothing).
+"""
+function generate_norm_factor(perturbed_residual::AbstractArray{mk_float,3})
+    # half-width of the window for moving average
+    w = 3
+    norm_factor_unsmoothed = mean(abs.(perturbed_residual); dims=3)
+    n = size(norm_factor_unsmoothed, 1)
+    # Smooth the 'norm_factor' with a moving average to avoid problems due to places where
+    # norm_factor happens to be (almost) zero
+    norm_factor = similar(norm_factor_unsmoothed)
+    for j ∈ axes(norm_factor_unsmoothed, 2), i ∈ 1:n
+        # Clamping the window to 1:n gives the one-sided windows 1:i+w and i-w:n at the
+        # two ends and the full window i-w:i+w in the interior, and stays in bounds
+        # even when n < 2w.
+        window = max(1, i - w):min(n, i + w)
+        norm_factor[i,j,1] = mean(norm_factor_unsmoothed[window,j,1])
+    end
+    return norm_factor
+end
+function generate_norm_factor(perturbed_residual::AbstractArray{mk_float,1})
+    return mean(abs.(perturbed_residual); dims=1)
+end
+
+# Quite a large multiplier in rtol for this test, but it is plausible that a nonlinear
+# error (∼epsilon^2) could be multiplied by ∼vth*vpa.L/2∼sqrt(2)*60*6≈500.
+function test_electron_z_advection(test_input; rtol=(3.0e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_z_advection" + println(" electron_z_advection") + + @testset "electron_z_advection" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. 
delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_electron_z_advection_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, + z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + electron_z_advection!(residual, this_f, upar, vth, z_advect, z, vpa.grid, + z_spectral, scratch_dummy, dt, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = 
generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_electron_vpa_advection(test_input; rtol=(5.0e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_vpa_advection" + println(" electron_vpa_advection") + + @testset "electron_vpa_advection" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view 
moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. 
delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_electron_vpa_advection_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir; + ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + electron_vpa_advection!(residual, this_f, dens, upar, this_p, moments, + vpa_advect, vpa, vpa_spectral, scratch_dummy, dt, + external_source_settings.electron, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + # Divide out the z-average of the magnitude of perturbed_residual from the + # difference, so that different orders of magnitude at different w_∥ are all + # tested sensibly, but occasional small values of the residual do not make the + # test fail. + # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
+ norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # Divide out the z-average of the magnitude of perturbed_residual from the + # difference, so that different orders of magnitude at different w_∥ are all + # tested sensibly, but occasional small values of the residual do not make the + # test fail. + # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
+ norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # Divide out the z-average of the magnitude of perturbed_residual from the + # difference, so that different orders of magnitude at different w_∥ are all + # tested sensibly, but occasional small values of the residual do not make the + # test fail. + # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
+ norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_contribution_from_electron_pdf_term" + println(" contribution_from_electron_pdf_term") + + @testset "contribution_from_electron_pdf_term" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = 
scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_contribution_from_electron_pdf_term_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() 
just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + add_contribution_from_pdf_term!(residual, this_f, this_p, dens, upar, moments, + vpa.grid, z, dt, + external_source_settings.electron, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = 
generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_dissipation_term" + println(" electron_dissipation_term") + + @testset "electron_dissipation_term" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view 
moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_electron_dissipation_term_to_Jacobian!( + jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, + ir) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time 
+ # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + add_dissipation_term!(residual, this_f, scratch_dummy, z_spectral, z, vpa, + vpa_spectral, num_diss_params, dt) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = 
generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_krook_collisions" + println(" electron_krook_collisions") + + @testset "electron_krook_collisions" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view 
moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + # Modify upar_ion to make sure it is different from upar_electron so that the term + # proportional to (u_i-u_e) gets tested in case it is ever needed. + upar_ion = @view moments.ion.upar[:,ir,1] + @. upar_ion += sin(4.0*π*test_wavenumber*z.grid/z.L) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_electron_krook_collisions_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, @view(moments.ion.upar[:,ir]), + collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, 
z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + @views electron_krook_collisions!(residual, this_f, dens, upar, + moments.ion.upar[:,ir], vth, collisions, + vperp, vpa, dt) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = 
generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_external_electron_source" + println(" external_electron_source") + + @testset "external_electron_source" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view 
moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. 
delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_external_electron_source_to_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings, z, vperp, + vpa, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + external_electron_source!(residual, this_f, dens, upar, moments, composition, + external_source_settings.electron, vperp, vpa, dt, + ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = 
generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_electron_energy_equation(test_input; rtol=(1.5e2*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_energy_equation" + println(" electron_energy_equation") + + @testset "electron_energy_equation" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view 
moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. 
delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_electron_energy_equation_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, + z_spectral, num_diss_params, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_region() + @loop_z iz begin + residual[iz] = ppar[iz] + end + @views electron_energy_equation_no_r!( + residual, this_p, dens, upar, moments.ion.dens[:,ir], + moments.ion.upar[:,ir], moments.ion.ppar[:,ir], + moments.neutral.dens[:,ir], moments.neutral.uz[:,ir], + moments.neutral.pz[:,ir], moments.electron, collisions, dt, + composition, external_source_settings.electron, num_diss_params, z, + ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_region() + @loop_z iz begin + residual[iz] = this_p[iz] - residual[iz] + end + end + + original_residual = zeros(mk_float, size(ppar)) + perturbed_residual = zeros(mk_float, size(ppar)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + zeros(pdf_size); atol=1.0e-15) + + 
norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_ion_dt_forcing_of_electron_ppar" + println(" ion_dt_forcing_of_electron_ppar") + + @testset "ion_dt_forcing_of_electron_ppar" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] +
z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( + jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a 
steady-state solution. + begin_z_region() + @loop_z iz begin + residual[iz] = ppar[iz] + end + ppar_previous_ion_step = moments.electron.ppar + begin_z_region() + @loop_z iz begin + # At this point, ppar_out = ppar_in + dt*RHS(ppar_in). Here we add a + # source/damping term so that in the steady state of the electron + # pseudo-timestepping iteration, + # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, + # resulting in a backward-Euler step (as long as the pseudo-timestepping + # loop converges). + residual[iz] += -dt * (this_p[iz] - ppar_previous_ion_step[iz,ir]) / ion_dt + end + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_region() + @loop_z iz begin + residual[iz] = this_p[iz] - residual[iz] + end + end + + original_residual = zeros(mk_float, size(ppar)) + perturbed_residual = zeros(mk_float, size(ppar)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + # No norm factor, because both perturbed residuals should be zero here, as + # delta_f does not affect this term. 
+ @test elementwise_isapprox(perturbed_residual, + perturbed_with_Jacobian; + rtol=0.0, atol=1.0e-15) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + zeros(pdf_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + +function test_electron_kinetic_equation(test_input; rtol=(2.0e2*epsilon)^2) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_kinetic_equation" + println(" electron_kinetic_equation") + + @testset "electron_kinetic_equation" begin + # Suppress console output while running + pdf, scratch,
scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vperp_spectral = spectral_objects.vperp_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + fill_electron_kinetic_equation_Jacobian!( + jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, dt, ion_dt, + ir, true, scratch_dummy.buffer_vpavperp_1, scratch_dummy.buffer_vpavperp_2) + + function residual_func!(residual_f, residual_p, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using 
the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + residual_p[iz] = ppar[iz] + end + electron_kinetic_equation_euler_update!( + residual_f, residual_p, this_f, this_p, moments, z, vperp, vpa, + z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, + composition, external_source_settings, num_diss_params, dt, ir; + evolve_ppar=true, ion_dt=ion_dt) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual_f[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual_f[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + residual_p[iz] = this_p[iz] - residual_p[iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. 
+ if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual_f[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual_f, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual_f[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual_f[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual_f = zeros(mk_float, size(f)) + original_residual_p = zeros(mk_float, size(ppar)) + perturbed_residual_f = zeros(mk_float, size(f)) + perturbed_residual_p = zeros(mk_float, size(ppar)) + + @testset "δf only" begin + residual_func!(original_residual_f, original_residual_p, f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual_f, original_residual_p, f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + 
residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end + + @testset "δf and δp" begin + residual_func!(original_residual_f, original_residual_p, f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end + end + + return nothing +end + +function runtests() + # Only run this test in serial, for simplicity. We are testing correctness of the + # matrix construction here, not performance or parallelisation, etc. 
+ if global_size[] > 1 + @testset_skip "Jacobian matrix tests are only implemented for serial runs." "Jacobaian matrix" + return nothing + end + + # Create a temporary directory for test output + test_output_directory = get_MPI_tempdir() + test_input["base_directory"] = test_output_directory + + @testset "Jacobian matrix" verbose=use_verbose begin + println(" Jacobian matrix") + + test_electron_z_advection(test_input) + test_electron_vpa_advection(test_input) + test_contribution_from_electron_pdf_term(test_input) + test_electron_dissipation_term(test_input) + test_electron_krook_collisions(test_input) + test_external_electron_source(test_input) + test_electron_energy_equation(test_input) + test_ion_dt_forcing_of_electron_ppar(test_input) + test_electron_kinetic_equation(test_input) + end + + if global_rank[] == 0 + # Delete output directory to avoid using too much disk space + rm(realpath(test_output_directory); recursive=true) + end + + return nothing +end + +end # JacobianMatrixTests + + +using .JacobianMatrixTests + +JacobianMatrixTests.runtests() diff --git a/moment_kinetics/test/runtests.jl b/moment_kinetics/test/runtests.jl index 73d688f06..97731c66a 100644 --- a/moment_kinetics/test/runtests.jl +++ b/moment_kinetics/test/runtests.jl @@ -20,6 +20,7 @@ function runtests() include(joinpath(@__DIR__, "fokker_planck_tests.jl")) include(joinpath(@__DIR__, "fokker_planck_time_evolution_tests.jl")) include(joinpath(@__DIR__, "gyroaverage_tests.jl")) + include(joinpath(@__DIR__, "jacobian_matrix_tests.jl")) end end From bab63e8ee034f2e34290b63e0bfb115dd088c292 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 26 Aug 2024 16:15:40 +0100 Subject: [PATCH 036/107] Use 'soft forcing' for moment constraints in implicit electron solves --- moment_kinetics/src/coordinates.jl | 14 +- .../src/electron_kinetic_equation.jl | 44 ++- moment_kinetics/src/input_structs.jl | 1 + moment_kinetics/src/moment_constraints.jl | 91 +++++- moment_kinetics/src/moment_kinetics_input.jl | 
2 + moment_kinetics/src/time_advance.jl | 1 + moment_kinetics/test/jacobian_matrix_tests.jl | 286 +++++++++++++++++- 7 files changed, 406 insertions(+), 33 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 158de0074..976015583 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -96,6 +96,9 @@ struct coordinate{T <: AbstractVector{mk_float}} # scratch_shared2 is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared2::T + # scratch_shared3 is a shared-memory array used for intermediate calculations requiring + # n entries + scratch_shared3::T # scratch_2d and scratch2_2d are arrays used for intermediate calculations requiring # ngrid x nelement entries scratch_2d::Array{mk_float,2} @@ -168,15 +171,18 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing if ignore_MPI scratch_shared = allocate_float(n_local) scratch_shared2 = allocate_float(n_local) + scratch_shared3 = allocate_float(n_local) else scratch_shared = allocate_shared_float(n_local) scratch_shared2 = allocate_shared_float(n_local) + scratch_shared3 = allocate_shared_float(n_local) end - # Initialise scratch_shared and scratch_shared2 so that the debug checks do not - # complain when they get printed by `println(io, all_inputs)` in mk_input(). + # Initialise scratch_shared* so that the debug checks do not complain when they get + # printed by `println(io, all_inputs)` in mk_input(). 
if block_rank[] == 0 scratch_shared .= NaN scratch_shared2 .= NaN + scratch_shared3 .= NaN end if !ignore_MPI _block_synchronize() @@ -236,8 +242,8 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing cell_width, igrid, ielement, imin, imax, igrid_full, input.discretization, input.fd_option, input.cheb_option, input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), - copy(scratch), copy(scratch), scratch_shared, scratch_shared2, scratch_2d, - copy(scratch_2d), advection, send_buffer, receive_buffer, input.comm, + copy(scratch), copy(scratch), scratch_shared, scratch_shared2, scratch_shared3, + scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, input.comm, local_io_range, global_io_range, element_scale, element_shift, input.element_spacing_option, element_boundaries, radau_first_element, other_nodes, one_over_denominator) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index e2f5484f4..231bd35c2 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -11,7 +11,7 @@ using ..analysis: steady_state_residuals using ..derivatives: derivative_z!, derivative_z_pdf_vpavperpz! using ..boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition!, - skip_f_electron_bc_points_in_Jacobian + skip_f_electron_bc_points_in_Jacobian, vpagrid_to_dzdt using ..calculus: derivative!, second_derivative!, integral using ..communication using ..gauss_legendre: gausslegendre_info @@ -40,7 +40,9 @@ using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee get_collision_frequency_ei, add_electron_krook_collisions_to_Jacobian! using ..moment_constraints: hard_force_moment_constraints!, - moment_constraints_on_residual! 
+ moment_constraints_on_residual!, + electron_implicit_constraint_forcing!, + add_electron_implicit_constraint_forcing_to_Jacobian! using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct using ..nonlinear_solvers using ..runge_kutta: rk_update_variable!, rk_loworder_solution!, local_error_norm, @@ -366,7 +368,7 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll scratch[istage].electron_ppar[:,ir], moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, t_params.dt[], ir; evolve_ppar=evolve_ppar, + num_diss_params, t_params, ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt) end speedup_hack!(scratch[istage+1], scratch[istage], z_speedup_fac, z, vpa; @@ -795,7 +797,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, t_params.dt[], ir) + num_diss_params, t_params, ir) # Calculate heat flux and derivatives using updated f_electron @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], electron_ppar_new, @@ -1038,7 +1040,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos precon_matrix, f_electron_new, electron_ppar_new, moments, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params.dt[], ion_dt, + external_source_settings, num_diss_params, t_params, ion_dt, ir, evolve_ppar) begin_serial_region() @@ -1169,8 +1171,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos f_electron_residual, electron_ppar_residual, f_electron_newvar, electron_ppar_newvar, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, - 
composition, external_source_settings, num_diss_params, t_params.dt[], - ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt) + composition, external_source_settings, num_diss_params, t_params, + ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt, + soft_force_constraints=true) # Now # residual = f_electron_old + dt*RHS(f_electron_newvar) @@ -1595,7 +1598,8 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo f_electron_residual, electron_ppar_residual, f_electron_new, electron_ppar_new, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, pdf_electron_normalisation_factor, ir) + num_diss_params, pdf_electron_normalisation_factor, t_params, ir; + soft_force_constraints=true) @loop_z_vperp_vpa iz ivperp ivpa begin f_electron_residual[ivpa,ivperp,iz] /= sqrt(1.0 + vpa.grid[ivpa]^2) end @@ -2757,7 +2761,7 @@ end z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, dt, ir; + num_diss_params, t_params, ir; evolve_ppar=false, ion_dt=nothing) Do a forward-Euler update of the electron kinetic equation. 
@@ -2772,8 +2776,11 @@ function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, dt, ir; - evolve_ppar=false, ion_dt=nothing) + num_diss_params, t_params, ir; + evolve_ppar=false, ion_dt=nothing, + soft_force_constraints=false) + dt = t_params.dt[] + # add the contribution from the z advection term @views electron_z_advection!(f_out, f_in, moments.electron.upar[:,ir], moments.electron.vth[:,ir], z_advect, z, vpa.grid, @@ -2812,6 +2819,12 @@ function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, vperp, vpa, dt, ir) end + if soft_force_constraints + electron_implicit_constraint_forcing!(f_out, f_in, + t_params.constraint_forcing_rate, vpa, dt, + ir) + end + if evolve_ppar @views electron_energy_equation_no_r!( ppar_out, ppar_in, moments.electron.dens[:,ir], @@ -2846,7 +2859,7 @@ end composition, z, vperp, vpa, z_spectral, vperp_specral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, dt, ion_dt, + num_diss_params, t_params, ion_dt, ir, evolve_ppar) Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if @@ -2857,8 +2870,10 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, dt, ion_dt, ir, + num_diss_params, t_params, ion_dt, ir, evolve_ppar) + dt = t_params.dt[] + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] @@ -2931,6 +2946,9 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome add_external_electron_source_to_Jacobian!( jacobian_matrix, f, moments, me, z_speed, external_source_settings, z, vperp, vpa, dt, ir; ppar_offset=pdf_size) + 
add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix, f, z_speed, z, vperp, vpa, t_params.constraint_forcing_rate, dt, + ir) if evolve_ppar add_electron_energy_equation_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index f1505b0ca..e669d2ce5 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -88,6 +88,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero implicit_ion_advance::Bool implicit_vpa_advection::Bool implicit_electron_ppar::Bool + constraint_forcing_rate::mk_float write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index f8c0a2274..e99599e4e 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -5,12 +5,15 @@ function. """ module moment_constraints +using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian using ..communication: _block_synchronize using ..looping using ..type_definitions: mk_float using ..velocity_moments: integrate_over_vspace, update_qpar! -export hard_force_moment_constraints!, hard_force_moment_constraints_neutral! +export hard_force_moment_constraints!, hard_force_moment_constraints_neutral!, + electron_implicit_constraint_forcing!, + add_electron_implicit_constraint_forcing_to_Jacobian! """ hard_force_moment_constraints!(f, moments, vpa) @@ -229,4 +232,90 @@ function moment_constraints_on_residual!(residual::AbstractArray{T,N}, return A, B, C end +""" + electron_implicit_constraint_forcing!(f_out, f_in, constraint_forcing_rate, vpa, + dt, ir) + +Add terms to the electron kinetic equation that force the moment constraints to be +approximately satisfied. 
Needed to avoid large errors when taking large, implicit +timesteps that do not guarantee accurate time evolution. +""" +function electron_implicit_constraint_forcing!(f_out, f_in, constraint_forcing_rate, vpa, + dt, ir) + begin_z_region() + vpa_grid = vpa.grid + @loop_z iz begin + @views zeroth_moment = integrate_over_vspace(f_in[:,1,iz], vpa.wgts) + @views first_moment = integrate_over_vspace(f_in[:,1,iz], vpa.grid, vpa.wgts) + @views second_moment = integrate_over_vspace(f_in[:,1,iz], vpa.grid, 2, vpa.wgts) + + @loop_vperp_vpa ivperp ivpa begin + f_out[ivpa,ivperp,iz] += + dt * constraint_forcing_rate * + ((1.0 - zeroth_moment) + - first_moment*vpa_grid[ivpa] + + (0.5 - second_moment)*vpa_grid[ivpa]^2) * f_in[ivpa,ivperp,iz] + end + end + + return nothing +end + +""" + add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f, + z_speed, z, vperp, vpa, + constraint_forcing_rate, + dt, ir; f_offset=0) + +Add the contributions corresponding to [`electron_implicit_constraint_forcing!`](@ref) to +`jacobian_matrix`. 
+""" +function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f, + z_speed, z, vperp, vpa, + constraint_forcing_rate, + dt, ir; f_offset=0) + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + v_size = vperp.n * vpa.n + + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) + end + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + # Diagonal terms + jacobian_matrix[row,row] += -dt * constraint_forcing_rate * + ((1.0 - zeroth_moment[iz]) + - first_moment[iz]*vpa_grid[ivpa] + + (0.5 - second_moment[iz])*vpa_grid[ivpa]^2) + + # Integral terms + # d(∫dw_∥ w_∥^n g[irow])/d(g[icol]) = vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^n + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * constraint_forcing_rate * + (1.0 + + vpa_grid[icolvpa]*vpa_grid[ivpa] + + vpa_grid[icolvpa]^2*vpa_grid[ivpa]^2) * + vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp,iz] + end + end + + return nothing +end + end diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 12e0a879f..187112d15 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -231,6 +231,7 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) implicit_ion_advance=false, implicit_vpa_advection=false, implicit_electron_ppar=false, 
+ constraint_forcing_rate=0.0, write_after_fixed_step_count=false, write_error_diagnostics=false, write_steady_state_diagnostics=false, @@ -267,6 +268,7 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) last_fail_proximity_factor=timestepping_section["last_fail_proximity_factor"], minimum_dt=timestepping_section["minimum_dt"] * sqrt(composition.me_over_mi), maximum_dt=timestepping_section["maximum_dt"] * sqrt(composition.me_over_mi), + constraint_forcing_rate=1.0e6, write_after_fixed_step_count=false, write_error_diagnostics=false, write_steady_state_diagnostics=false, diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index d18cef207..201124755 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -456,6 +456,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron !== nothing && t_input["implicit_ion_advance"], electron !== nothing && t_input["implicit_vpa_advection"], electron !== nothing && t_input["implicit_electron_ppar"], + t_input["constraint_forcing_rate"], t_input["write_after_fixed_step_count"], error_sum_zero, t_input["split_operators"], t_input["steady_state_residual"], t_input["converged_residual_value"], diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 988806687..205d32e49 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -28,6 +28,8 @@ using moment_kinetics.external_sources: external_electron_source!, using moment_kinetics.krook_collisions: electron_krook_collisions!, add_electron_krook_collisions_to_Jacobian! using moment_kinetics.looping +using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!, + add_electron_implicit_constraint_forcing_to_Jacobian! 
using moment_kinetics.type_definitions: mk_float using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r! @@ -36,7 +38,7 @@ using StatsBase # Small parameter used to create perturbations to test Jacobian against epsilon = 1.0e-6 test_wavenumber = 2.0 -dt = 1.0 +dt = 0.42 ion_dt = 1.0e-6 ir = 1 zero = 1.0e-14 @@ -141,6 +143,7 @@ test_input = Dict("run_name" => "jacobian_matrix", "minimum_dt" => 1.0e-10, "initialization_residual_value" => 2.5, "converged_residual_value" => 1.0e-2, + "constraint_forcing_rate" => 2.321, ), "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100, "rtol" => 1.0e-5, @@ -230,9 +233,11 @@ function test_electron_z_advection(test_input; rtol=(3.0e1*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -454,9 +459,11 @@ function test_electron_vpa_advection(test_input; rtol=(5.0e1*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -698,9 +705,11 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -906,9 +915,11 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -1117,9 +1128,11 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -1342,9 +1355,11 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -1514,6 +1529,238 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) return nothing end +# For this test where only the 'constraint forcing' term is added to the residual, the +# residual is exactly zero for the initial condition (because that is constructed to obey +# the constraints). Therefore the 'perturbed_residual' is non-zero only because of +# delta_f, which is small, O(epsilon), so 'norm_factor' is also O(epsilon). We therefore +# use a tolerance of O(epsilon) in this test, unlike the other tests which use a tolerance +# of O(epsilon^2). Note that in the final test of the full electron kinetic equations, +# with all terms including this one, we do not have a similar issue, as there the other +# terms create an O(1) residual for the initial condition, which will then set the +# 'norm_factor'. 
+# +# We test the Jacobian for these constraint forcing terms using +# constraint_forcing_rate=O(1), because in these tests we set dt=O(1), so a large +# coefficient would make the non-linearity large and then it would be hard to distinguish +# errors from non-linearity (or rounding errors) in `test_electron_kinetic_equation()` +# that tests the combined effect of all terms in the electron kinetic equation. This test +# would actually be OK because the ratio of linear to non-linear contributions of this +# single term does not depend on the size of the coefficient. In the combined test, we are +# effectively comparing the non-linear error from this term to the residual from other +# terms, so the coefficient of this term matters there. Even though these settings are not +# what we would use in a real simulation, they should tell us if the implementation is +# correct. +function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsilon)) + test_input = deepcopy(test_input) + test_input["run_name"] *= "_electron_implicit_constraint_forcing" + println(" electron_implicit_constraint_forcing") + + @testset "electron_implicit_constraint_forcing" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = 
advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = similar(ppar) + p_amplitude = epsilon * maximum(ppar) + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + f = @view pdf.electron.norm[:,:,:,ir] + delta_f = similar(f) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = zeros(mk_float, total_size, total_size) + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + + add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix, f, z_speed, z, vperp, vpa, + t_params.electron.constraint_forcing_rate, dt, ir) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_implicit_constraint_forcing!() just adds dt times the constraint-forcing + # term (evaluated from this_f) to its first argument, so initialise `residual` to + # the unperturbed `f` here and pass the test's `dt`;
after the call residual is + # f + dt*RHS(this_f), which is converted to the backward-Euler residual below. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + electron_implicit_constraint_forcing!(residual, this_f, + t_params.electron.constraint_forcing_rate, + vpa, dt, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index.
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = zeros(mk_float, size(f)) + perturbed_residual = zeros(mk_float, size(f)) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # No norm factor, because both perturbed 
residuals should be zero here, as + # delta_p does not affect this term, and `f` (with no `delta_f`) obeys the + # constraints exactly, so this term vanishes. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=1.0e-15) + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + + end + + return nothing +end + function test_electron_energy_equation(test_input; rtol=(1.5e2*epsilon)^2) test_input = deepcopy(test_input) test_input["run_name"] *= "_electron_energy_equation" @@ -1568,9 +1815,11 @@ function test_electron_energy_equation(test_input; rtol=(1.5e2*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -1732,9 +1981,11 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -1814,7 +2065,9 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 delta_state[1:pdf_size]; atol=1.0e-15) # No norm factor, because both perturbed residuals should be zero here, as - # delta_f does not affect this term. + # delta_f does not affect this term, and `ppar` is used as + # `ppar_previous_ion_step` in this test, so the residuals are exactly zero if + # there is no delta_p. @test elementwise_isapprox(perturbed_residual, perturbed_with_Jacobian; rtol=0.0, atol=1.0e-15) @@ -1896,9 +2149,11 @@ function test_electron_kinetic_equation(test_input; rtol=(2.0e2*epsilon)^2) f = @view pdf.electron.norm[:,:,:,ir] delta_f = similar(f) f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
delta_f .= f_amplitude .* reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L), vpa.n, 1, 1) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* f pdf_size = length(f) @@ -1914,8 +2169,8 @@ function test_electron_kinetic_equation(test_input; rtol=(2.0e2*epsilon)^2) fill_electron_kinetic_equation_Jacobian!( jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, dt, ion_dt, - ir, true, scratch_dummy.buffer_vpavperp_1, scratch_dummy.buffer_vpavperp_2) + external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, + true, scratch_dummy.buffer_vpavperp_1, scratch_dummy.buffer_vpavperp_2) function residual_func!(residual_f, residual_p, this_f, this_p) begin_z_region() @@ -1951,8 +2206,8 @@ function test_electron_kinetic_equation(test_input; rtol=(2.0e2*epsilon)^2) electron_kinetic_equation_euler_update!( residual_f, residual_p, this_f, this_p, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, - composition, external_source_settings, num_diss_params, dt, ir; - evolve_ppar=true, ion_dt=ion_dt) + composition, external_source_settings, num_diss_params, t_params.electron, + ir; evolve_ppar=true, ion_dt=ion_dt) # Now # residual = f_electron_old + dt*RHS(f_electron_newvar) # so update to desired residual @@ -2105,6 +2360,7 @@ function runtests() test_electron_dissipation_term(test_input) test_electron_krook_collisions(test_input) test_external_electron_source(test_input) + test_electron_implicit_constraint_forcing(test_input) test_electron_energy_equation(test_input) test_ion_dt_forcing_of_electron_ppar(test_input) test_electron_kinetic_equation(test_input) From 5fe06cfd78410126559191c737e2dc9c07776520 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 26 Aug 2024 
20:13:42 +0100 Subject: [PATCH 037/107] Clean up constraints enforcement for electron backward euler --- .../src/electron_kinetic_equation.jl | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 231bd35c2..592de9918 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1103,10 +1103,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos electron_ppar_residual, f_electron_residual = residual electron_ppar_newvar, f_electron_newvar = new_variables - apply_electron_bc_and_constraints_no_r!(f_electron_newvar, phi, moments, - z, vperp, vpa, vperp_spectral, - vpa_spectral, vpa_advect, - num_diss_params, composition, ir) + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron_newvar, phi, moments.electron.vth[:,ir], + moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi) if evolve_ppar this_dens = moments.electron.dens @@ -1237,15 +1240,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end end - begin_z_region() - @loop_z iz begin - @views moment_constraints_on_residual!(f_electron_residual[:,:,iz], - f_electron_newvar[:,:,iz], - (evolve_density=true, - evolve_upar=true, - evolve_ppar=true), - vpa) - end + return nothing end From e38caf867272414907ff6005b49a3b0dc2e85713 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 26 Aug 2024 21:59:50 +0100 Subject: [PATCH 038/107] Enforce moment constraints on initial electron distribution function --- moment_kinetics/src/initial_conditions.jl | 9 ++- moment_kinetics/src/moment_constraints.jl | 10 +++ moment_kinetics/test/jacobian_matrix_tests.jl | 61 
+++++++++++++++++-- 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 0e13826d6..3de547fa3 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -26,6 +26,7 @@ using ..electron_kinetic_equation: implicit_electron_advance! using ..em_fields: update_phi! using ..file_io: setup_electron_io, write_electron_state, finish_electron_io using ..load_data: reload_electron_data! +using ..moment_constraints: hard_force_moment_constraints! using ..moment_kinetics_structs: scratch_pdf, pdf_substruct, electron_pdf_substruct, pdf_struct, moments_struct, boundary_distributions_struct using ..nonlinear_solvers: nl_solver_info @@ -1631,13 +1632,17 @@ function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upa @loop_r ir begin # Initialise an unshifted Maxwellian as a first step @loop_z iz begin - vpa_over_vth = @. vpa.scratch3 = vpa.grid + upar[iz,ir] / vth[iz,ir] @loop_vperp ivperp begin - @. pdf[:,ivperp,iz,ir] = exp(-vpa_over_vth^2) + @. 
pdf[:,ivperp,iz,ir] = exp(-vpa.grid^2) end end end end + + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(pdf, moments, vpa) + + return nothing end function init_pdf_moments_manufactured_solns!(pdf, moments, vz, vr, vzeta, vpa, vperp, z, r, n_ion_species, n_neutral_species, geometry,composition) diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index e99599e4e..d0fea0ecd 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -89,6 +89,16 @@ function hard_force_moment_constraints!(f, moments, vpa) return A, B, C end +function hard_force_moment_constraints!(f::AbstractArray{mk_float,4}, moments, vpa) + A = moments.electron.constraints_A_coefficient + B = moments.electron.constraints_B_coefficient + C = moments.electron.constraints_C_coefficient + begin_r_z_region() + @loop_r_z ir iz begin + A[iz,ir], B[iz,ir], C[iz,ir] = + hard_force_moment_constraints!(@view(f[:,:,iz,ir]), moments, vpa) + end +end function hard_force_moment_constraints!(f::AbstractArray{mk_float,5}, moments, vpa) A = moments.ion.constraints_A_coefficient B = moments.ion.constraints_B_coefficient diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 205d32e49..34fd281ef 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -29,7 +29,8 @@ using moment_kinetics.krook_collisions: electron_krook_collisions!, add_electron_krook_collisions_to_Jacobian! using moment_kinetics.looping using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!, - add_electron_implicit_constraint_forcing_to_Jacobian! + add_electron_implicit_constraint_forcing_to_Jacobian!, + hard_force_moment_constraints! using moment_kinetics.type_definitions: mk_float using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r! 
@@ -201,7 +202,7 @@ end # Quite a large multiplier in rtol for this test, but it is plausible that a nonlinear # error (∼epsilon^2) could be multiplied by ∼vth*vpa.L/2∼sqrt(2)*60*6≈500. -function test_electron_z_advection(test_input; rtol=(3.0e1*epsilon)^2) +function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) test_input = deepcopy(test_input) test_input["run_name"] *= "_electron_z_advection" println(" electron_z_advection") @@ -231,6 +232,11 @@ function test_electron_z_advection(test_input; rtol=(3.0e1*epsilon)^2) @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -406,7 +412,7 @@ function test_electron_z_advection(test_input; rtol=(3.0e1*epsilon)^2) return nothing end -function test_electron_vpa_advection(test_input; rtol=(5.0e1*epsilon)^2) +function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["run_name"] *= "_electron_vpa_advection" println(" electron_vpa_advection") @@ -457,6 +463,11 @@ function test_electron_vpa_advection(test_input; rtol=(5.0e1*epsilon)^2) @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -703,6 +714,11 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -913,6 +929,11 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -1126,6 +1147,11 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) @. 
delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -1353,6 +1379,11 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -1583,6 +1614,11 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -1761,7 +1797,7 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil return nothing end -function test_electron_energy_equation(test_input; rtol=(1.5e2*epsilon)^2) +function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["run_name"] *= "_electron_energy_equation" println(" electron_energy_equation") @@ -1813,6 +1849,11 @@ function test_electron_energy_equation(test_input; rtol=(1.5e2*epsilon)^2) @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -1979,6 +2020,11 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes @@ -2117,7 +2163,7 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 return nothing end -function test_electron_kinetic_equation(test_input; rtol=(2.0e2*epsilon)^2) +function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["run_name"] *= "_electron_kinetic_equation" println(" electron_kinetic_equation") @@ -2147,6 +2193,11 @@ function test_electron_kinetic_equation(test_input; rtol=(2.0e2*epsilon)^2) @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) f = @view pdf.electron.norm[:,:,:,ir] + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) delta_f = similar(f) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes From 7074eddb97066cecf5d368e6b79f1983a104b7d0 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 27 Aug 2024 18:06:49 +0100 Subject: [PATCH 039/107] Make Jacobian matrix tests work in parallel Can be useful to help hunt bugs. 
--- moment_kinetics/test/jacobian_matrix_tests.jl | 1404 ++++++++++------- 1 file changed, 797 insertions(+), 607 deletions(-) diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 34fd281ef..c0a26b9f4 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -4,7 +4,9 @@ module JacobianMatrixTests include("setup.jl") +using moment_kinetics: setup_moment_kinetics, cleanup_moment_kinetics! using moment_kinetics.analysis: vpagrid_to_dzdt +using moment_kinetics.array_allocation: allocate_shared_float using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition! using moment_kinetics.derivatives: derivative_z! @@ -172,11 +174,16 @@ test_input = Dict("run_name" => "jacobian_matrix", function get_mk_state(test_input) mk_state = nothing quietoutput() do - mk_state = moment_kinetics.setup_moment_kinetics(test_input; - skip_electron_solve=true) + mk_state = setup_moment_kinetics(test_input; skip_electron_solve=true) end return mk_state end +function cleanup_mk_state!(args...) + quietoutput() do + cleanup_moment_kinetics!(args...) + end + return nothing +end function generate_norm_factor(perturbed_residual::AbstractArray{mk_float,3}) # half-width of the window for moving average @@ -227,33 +234,41 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) vpa_advect = advection_structs.vpa_advect me = composition.me_over_mi - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. 
- f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) - # Ensure initial electron distribution function obeys constraints + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + # Ensure initial electron distribution function obeys constraints + end hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. - delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_electron_z_advection_to_Jacobian!( @@ -346,67 +361,77 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, 
size(f)) + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) @testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + 
delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -458,33 +483,42 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) 
f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. - delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_electron_vpa_advection_to_Jacobian!( @@ -580,82 +614,92 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, size(f)) + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - # Divide out the z-average of the magnitude of perturbed_residual from the - # difference, so that different orders of magnitude at different w_∥ are all - # tested sensibly, but occasional small values of the residual do not make the - # test fail. - # Since we have already normalised, pass `rtol` to `atol` for the comparison. - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + # Divide out the z-average of the magnitude of perturbed_residual from the + # difference, so that different orders of magnitude at different w_∥ are all + # tested sensibly, but occasional small values of the residual do not make the + # test fail. + # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
+ norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - # Divide out the z-average of the magnitude of perturbed_residual from the - # difference, so that different orders of magnitude at different w_∥ are all - # tested sensibly, but occasional small values of the residual do not make the - # test fail. - # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
- norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # Divide out the z-average of the magnitude of perturbed_residual from the + # difference, so that different orders of magnitude at different w_∥ are all + # tested sensibly, but occasional small values of the residual do not make the + # test fail. + # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
+ norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - # Divide out the z-average of the magnitude of perturbed_residual from the - # difference, so that different orders of magnitude at different w_∥ are all - # tested sensibly, but occasional small values of the residual do not make the - # test fail. - # Since we have already normalised, pass `rtol` to `atol` for the comparison. 
- norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # Divide out the z-average of the magnitude of perturbed_residual from the + # difference, so that different orders of magnitude at different w_∥ are all + # tested sensibly, but occasional small values of the residual do not make the + # test fail. + # Since we have already normalised, pass `rtol` to `atol` for the comparison. + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -709,33 +753,42 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. 
- f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. - delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_contribution_from_electron_pdf_term_to_Jacobian!( @@ -830,67 +883,77 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, size(f)) + 
original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) @testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + 
delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -924,33 +987,42 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) 
f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. - delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_electron_dissipation_term_to_Jacobian!( @@ -1043,67 +1115,77 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, size(f)) + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = 
generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = 
vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -1142,33 +1224,42 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. 
- delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_electron_krook_collisions_to_Jacobian!( @@ -1262,67 +1353,77 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, size(f)) + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = 
generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = 
vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -1374,33 +1475,42 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. 
- delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_external_electron_source_to_Jacobian!( @@ -1494,67 +1604,77 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, size(f)) + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = 
generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = 
vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -1609,33 +1729,42 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. 
- delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_electron_implicit_constraint_forcing_to_Jacobian!( @@ -1729,69 +1858,79 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil return nothing end - original_residual = zeros(mk_float, size(f)) - perturbed_residual = zeros(mk_float, size(f)) + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - zeros(p_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - # No norm 
factor, because both perturbed residuals should be zero here, as - # delta_p does not affect this term, and `f` (with no `delta_f`) obeys the - # constraints exactly, so this term vanishes. - @test elementwise_isapprox(perturbed_residual, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); - rtol=0.0, atol=1.0e-15) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # No norm factor, because both perturbed residuals should be zero here, as + # delta_p does not affect this term, and `f` (with no `delta_f`) obeys the + # constraints exactly, so this term vanishes. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=1.0e-15) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], - delta_state[pdf_size+1:end]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region 
begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -1844,33 +1983,42 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) - # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes - # low-order moments vanish exactly. - delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_electron_energy_equation_to_Jacobian!( @@ -1921,67 +2069,77 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) end end - original_residual = zeros(mk_float, size(ppar)) - perturbed_residual = zeros(mk_float, size(ppar)) + original_residual = allocate_shared_float(size(ppar)...) + perturbed_residual = allocate_shared_float(size(ppar)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] - - # Check f did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], - delta_state[1:pdf_size]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - perturbed_with_Jacobian ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] - - # Check f did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], - zeros(pdf_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test 
elementwise_isapprox(perturbed_residual ./ norm_factor, - perturbed_with_Jacobian ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + zeros(pdf_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] - - # Check ppar did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], - delta_state[1:pdf_size]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - perturbed_with_Jacobian ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check ppar did not get perturbed by the Jacobian + @test 
elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -2015,33 +2173,42 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) z_speed = @view z_advect[1].speed[:,:,:,ir] - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. 
- delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( @@ -2094,70 +2261,80 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 end end - original_residual = zeros(mk_float, size(ppar)) - perturbed_residual = zeros(mk_float, size(ppar)) + original_residual = allocate_shared_float(size(ppar)...) + perturbed_residual = allocate_shared_float(size(ppar)...) 
@testset "δf only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] - - # Check f did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], - delta_state[1:pdf_size]; atol=1.0e-15) - - # No norm factor, because both perturbed residuals should be zero here, as - # delta_f does not affect this term, and `ppar` is used as - # `ppar_previous_ion_step` in this test, so the residuals are exactly zero if - # there is no delta_p. - @test elementwise_isapprox(perturbed_residual, - perturbed_with_Jacobian; - rtol=0.0, atol=1.0e-15) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + # No norm factor, because both perturbed residuals should be zero here, as + # delta_f does not affect this term, and `ppar` is used as + # `ppar_previous_ion_step` in this test, so the residuals are exactly zero if + # there is no delta_p. 
+ @test elementwise_isapprox(perturbed_residual, + perturbed_with_Jacobian; + rtol=0.0, atol=1.0e-15) + end end @testset "δp only" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f, ppar .+ delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] - - # Check f did not get perturbed by the Jacobian - @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], - zeros(pdf_size); atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - perturbed_with_Jacobian ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + zeros(pdf_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual, f, ppar) residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] - - # Check ppar did not get perturbed by the Jacobian - @test 
elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], - delta_state[1:pdf_size]; atol=1.0e-15) - - norm_factor = generate_norm_factor(perturbed_residual) - @test elementwise_isapprox(perturbed_residual ./ norm_factor, - perturbed_with_Jacobian ./ norm_factor; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end end + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing @@ -2188,33 +2365,42 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) z_advect = advection_structs.z_advect vpa_advect = advection_structs.vpa_advect - delta_p = similar(ppar) + delta_p = allocate_shared_float(size(ppar)...) p_amplitude = epsilon * maximum(ppar) - @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) - f = @view pdf.electron.norm[:,:,:,ir] - # Make sure initial condition has some z-variation. As f is 'moment kinetic' this - # means f must have a non-Maxwellian part that varies in z. - f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end # Ensure initial electron distribution function obeys constraints hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) - delta_f = similar(f) + delta_f = allocate_shared_float(size(f)...) f_amplitude = epsilon * maximum(f) # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes # low-order moments vanish exactly. - delta_f .= f_amplitude .* - reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* - reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* - f + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end pdf_size = length(f) p_size = length(ppar) total_size = pdf_size + p_size - jacobian_matrix = zeros(mk_float, total_size, total_size) - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end end fill_electron_kinetic_equation_Jacobian!( @@ -2320,84 +2506,88 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) return nothing end - original_residual_f = zeros(mk_float, size(f)) - original_residual_p = zeros(mk_float, size(ppar)) - perturbed_residual_f = zeros(mk_float, size(f)) - perturbed_residual_p = zeros(mk_float, size(ppar)) + original_residual_f = allocate_shared_float(size(f)...) + original_residual_p = allocate_shared_float(size(ppar)...) + perturbed_residual_f = allocate_shared_float(size(f)...) + perturbed_residual_p = allocate_shared_float(size(ppar)...) 
@testset "δf only" begin residual_func!(original_residual_f, original_residual_p, f, ppar) residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] - perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] - - norm_factor_f = generate_norm_factor(perturbed_residual_f) - @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, - reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; - rtol=0.0, atol=rtol) - norm_factor_p = generate_norm_factor(perturbed_residual_p) - @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, - perturbed_with_Jacobian_p ./ norm_factor_p; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end end @testset "δp only" begin residual_func!(original_residual_f, original_residual_p, f, ppar) residual_func!(perturbed_residual_f, perturbed_residual_p, f, ppar.+delta_p) - delta_state = zeros(mk_float, 
total_size) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] - perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] - - norm_factor_f = generate_norm_factor(perturbed_residual_f) - @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, - reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; - rtol=0.0, atol=rtol) - norm_factor_p = generate_norm_factor(perturbed_residual_p) - @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, - perturbed_with_Jacobian_p ./ norm_factor_p; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end end @testset "δf and δp" begin residual_func!(original_residual_f, original_residual_p, f, ppar) residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar.+delta_p) - delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) - delta_state[pdf_size+1:end] .= vec(delta_p) - residual_update_with_Jacobian = jacobian_matrix * delta_state - perturbed_with_Jacobian_f 
= vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] - perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] - - norm_factor_f = generate_norm_factor(perturbed_residual_f) - @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, - reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; - rtol=0.0, atol=rtol) - norm_factor_p = generate_norm_factor(perturbed_residual_p) - @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, - perturbed_with_Jacobian_p ./ norm_factor_p; - rtol=0.0, atol=rtol) + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) end return nothing end function runtests() - # Only run this test in serial, for simplicity. We are testing correctness of the - # matrix construction here, not performance or parallelisation, etc. - if global_size[] > 1 - @testset_skip "Jacobian matrix tests are only implemented for serial runs." 
"Jacobaian matrix" - return nothing - end - # Create a temporary directory for test output test_output_directory = get_MPI_tempdir() test_input["base_directory"] = test_output_directory From 7d9a2f0b9fec35fb3f04b491a4e01d68516269b4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 27 Aug 2024 22:27:01 +0100 Subject: [PATCH 040/107] Move electron solver steps per ion step to separate plot Often has different order of magnitude from 'linear steps per nonlinear step' and 'nonlinear steps per solve', so makes those hard to read if it is put on the same plot. --- .../src/makie_post_processing.jl | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index fb5aeefa0..3b4c634f7 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7765,17 +7765,39 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax) end end + end + + if has_nl_solver + put_legend_right(nl_solvers_fig, ax) + end + + + # Plot electron solver diagnostics + electron_solver_fig, ax = get_1d_ax(; xlabel="time", ylabel="electron steps per ion step") + + has_electron_solve = false + for ri ∈ run_info + if length(run_info) == 1 + prefix = "" + else + prefix = ri.run_name * " " + end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end if ri.composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) - has_nl_solver = true + has_electron_solve = true electron_steps_per_ion_step = get_variable(ri, "electron_steps_per_ion_step") plot_1d(time, electron_steps_per_ion_step, label=prefix * " electron steps per solve", ax=ax) end end - if has_nl_solver - 
put_legend_right(nl_solvers_fig, ax) + if has_electron_solve + put_legend_right(electron_solver_fig, ax) end @@ -7793,6 +7815,11 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n outfile = plot_prefix * "nonlinear_solver_iterations.pdf" save(outfile, nl_solvers_fig) end + + if has_electron_solve + outfile = plot_prefix * "electron_steps.pdf" + save(outfile, electron_solver_fig) + end else display(steps_fig) display(dt_fig) From 7c472e45f01831d009e8e28e564033c0c88f8fcd Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 28 Aug 2024 15:15:33 +0100 Subject: [PATCH 041/107] Optimise Jacobian matrix construction using CSR format sparse matrices Using CSR matrices makes it easier to loop over only the non-zero entries in the derivative matrix. --- moment_kinetics/Project.toml | 1 + .../src/electron_fluid_equations.jl | 18 ++++++---- .../src/electron_kinetic_equation.jl | 14 +++++--- moment_kinetics/src/electron_vpa_advection.jl | 34 +++++++++++-------- moment_kinetics/src/electron_z_advection.jl | 17 +++++----- moment_kinetics/src/gauss_legendre.jl | 9 +++-- 6 files changed, 56 insertions(+), 37 deletions(-) diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml index 8de860c52..a3d2fa2bb 100644 --- a/moment_kinetics/Project.toml +++ b/moment_kinetics/Project.toml @@ -31,6 +31,7 @@ Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 07f84849d..fe632e17a 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ 
b/moment_kinetics/src/electron_fluid_equations.jl @@ -377,7 +377,7 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa end me = composition.me_over_mi - z_deriv_matrix = z_spectral.D_matrix + z_deriv_matrix = z_spectral.D_matrix_csr v_size = vperp.n * vpa.n begin_z_region() @@ -403,10 +403,14 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa # + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz] # upar*dppar_dz - for icolz ∈ 1:z.n + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col = ppar_offset + icolz jacobian_matrix[row,col] += - dt * upar[iz] * z_deriv_matrix[iz,icolz] + dt * upar[iz] * z_deriv_entry end # 3*ppar*dupar_dz @@ -417,9 +421,9 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz] - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz] + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) - for icolz ∈ 1:z.n + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col = ppar_offset + icolz - jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_matrix[iz,icolz] + jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_entry end for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset @@ -427,10 +431,10 @@ function 
add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 end - for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry end end diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 592de9918..8c5d579f2 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -3428,7 +3428,7 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( source_density_amplitude = moments.electron.external_source_density_amplitude source_momentum_amplitude = moments.electron.external_source_momentum_amplitude source_pressure_amplitude = moments.electron.external_source_pressure_amplitude - z_deriv_matrix = z_spectral.D_matrix + z_deriv_matrix = z_spectral.D_matrix_csr v_size = vperp.n * vpa.n begin_z_vperp_vpa_region() @@ -3485,11 +3485,15 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 end - for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + for (icolz, z_deriv_entry) ∈ 
zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] * vth[iz] * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry end if external_source_settings.electron.active # Source terms from `add_contribution_from_pdf_term!()` @@ -3505,11 +3509,11 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz] + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz] + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz])) - for icolz ∈ 1:z.n + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col = ppar_offset + icolz jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] - - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_matrix[iz,icolz] + - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_entry end end diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 5aaab5eb7..9a1935333 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -128,7 +128,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, * "need differentiation matrices.") end - z_deriv_matrix = z_spectral.D_matrix + z_deriv_matrix = z_spectral.D_matrix_csr + vpa_Dmat = vpa_spectral.lobatto.Dmat + vpa_element_scale = vpa.element_scale begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin @@ -153,10 +155,10 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, # + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥ if ielement_vpa == 1 && igrid_vpa == 1 
jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa] + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa] elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] elseif igrid_vpa == vpa.ngrid # Note igrid_vpa is only ever 1 when ielement_vpa==1, because # of the way element boundaries are counted. @@ -164,19 +166,19 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] if vpa_speed < 0.0 jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[ielement_vpa+1] + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] elseif vpa_speed > 0.0 jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] else jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[ielement_vpa] + dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= - dt * vpa_speed * 0.5 * vpa_spectral.lobatto.Dmat[1,:] ./ 
vpa.element_scale[ielement_vpa+1] + dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] end else jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_spectral.lobatto.Dmat[igrid_vpa,:] ./ vpa.element_scale[ielement_vpa] + dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa] end # q = 2*p*vth*∫dw_∥ w_∥^3 g # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g @@ -202,18 +204,22 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 end - for icolz ∈ 1:z.n, icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * - vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[iz,icolz] + vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry end jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz]) - for icolz ∈ 1:z.n + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col = ppar_offset + icolz - jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 
1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_matrix[iz,icolz] + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_entry end # (1/2*vth/p*dp/dz - w_∥^2*dvth/dz # + source_density_amplitude*u/n/vth @@ -250,12 +256,12 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz] + 2.0*upar[iz]*source_momentum_amplitude[iz])/ppar[iz]^2 ) * dpdf_dvpa[ivpa,ivperp,iz] end - for icolz ∈ 1:z.n + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col = ppar_offset + icolz jacobian_matrix[row,col] += dt * ( 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me) - vpa.grid[ivpa]^2/sqrt(2.0*ppar[iz]*dens[iz]*me) - ) * dpdf_dvpa[ivpa,ivperp,iz] * z_deriv_matrix[iz,icolz] + ) * dpdf_dvpa[ivpa,ivperp,iz] * z_deriv_entry end end diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index 5c5cb0b1d..06913aa2f 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -109,7 +109,8 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p * "add_electron_z_advection_to_Jacobian!() preconditioner because we need " * "differentiation matrices.") end - z_deriv_matrix = z_spectral.D_matrix + z_Dmat = z_spectral.lobatto.Dmat + z_element_scale = z.element_scale begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin @@ -132,10 +133,10 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p # Contributions from (w_∥*vth + upar)*dg/dz if ielement_z == 1 && igrid_z == 1 jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z] + dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z] elseif ielement_z == z.nelement_local && 
igrid_z == z.ngrid jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] + dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] elseif igrid_z == z.ngrid # Note igrid_z is only ever 1 when ielement_z==1, because # of the way element boundaries are counted. @@ -143,19 +144,19 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p icolumn_max_z_next = z.imax[ielement_z+1] if z_speed < 0.0 jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1] + dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] elseif z_speed > 0.0 jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] + dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] else jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement_z] + dt * z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z] jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= - dt * z_speed * 0.5 * z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement_z+1] + dt * z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] end else jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_spectral.lobatto.Dmat[igrid_z,:] ./ z.element_scale[ielement_z] + dt * z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z] end # vth = sqrt(2*p/n/me) # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth diff --git 
a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index de95dae35..5d1558c5d 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -25,6 +25,7 @@ using FastGaussQuadrature using LegendrePolynomials: Pl, dnPl using LinearAlgebra: mul!, lu, LU using SparseArrays: sparse, AbstractSparseArray +using SparseMatricesCSR using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float import ..calculus: elementwise_derivative!, mass_matrix_solve! @@ -82,7 +83,7 @@ struct gausslegendre_base_info Y31::Array{mk_float,3} end -struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info +struct gausslegendre_info{TSparse, TSparseCSR, TLU} <: weak_discretization_info lobatto::gausslegendre_base_info radau::gausslegendre_base_info # global (1D) mass matrix @@ -96,9 +97,11 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info L_matrix::TSparse # global (1D) strong first derivative matrix D_matrix::TSparse + # global (1D) strong first derivative matrix in Compressed Sparse Row (CSR) format + D_matrix_csr::TSparseCSR # global (1D) weak second derivative matrix, with inverse mass matrix included (so # matrix is dense) - dense_second_deriv_matrix::AbstractArray{mk_float,2} + dense_second_deriv_matrix::Array{mk_float,2} # global (1D) LU object mass_matrix_lu::TLU # dummy matrix for local operators @@ -128,7 +131,7 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, mass_matrix_lu = lu(sparse(mass_matrix)) Qmat = allocate_float(coord.ngrid,coord.ngrid) - return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),sparse(D_matrix),dense_second_deriv_matrix,mass_matrix_lu,Qmat) + return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),sparse(D_matrix),convert(SparseMatrixCSR{1,mk_float,mk_int},D_matrix),dense_second_deriv_matrix,mass_matrix_lu,Qmat) end 
function setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_dim=true) From b6d1cc458676a925ce168162f50ea59fda7ab593 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 28 Aug 2024 15:16:51 +0100 Subject: [PATCH 042/107] Reuse LU setup for Jacobian factorization Speeds up the LU factorization by just under 2x. --- moment_kinetics/src/electron_kinetic_equation.jl | 12 +++++++++++- moment_kinetics/src/nonlinear_solvers.jl | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 8c5d579f2..8eaeca923 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1045,7 +1045,17 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos begin_serial_region() if block_rank[] == 0 - nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, output_buffer) + if size(orig_lu) == (1, 1) + # Have not properly created the LU decomposition before, so + # cannot reuse it. + nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, output_buffer) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. 
+ lu!(orig_lu, sparse(precon_matrix); check=false) + nl_solver_params.preconditioners[ir] = (orig_lu, precon_matrix, input_buffer, output_buffer) + end else nl_solver_params.preconditioners[ir] = (orig_lu, precon_matrix, input_buffer, output_buffer) end diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 4f547445f..25f0a604b 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -142,7 +142,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol ) elseif preconditioner_type == "electron_lu" pdf_plus_ppar_size = total_size_coords + coords.z.n - preconditioners = fill((lu(sparse(1.0*I, pdf_plus_ppar_size, pdf_plus_ppar_size)), + preconditioners = fill((lu(sparse(1.0*I, 1, 1)), allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size), allocate_shared_float(pdf_plus_ppar_size), allocate_shared_float(pdf_plus_ppar_size), From bd2c41d93ae2b71d62c9556f8b1f1c460d30298f Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 28 Aug 2024 16:43:33 +0100 Subject: [PATCH 043/107] Always add electron energy equation terms to Jacobian matrix ...even when `evolve_ppar=false`. Doing this ensures that the Jacobian matrix always has the same non-zero entries, so that we can reuse the LU factorization struct regardless of whether the preconditioner (re-)construction is called when `evolve_ppar=true` or `evolve_ppar=false`. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 8eaeca923..e4d09e5d8 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2954,12 +2954,13 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome add_electron_implicit_constraint_forcing_to_Jacobian!( jacobian_matrix, f, z_speed, z, vperp, vpa, t_params.constraint_forcing_rate, dt, ir) - if evolve_ppar - add_electron_energy_equation_to_Jacobian!( - jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, - dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, - num_diss_params, dt, ir; ppar_offset=pdf_size) - end + # Always add the electron energy equation term, even if evolve_ppar=false, so that the + # Jacobian matrix always has the same sparsity pattern, meaning that we can always reuse the LU + # factorization struct. + add_electron_energy_equation_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir; ppar_offset=pdf_size) if ion_dt !== nothing add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) From 5402b519c8d304c111710696f52c4c0cb2d6af1d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 28 Aug 2024 16:46:23 +0100 Subject: [PATCH 044/107] Run electron solve with evolve_ppar=false to feed explicit d/dt When the next stage has `t_params.implicit_coefficient_is_zero[istage] === true`, there will be no implicit solve that updates the electron distribution function. 
However, the electron pressure is updated (as a linear combination of other stage values, by the RK scheme) since the previous implicit solve, so the steady state solution for the distribution function will be slightly different. Therefore before a stage with `t_params.implicit_coefficient_is_zero[istage] === true` runs, the electron kinetic equation solve has to be run (without any electron_ppar update) to get the consistent distribution function. --- moment_kinetics/src/time_advance.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 201124755..410bcc4ae 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2408,9 +2408,7 @@ function apply_all_bcs_constraints_update_moments!( # to the beginning of the ion/neutral timestep, so the electron solution # calculated here would be discarded - we might as well skip calculating it in # that case. - if update_electrons && - !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) && - success == "" + if update_electrons && !(t_params.implicit_electron_advance) && success == "" kinetic_electron_success = update_electron_pdf!( scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, electron_z_advect, From 6aed4db63742d1d60578a7e6fad993540350c51b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 28 Aug 2024 16:52:58 +0100 Subject: [PATCH 045/107] Include electron_ppar, with ion_dt set, in second stage electron setup This prevents electron_ppar from changing too much (so convergence is faster), but seems to be an easier solve than updating the electron distribution function on its own, without allowing electron_ppar to evolve at all. 
--- moment_kinetics/src/initial_conditions.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 3de547fa3..fe94b8da6 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -787,7 +787,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; - io_electron=io_initial_electron) + io_electron=io_initial_electron, + evolve_ppar=true, ion_dt=t_params.dt[]) end if success != "" error("!!!max number of iterations for electron pdf update exceeded!!!\n" From a246e6f23f0f0bb76285f2ded1479f62326cd254 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 29 Aug 2024 16:09:59 +0100 Subject: [PATCH 046/107] Fix RK integration test when `a_implicit[1,1] != 0` When using an IMEX RK scheme that does *not* have an 'explicit first stage' (it has this when `a_implicit[1,1] = 0`), the Butcher-table integration test method was incorrect, as the first explicit RHS evaluation needs to be evaluated with the first implicitly-updated 'y' - when a[1,1]=0 there is no implicit update in the first stage, so this error was hidden. 
--- util/test-rk-timestep.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index dd77ab06f..66d95d123 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -194,8 +194,9 @@ function rk_advance_butcher(a, b, y0, dt, nsteps, a_implicit=nothing, b_implicit error = zeros(nsteps+1) for it ∈ 1:nsteps - kscratch[1] = dt*f(y) kscratch_implicit[1] = dt*f_implicit(y, a_implicit[1,1] * dt) + ystage = backward_euler(y, dt * a_implicit[1,1]) + kscratch[1] = dt*f(ystage) for i ∈ 2:n_rk_stages ytilde = y + sum(a[i,j] * kscratch[j] for j ∈ 1:i-1) + From 3bfce739264574128ef860768e790df6187ec2dc Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 29 Aug 2024 16:15:41 +0100 Subject: [PATCH 047/107] Handle not-'low-storage' 2-stage methods in calculate_rk_coeffs.jl A 2-stage method always triggers the test that used to be used to identify 'low-storage' methods, so have to pass the 'low-storage' status as an argument instead of trying to auto-detect it. 
--- util/calculate_rk_coeffs.jl | 9 ++++----- util/test-rk-timestep.jl | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index f4f3c366d..925a61a6f 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -459,12 +459,11 @@ function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}}, end function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, - adaptive, + adaptive, low_storage, rk_coefs_implicit=zeros(T, size(rk_coefs, 1) - 1, size(rk_coefs, 2) + 1), implicit_coefficient_is_zero=nothing ) where {T,N} using_rationals = eltype(rk_coefs) <: Rational || eltype(rk_coefs_implicit) <: Rational - low_storage = size(rk_coefs, 1) == 3 if adaptive n_rk_stages = size(rk_coefs, 2) - 1 else @@ -654,7 +653,7 @@ function convert_and_check_butcher_tableau(name, a, b, # Consistency check: converting back should give the original a, b. a_check, b_check, a_check_implicit, b_check_implicit = - convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) + convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, low_storage, rk_coefs_implicit, implicit_coefficient_is_zero) if eltype(a) == Rational if a_check != a @@ -704,7 +703,7 @@ function convert_and_check_butcher_tableau(name, a, b, end end -function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, +function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, low_storage=true, rk_coefs_implicit=zeros(eltype(rk_coefs), size(rk_coefs, 1), size(rk_coefs, 2) + 1), @@ -717,7 +716,7 @@ function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, if imex print("rk_coefs_implicit="); display(rk_coefs_implicit) end - a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) + a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, 
low_storage, rk_coefs_implicit, implicit_coefficient_is_zero) print("a="); display(a) print("b="); display(b) if imex diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index 66d95d123..b1438f233 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -327,7 +327,7 @@ methods = Dict( ), ) -a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true) +a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true, false) methods["RKF45 attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs, a = a, b = b) From f10cdd742f421e4d6710336917958f820f8a1ec9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 29 Aug 2024 16:32:25 +0100 Subject: [PATCH 048/107] Don't recreate preconditioner at the beginning of every electron solve It is expensive to construct the Jacobian and LU-factorize it, and the preconditioner seems to still work OK with less frequent updates. --- moment_kinetics/src/electron_kinetic_equation.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index e4d09e5d8..d7c41a986 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -755,10 +755,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # initialise the electron pdf convergence flag to false electron_pdf_converged = false - # Reset nl_solver_params.stage_counter[] so that the preconditioner is re-computed at - # the first step - nl_solver_params.stage_counter[] = 0 - first_step = true # evolve (artificially) in time until the residual is less than the tolerance while (!electron_pdf_converged From da4e7413460277186300e191fef60da34815b937 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 29 Aug 2024 16:36:16 +0100 Subject: [PATCH 049/107] Add DIRK-IMEX methods from Pareschi & Russo 2005 May be useful to have IMEX methods that do *not* have an 
explicit first stage, as this explicit first stage requires an electron solve that does not update electron_ppar, which seems to mess things up sometimes. --- moment_kinetics/src/runge_kutta.jl | 64 ++++++++++++++++++++++++++++ util/calculate_rk_coeffs.jl | 68 ++++++++++++++++++++++++++++++ util/test-rk-timestep.jl | 38 ++++++++++++++++- 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 16d0ef600..ff61661ba 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -200,6 +200,70 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat else CFL_prefactor = input_CFL_prefactor end + elseif type == "PareschiRusso2(2,2,2)" + # 2nd-order, 2-stage IMEX method 'IMEX-SSP2(2,2,2)' from Pareschi & Russo 2005, Table II + # (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[-0.4142135623730950488016887242096980785696718753769480731766797379907324784621711 -0.5 ; + 0.9999999999999999999999999999999999999999999999999999999999999999999999999999827 -1.207106781186547524400844362104849039284835937688474036588339868995366239231094; + -0.0 0.5 ] + rk_coefs_implicit = mk_float[ 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 0.4142135623730950488016887242096980785696718753769480731766797379907324784621883 1.0; + -0.0 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 1.207106781186547524400844362104849039284835937688474036588339868995366239231094] + implicit_coefficient_is_zero = Bool[false, false] + n_rk_stages = 2 + rk_order = 2 + adaptive = false + low_storage = false + CFL_prefactor = NaN + elseif type == "PareschiRusso2(3,2,2)" +# 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,2,2)' from Pareschi & Russo 2005, Table III +# (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[2 -1 -1//2; + 0 0 0 ; + 0 1 -1//2; + 0 0 1//2] + rk_coefs_implicit = 
mk_float[1//2 -1 1 1//2; + 0 1//2 0 1//2; + 0 0 1//2 1//2] + implicit_coefficient_is_zero = Bool[false, false, false] + n_rk_stages = 3 + rk_order = 2 + adaptive = false + low_storage = false + CFL_prefactor = NaN + elseif type == "PareschiRusso2(3,3,2)" + # 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,3,2)' from Pareschi & Russo 2005, Table IV + # (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[1 -4//3 -1//9; + 1//2 -1//3 -4//9; + 0 1//2 -1//3; + 0 0 1//3] + rk_coefs_implicit = mk_float[1//4 -1//2 4//3 4//9; + 0 1//4 5//6 4//9; + 0 0 1//3 2//3] + implicit_coefficient_is_zero = Bool[false, false, false] + n_rk_stages = 3 + rk_order = 2 + adaptive = false + low_storage = false + CFL_prefactor = NaN + elseif type == "PareschiRusso3(4,3,3)" + # 3rd-order, 4-stage IMEX method 'IMEX-SSP3(4,3,3)' from Pareschi & Russo 2005, Table VI + # (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[ 2.0 -5.27491721763532 0.9999999999999688 -0.1666666666666453; + -0.0 0.0 1.4589197899688663e-17 0.0 ; + -0.0 1.0 -0.0343646522044047 -0.500000000000007 ; + -0.0 -0.0 0.25 -2.091639072545107 ; + -0.0 -0.0 -0.0 0.6666666666666664] + rk_coefs_implicit = mk_float[ 0.24169426078821 -1.0 3.13745860881766 1.0436096431476471e-14 0.16666666666665975; + -0.0 0.24169426078821 2.13745860881766 -0.24999999999997924 0.3333333333333193 ; + -0.0 -0.0 0.24169426078821 0.034364652204404655 0.500000000000007 ; + -0.0 -0.0 -0.0 0.24169426078821 2.0916390725451066 ] + implicit_coefficient_is_zero = Bool[false, false, false, false] + n_rk_stages = 4 + rk_order = 3 + adaptive = false + low_storage = false + CFL_prefactor = NaN elseif type == "SSPRK4" n_rk_stages = 4 rk_coefs = allocate_float(3, n_rk_stages) diff --git a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index 925a61a6f..5dc9fa686 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -1065,3 +1065,71 @@ convert_and_check_butcher_tableau( 
Rational{BigInt}[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], ; low_storage=false) + +# 2nd-order, 2-stage IMEX method 'IMEX-SSP2(2,2,2)' from Pareschi & Russo 2005, Table II +# (https://doi.org/10.1007/s10915-004-4636-4) +gamma = 1 - 1 / sqrt(BigFloat(2)) +convert_and_check_butcher_tableau( + "PareschiRusso2(2,2,2)", + BigFloat[0 0; + 1 0; + ], + BigFloat[1//2 1//2], + BigFloat[gamma 0 ; + 1-2*gamma gamma; + ], + BigFloat[1//2 1//2], + ; low_storage=false) + +# 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,2,2)' from Pareschi & Russo 2005, Table III +# (https://doi.org/10.1007/s10915-004-4636-4) +convert_and_check_butcher_tableau( + "PareschiRusso2(3,2,2)", + Rational{Int64}[0 0 0; + 0 0 0; + 0 1 0; + ], + Rational{Int64}[0 1//2 1//2], + Rational{Int64}[ 1//2 0 0 ; + -1//2 1//2 0 ; + 0 1//2 1//2; + ], + Rational{Int64}[0 1//2 1//2], + ; low_storage=false) + +# 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,3,2)' from Pareschi & Russo 2005, Table IV +# (https://doi.org/10.1007/s10915-004-4636-4) +convert_and_check_butcher_tableau( + "PareschiRusso2(3,3,2)", + Rational{Int64}[0 0 0; + 1//2 0 0; + 1//2 1//2 0; + ], + Rational{Int64}[1//3 1//3 1//3], + Rational{Int64}[1//4 0 0 ; + 0 1//4 0 ; + 1//3 1//3 1//3; + ], + Rational{Int64}[1//3 1//3 1//3], + ; low_storage=false) + +# 3rd-order, 4-stage IMEX method 'IMEX-SSP3(4,3,3)' from Pareschi & Russo 2005, Table VI +# (https://doi.org/10.1007/s10915-004-4636-4) +alpha = 0.24169426078821 +beta = 0.06042356519705 +eta = 0.12915286960590 +convert_and_check_butcher_tableau( + "PareschiRusso3(4,3,3)", + typeof(alpha)[0 0 0 0; + 0 0 0 0; + 0 1 0 0; + 0 1//4 1//4 0; + ], + typeof(alpha)[0 1//6 1//6 2//3], + typeof(alpha)[alpha 0 0 0 ; + -alpha alpha 0 0 ; + 0 1-alpha alpha 0 ; + beta eta 1//2-beta-eta-alpha alpha; + ], + typeof(alpha)[0 
1//6 1//6 2//3], + ; low_storage=false) diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index b1438f233..2282170aa 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -325,7 +325,43 @@ methods = Dict( a_implicit = Float64[0 0 0 0; 1767732205903//4055673282236 1767732205903//4055673282236 0 0; 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0; 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236], b_implicit = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100] ), - ) + + "PareschiRusso2(2,2,2)" => (a=Float64[0.0 0.0; 1.0 0.0], + b=Float64[0.5 0.5], + a_implicit=Float64[0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 0.0; 0.4142135623730950488016887242096980785696718753769480731766797379907324784621193 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404], + b_implicit=Float64[0.5 0.5], + rk_coefs=Float64[-0.4142135623730950488016887242096980785696718753769480731766797379907324784621711 -0.5; 0.9999999999999999999999999999999999999999999999999999999999999999999999999999827 -1.207106781186547524400844362104849039284835937688474036588339868995366239231094; -0.0 0.5], + rk_coefs_implicit=Float64[0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 0.4142135623730950488016887242096980785696718753769480731766797379907324784621883 1.0; -0.0 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 1.207106781186547524400844362104849039284835937688474036588339868995366239231094], + implicit_coefficient_is_zero=Bool[0, 0], + ), + + "PareschiRusso2(3,2,2)" => (a=Float64[0 0 0; 0 0 0; 0 1 0], + b=Float64[0 1//2 1//2], + 
a_implicit=Float64[1//2 0 0; -1//2 1//2 0; 0 1//2 1//2], + b_implicit=Float64[0 1//2 1//2], + rk_coefs=Float64[2 -1 -1//2; 0 0 0; 0 1 -1//2; 0 0 1//2], + rk_coefs_implicit=Float64[1//2 -1 1 1//2; 0 1//2 0 1//2; 0 0 1//2 1//2], + implicit_coefficient_is_zero=Bool[0, 0, 0], + ), + + "PareschiRusso2(3,3,2)" => (a=Float64[0 0 0; 1//2 0 0; 1//2 1//2 0], + b=Float64[1//3 1//3 1//3], + a_implicit=Float64[1//4 0 0; 0 1//4 0; 1//3 1//3 1//3], + b_implicit=Float64[1//3 1//3 1//3], + rk_coefs=Float64[1 -4//3 -1//9; 1//2 -1//3 -4//9; 0 1//2 -1//3; 0 0 1//3], + rk_coefs_implicit=Float64[1//4 -1//2 4//3 4//9; 0 1//4 5//6 4//9; 0 0 1//3 2//3], + implicit_coefficient_is_zero=Bool[0, 0, 0], + ), + + "PareschiRusso3(4,3,3)" => (a=Float64[0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 0.25 0.25 0.0], + b=Float64[0.0 0.16666666666666666 0.16666666666666666 0.6666666666666666], + a_implicit=Float64[0.24169426078821 0.0 0.0 0.0; -0.24169426078821 0.24169426078821 0.0 0.0; 0.0 0.75830573921179 0.24169426078821 0.0; 0.06042356519705 0.1291528696059 0.06872930440884001 0.24169426078821], + b_implicit=Float64[0.0 0.16666666666666666 0.16666666666666666 0.6666666666666666], + rk_coefs=Float64[2.0 -5.27491721763532 0.9999999999999688 -0.1666666666666453; -0.0 0.0 1.4589197899688663e-17 0.0; -0.0 1.0 -0.0343646522044047 -0.500000000000007; -0.0 -0.0 0.25 -2.091639072545107; -0.0 -0.0 -0.0 0.6666666666666664], + rk_coefs_implicit=Float64[0.24169426078821 -1.0 3.13745860881766 1.0436096431476471e-14 0.16666666666665975; -0.0 0.24169426078821 2.13745860881766 -0.24999999999997924 0.3333333333333193; -0.0 -0.0 0.24169426078821 0.034364652204404655 0.500000000000007; -0.0 -0.0 -0.0 0.24169426078821 2.0916390725451066], + implicit_coefficient_is_zero=Bool[0, 0, 0, 0], + ), + ) a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true, false) methods["RKF45 attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs, From 5935532418bfcd37bb4ee0ffd55fa1b2bd66052c Mon Sep 17 00:00:00 
2001 From: John Omotani Date: Thu, 29 Aug 2024 16:58:34 +0100 Subject: [PATCH 050/107] Fix electron initialisation if different n_rk_stages for electrons/ions --- moment_kinetics/src/initial_conditions.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index fe94b8da6..f431c73f9 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -450,11 +450,7 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z scratch[1].electron_ppar .= moments.electron.ppar scratch[1].electron_pperp .= 0.0 #moments.electron.pperp scratch[1].electron_temp .= moments.electron.temp - if t_params.electron === nothing - n_rk_stages = length(scratch) - 1 - else - n_rk_stages = t_params.electron.n_rk_stages - end + n_rk_stages = t_params.n_rk_stages scratch[n_rk_stages+1].electron_density .= moments.electron.dens scratch[n_rk_stages+1].electron_upar .= moments.electron.upar scratch[n_rk_stages+1].electron_ppar .= moments.electron.ppar From 26ff7a14524eb1297be9f1df101907c4def1e924 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 30 Aug 2024 10:08:20 +0100 Subject: [PATCH 051/107] Fix setting of *_updated variables when restarting simulations These variables do not use shared-memory so they need to be set outside the `@serial_region`. 
--- moment_kinetics/src/load_data.jl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 154791890..ceaa3eec0 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -684,17 +684,14 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.dens_updated .= true moments.ion.upar .= reload_moment("parallel_flow", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.upar_updated .= true moments.ion.ppar .= reload_moment("parallel_pressure", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.ppar_updated .= true moments.ion.pperp .= reload_moment("perpendicular_pressure", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, @@ -703,7 +700,6 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.qpar_updated .= true moments.ion.vth .= reload_moment("thermal_speed", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, @@ -805,28 +801,24 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.dens_updated[] = true moments.electron.upar .= reload_electron_moment("electron_parallel_flow", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.upar_updated[] = true moments.electron.ppar .= 
reload_electron_moment("electron_parallel_pressure", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.ppar_updated[] = true moments.electron.qpar .= reload_electron_moment("electron_parallel_heat_flux", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.qpar_updated[] = true moments.electron.vth .= reload_electron_moment("electron_thermal_speed", dynamic, time_index, r, z, r_range, z_range, restart_r, @@ -895,25 +887,21 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.dens_updated .= true moments.neutral.uz .= reload_moment("uz_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.uz_updated .= true moments.neutral.pz .= reload_moment("pz_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.pz_updated .= true moments.neutral.qz .= reload_moment("qz_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.qz_updated .= true moments.neutral.vth .= reload_moment("thermal_speed_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, @@ -1024,6 +1012,18 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, close(fid) end end + moments.ion.dens_updated .= true + moments.ion.upar_updated .= true + moments.ion.ppar_updated .= true + moments.ion.qpar_updated .= true + moments.electron.dens_updated[] = true + moments.electron.upar_updated[] = true + moments.electron.ppar_updated[] = true + 
moments.electron.qpar_updated[] = true + moments.neutral.dens_updated .= true + moments.neutral.uz_updated .= true + moments.neutral.pz_updated .= true + moments.neutral.qz_updated .= true restart_electron_physics = MPI.bcast(restart_electron_physics, 0, comm_block[]) From 2933dc84a200376577730b3865feb1627bbc7d36 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 30 Aug 2024 11:36:30 +0100 Subject: [PATCH 052/107] Don't re-run electron solve when using implicit electron schemes When using `t_params.implicit_electron_advance = true` or `t_params.implicit_electron_ppar = true`, there is no need to re-solve the electron kinetic equation in the `apply_all_bcs_constraints_update_moments!()` call. Re-solving there can actually cause convergence failures, because the electron solve seems to converge more robustly when including `electron_ppar` in the solve, and the re-solve does not include it (only solves for `pdf_electron`). --- moment_kinetics/src/time_advance.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 410bcc4ae..b318b7939 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3034,13 +3034,15 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, # The result of the implicit solve gives the state vector at 'istage' # which is used as input to the explicit part of the IMEX time step. 
old_scratch = scratch_implicit[istage] + update_electrons = !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) success = apply_all_bcs_constraints_update_moments!( scratch_implicit[istage], pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, t_params, nl_solver_params, advance, scratch_dummy, false, - max_electron_pdf_iterations, max_electron_sim_time) + max_electron_pdf_iterations, max_electron_sim_time; + update_electrons=update_electrons) if success != "" # Break out of the istage loop, as passing `success != ""` to the # adaptive timestep update function will signal a failed timestep, so From 326762bbe0e3e1eef598d15b9ccd8911c2fb124c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 30 Aug 2024 18:46:07 +0100 Subject: [PATCH 053/107] Don't use forward-Euler initial guess in electron_backward_euler!() When timesteps get long, using a forward-Euler step as the initial guess might just make the solver take longer to converge, if it overshoots the right values. --- moment_kinetics/src/electron_kinetic_equation.jl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index d7c41a986..b56b99ea0 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -783,17 +783,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end - # Do a forward-Euler update of the electron pdf as an initial guess. Even when - # evolving electron_ppar, do not update electron_ppar here because if dt is bigger - # than ion_dt, then an explicit timestep will likely make electron_ppar over-shoot - # which would just take more iterations in the Newton-Krylov solve to fix. 
- electron_kinetic_equation_euler_update!(f_electron_new, electron_ppar_new, - f_electron_old, electron_ppar_old, - moments, z, vperp, vpa, z_spectral, - vpa_spectral, z_advect, vpa_advect, - scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, t_params, ir) # Calculate heat flux and derivatives using updated f_electron @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], electron_ppar_new, From 36529b2fe0ed83cc15d6e3b0d699362273cbc117 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 30 Aug 2024 18:47:31 +0100 Subject: [PATCH 054/107] Allow for block boundaries in skip_f_electron_bc_points_in_Jacobian() Just use 'Dirichlet' boundary conditions for all incoming points at the z-boundaries of any block. --- moment_kinetics/src/boundary_conditions.jl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 3a1db780d..8642470cd 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -1086,13 +1086,16 @@ Jacobian matrix does not modify those points. Returns `false` otherwise. """ function skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) # z boundary condition - if z.bc ∈ ("wall", "constant") - if z.irank == 0 && iz == 1 && z_speed[iz,ivpa,ivperp] ≥ 0.0 - return true - end - if z.irank == z.nrank - 1 && iz == z.n && z_speed[iz,ivpa,ivperp] ≤ 0.0 - return true - end + # Treat as if using Dirichlet boundary condition for incoming part of the distribution + # function on the block boundary, regardless of the actual boundary condition and + # whether this is an internal boundary or an actual domain boundary. 
This prevents the + # matrix evaluated for a single block (without coupling to neighbouring blocks) from + # becoming singular + if iz == 1 && z_speed[iz,ivpa,ivperp] ≥ 0.0 + return true + end + if iz == z.n && z_speed[iz,ivpa,ivperp] ≤ 0.0 + return true end # vperp boundary condition From ccb643a8611a9704958a60f1d1ec1d248efacb1a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 30 Aug 2024 18:55:07 +0100 Subject: [PATCH 055/107] Add reconcile_element_boundaries_MPI_z_pdf_vpavperpz!() Having these functions tidies up the derivative_z_pdf_vpavperpz!() functions. --- moment_kinetics/src/calculus.jl | 154 ++++++++++++++++++++++++++++- moment_kinetics/src/derivatives.jl | 122 ++--------------------- 2 files changed, 160 insertions(+), 116 deletions(-) diff --git a/moment_kinetics/src/calculus.jl b/moment_kinetics/src/calculus.jl index 2b473a232..44bdbf324 100644 --- a/moment_kinetics/src/calculus.jl +++ b/moment_kinetics/src/calculus.jl @@ -541,7 +541,7 @@ function reconcile_element_boundaries_MPI!(df1d::AbstractArray{mk_float,Ndims}, # synchronize buffers _block_synchronize() end - + function apply_adv_fac!(buffer::AbstractArray{mk_float,Ndims},adv_fac::AbstractArray{mk_float,Ndims},endpoints::AbstractArray{mk_float,Ndims},sgn::mk_int) where Ndims #buffer contains off-process endpoint #adv_fac < 0 is positive advection speed @@ -638,6 +638,158 @@ function reconcile_element_boundaries_MPI!(df1d::AbstractArray{mk_float,Ndims}, _block_synchronize() end +# Special version for pdf_electron with no r-dimension, which has the same number of +# dimensions as an ion/neutral moment variable, but different dimensions. 
+function reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(df1d::AbstractArray{mk_float,3}, + dfdx_lower_endpoints::AbstractArray{mk_float,2}, dfdx_upper_endpoints::AbstractArray{mk_float,2}, + receive_buffer1::AbstractArray{mk_float,2}, receive_buffer2::AbstractArray{mk_float,2}, coord) + + # synchronize buffers + # -- this all-to-all block communicate here requires that this function is NOT called from within a parallelised loop + # -- or from a @serial_region or from an if statement isolating a single rank on a block + _block_synchronize() + #if block_rank[] == 0 # lead process on this shared-memory block + @serial_region begin + + # now deal with endpoints that are stored across ranks + comm = coord.comm + nrank = coord.nrank + irank = coord.irank + #send_buffer = coord.send_buffer + #receive_buffer = coord.receive_buffer + # sending pattern is cyclic. First we send data from irank -> irank + 1 + # to fix the lower endpoints, then we send data from irank -> irank - 1 + # to fix upper endpoints. Special exception for the periodic points. + # receive_buffer[1] is for data received, send_buffer[1] is data to be sent + + # pass data from irank -> irank + 1, receive data from irank - 1 + idst = mod(irank+1,nrank) # destination rank for sent data + isrc = mod(irank-1,nrank) # source rank for received data + #MRH what value should tag take here and below? 
Esp if nrank >= 32 + rreq2 = MPI.Irecv!(receive_buffer2, comm; source=isrc, tag=2) + sreq2 = MPI.Isend(dfdx_lower_endpoints, comm; dest=idst, tag=2) + #print("$irank: Sending $irank -> $idst = $dfdx_lower_endpoints\n") + stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) + #print("$irank: Received $isrc -> $irank = $receive_buffer1\n") + #print("$irank: Received $isrc -> $irank = $receive_buffer2\n") + + # now update receive buffers, taking into account the reconciliation + if irank == 0 + if coord.bc == "periodic" + #update the extreme lower endpoint with data from irank = nrank -1 + receive_buffer1 .= 0.5*(receive_buffer1 .+ dfdx_lower_endpoints) + else #directly use value from Cheb + receive_buffer1 .= dfdx_lower_endpoints + end + else # enforce continuity at lower endpoint + receive_buffer1 .= 0.5*(receive_buffer1 .+ dfdx_lower_endpoints) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,1] .= receive_buffer1 + + if irank == nrank-1 + if coord.bc == "periodic" + #update the extreme upper endpoint with data from irank = 0 + receive_buffer2 .= 0.5*(receive_buffer2 .+ dfdx_upper_endpoints) + else #directly use value from Cheb + receive_buffer2 .= dfdx_upper_endpoints + end + else # enforce continuity at upper endpoint + receive_buffer2 .= 0.5*(receive_buffer2 .+ dfdx_upper_endpoints) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,end] .= receive_buffer2 + + end + # synchronize buffers + _block_synchronize() +end + +# Special version for pdf_electron with no r-dimension, which has the same number of +# dimensions as an ion/neutral moment variable, but different dimensions. 
+function reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(df1d::AbstractArray{mk_float,3}, + adv_fac_lower_endpoints::AbstractArray{mk_float,2}, adv_fac_upper_endpoints::AbstractArray{mk_float,2}, + dfdx_lower_endpoints::AbstractArray{mk_float,2}, dfdx_upper_endpoints::AbstractArray{mk_float,2}, + receive_buffer1::AbstractArray{mk_float,2}, receive_buffer2::AbstractArray{mk_float,2}, coord) + + # synchronize buffers + # -- this all-to-all block communicate here requires that this function is NOT called from within a parallelised loop + # -- or from a @serial_region or from an if statement isolating a single rank on a block + _block_synchronize() + #if block_rank[] == 0 # lead process on this shared-memory block + @serial_region begin + # now deal with endpoints that are stored across ranks + comm = coord.comm + nrank = coord.nrank + irank = coord.irank + #send_buffer = coord.send_buffer + #receive_buffer = coord.receive_buffer + # sending pattern is cyclic. First we send data from irank -> irank + 1 + # to fix the lower endpoints, then we send data from irank -> irank - 1 + # to fix upper endpoints. Special exception for the periodic points. + # receive_buffer[1] is for data received, send_buffer[1] is data to be sent + + # send highest end point on THIS rank + # pass data from irank -> irank + 1, receive data from irank - 1 + idst = mod(irank+1,nrank) # destination rank for sent data + isrc = mod(irank-1,nrank) # source rank for received data + #MRH what value should tag take here and below? 
Esp if nrank >= 32 + rreq1 = MPI.Irecv!(receive_buffer1, comm; source=isrc, tag=1) + sreq1 = MPI.Isend(dfdx_upper_endpoints, comm; dest=idst, tag=1) + #print("$irank: Sending $irank -> $idst = $dfdx_upper_endpoints\n") + + # send lowest end point on THIS rank + # pass data from irank -> irank - 1, receive data from irank + 1 + idst = mod(irank-1,nrank) # destination rank for sent data + isrc = mod(irank+1,nrank) # source rank for received data + #MRH what value should tag take here and below? Esp if nrank >= 32 + rreq2 = MPI.Irecv!(receive_buffer2, comm; source=isrc, tag=2) + sreq2 = MPI.Isend(dfdx_lower_endpoints, comm; dest=idst, tag=2) + #print("$irank: Sending $irank -> $idst = $dfdx_lower_endpoints\n") + stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) + #print("$irank: Received $isrc -> $irank = $receive_buffer1\n") + #print("$irank: Received $isrc -> $irank = $receive_buffer2\n") + + # now update receive buffers, taking into account the reconciliation + if irank == 0 + if coord.bc == "periodic" + # depending on adv_fac, update the extreme lower endpoint with data from irank = nrank -1 + apply_adv_fac!(receive_buffer1,adv_fac_lower_endpoints,dfdx_lower_endpoints,1) + else # directly use value from Cheb at extreme lower point + receive_buffer1 .= dfdx_lower_endpoints + end + else # depending on adv_fac, update the lower endpoint with data from irank = nrank -1 + apply_adv_fac!(receive_buffer1,adv_fac_lower_endpoints,dfdx_lower_endpoints,1) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,1] .= receive_buffer1 + + if irank == nrank-1 + if coord.bc == "periodic" + # depending on adv_fac, update the extreme upper endpoint with data from irank = 0 + apply_adv_fac!(receive_buffer2,adv_fac_upper_endpoints,dfdx_upper_endpoints,-1) + else #directly use value from Cheb + receive_buffer2 .= dfdx_upper_endpoints + end + else # enforce continuity at upper endpoint + 
apply_adv_fac!(receive_buffer2,adv_fac_upper_endpoints,dfdx_upper_endpoints,-1) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,end] .= receive_buffer2 + + end + # synchronize buffers + _block_synchronize() +end + """ Computes the integral of the integrand, using the input wgts """ diff --git a/moment_kinetics/src/derivatives.jl b/moment_kinetics/src/derivatives.jl index 1506ebbd1..cd8cf84c0 100644 --- a/moment_kinetics/src/derivatives.jl +++ b/moment_kinetics/src/derivatives.jl @@ -12,7 +12,7 @@ export derivative_r!, derivative_r_chrg!, derivative_r_ntrl! export derivative_z!, derivative_z_chrg!, derivative_z_ntrl! using ..calculus: derivative!, second_derivative!, reconcile_element_boundaries_MPI!, - apply_adv_fac! + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!, apply_adv_fac! using ..communication using ..type_definitions: mk_float using ..looping @@ -265,62 +265,9 @@ function derivative_z_pdf_vpavperpz!(dfdz::AbstractArray{mk_float,3}, f::Abstrac # now reconcile element boundaries across # processes with large message if z.nelement_local < z.nelement_global - # synchronize buffers - # -- this all-to-all block communicate here requires that this function is NOT called from within a parallelised loop - # -- or from a @serial_region or from an if statment isolating a single rank on a block - _block_synchronize() - @serial_region begin - # now deal with endpoints that are stored across ranks - comm = z.comm - nrank = z.nrank - irank = z.irank - # sending pattern is cyclic. First we send data form irank -> irank + 1 - # to fix the lower endpoints, then we send data from irank -> irank - 1 - # to fix upper endpoints. Special exception for the periodic points. 
- # receive_buffer[1] is for data received, send_buffer[1] is data to be sent - - # send highest end point on THIS rank - # pass data from irank -> irank + 1, receive data from irank - 1 - idst = mod(irank+1,nrank) # destination rank for sent data - isrc = mod(irank-1,nrank) # source rank for received data - rreq1 = MPI.Irecv!(z_receive_buffer1, comm; source=isrc, tag=1) - sreq1 = MPI.Isend(dfdz_upper_endpoints, comm; dest=idst, tag=1) - - # send lowest end point on THIS rank - # pass data from irank -> irank - 1, receive data from irank + 1 - idst = mod(irank-1,nrank) # destination rank for sent data - isrc = mod(irank+1,nrank) # source rank for received data - rreq2 = MPI.Irecv!(z_receive_buffer2, comm; source=isrc, tag=2) - sreq2 = MPI.Isend(dfdz_lower_endpoints, comm; dest=idst, tag=2) - stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) - - # now update receive buffers, taking into account the reconciliation - if irank == 0 - if z.bc == "periodic" - @. z_receive_buffer1 = 0.5 * (z_receive_buffer1 * dfdz_lower_endpoints) - else # directly use value from Cheb at extreme lower point - z_receive_buffer1 .= dfdz_lower_endpoints - end - else - @. z_receive_buffer1 = 0.5 * (z_receive_buffer1 * dfdz_lower_endpoints) - end - #now update the dfdz array -- using a slice appropriate to the dimension reconciled - @views dfdz[:,:,1] .= z_receive_buffer1 - - if irank == nrank-1 - if z.bc == "periodic" - @. z_receive_buffer2 = 0.5 * (z_receive_buffer2 * dfdz_upper_endpoints) - else #directly use value from Cheb - z_receive_buffer2 .= dfdz_upper_endpoints - end - else - @. 
z_receive_buffer2 = 0.5 * (z_receive_buffer2 * dfdz_upper_endpoints) - end - #now update the dfdz array -- using a slice appropriate to the dimension reconciled - @views dfdz[:,:,end] .= z_receive_buffer2 - end - # synchronize buffers - _block_synchronize() + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + dfdz, dfdz_lower_endpoints, dfdz_upper_endpoints, z_receive_buffer1, + z_receive_buffer2, z) end end @@ -897,64 +844,9 @@ function derivative_z_pdf_vpavperpz!(dfdz::AbstractArray{mk_float,3}, f::Abstrac # now reconcile element boundaries across # processes with large message if z.nelement_local < z.nelement_global - # synchronize buffers - # -- this all-to-all block communicate here requires that this function is NOT called from within a parallelised loop - # -- or from a @serial_region or from an if statment isolating a single rank on a block - _block_synchronize() - @serial_region begin - # now deal with endpoints that are stored across ranks - comm = z.comm - nrank = z.nrank - irank = z.irank - # sending pattern is cyclic. First we send data form irank -> irank + 1 - # to fix the lower endpoints, then we send data from irank -> irank - 1 - # to fix upper endpoints. Special exception for the periodic points. 
- # receive_buffer[1] is for data received, send_buffer[1] is data to be sent - - # send highest end point on THIS rank - # pass data from irank -> irank + 1, receive data from irank - 1 - idst = mod(irank+1,nrank) # destination rank for sent data - isrc = mod(irank-1,nrank) # source rank for received data - rreq1 = MPI.Irecv!(z_receive_buffer1, comm; source=isrc, tag=1) - sreq1 = MPI.Isend(dfdz_upper_endpoints, comm; dest=idst, tag=1) - - # send lowest end point on THIS rank - # pass data from irank -> irank - 1, receive data from irank + 1 - idst = mod(irank-1,nrank) # destination rank for sent data - isrc = mod(irank+1,nrank) # source rank for received data - rreq2 = MPI.Irecv!(z_receive_buffer2, comm; source=isrc, tag=2) - sreq2 = MPI.Isend(dfdz_lower_endpoints, comm; dest=idst, tag=2) - stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) - - # now update receive buffers, taking into account the reconciliation - if irank == 0 - if z.bc == "periodic" - # depending on adv_fac, update the extreme lower endpoint with data from irank = nrank -1 - apply_adv_fac!(z_receive_buffer1, adv_fac_lower_endpoints, dfdz_lower_endpoints, 1) - else # directly use value from Cheb at extreme lower point - z_receive_buffer1 .= dfdz_lower_endpoints - end - else # depending on adv_fac, update the lower endpoint with data from irank = nrank -1 - apply_adv_fac!(z_receive_buffer1, adv_fac_lower_endpoints, dfdz_lower_endpoints, 1) - end - #now update the dfdz array -- using a slice appropriate to the dimension reconciled - @views dfdz[:,:,1] .= z_receive_buffer1 - - if irank == nrank-1 - if z.bc == "periodic" - # depending on adv_fac, update the extreme upper endpoint with data from irank = 0 - apply_adv_fac!(z_receive_buffer2, adv_fac_upper_endpoints, dfdz_upper_endpoints, -1) - else #directly use value from Cheb - z_receive_buffer2 .= dfdz_upper_endpoints - end - else # enforce continuity at upper endpoint - apply_adv_fac!(z_receive_buffer2, adv_fac_upper_endpoints, dfdz_upper_endpoints, 
-1) - end - #now update the dfdz array -- using a slice appropriate to the dimension reconciled - @views dfdz[:,:,end] .= z_receive_buffer2 - end - # synchronize buffers - _block_synchronize() + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + dfdz, adv_fac_lower_buffer, adv_fac_upper_buffer, dfdz_lower_endpoints, + dfdz_upper_endpoints, z_receive_buffer1, z_receive_buffer2, z) end end From a79f8496835f96757a417acbc3896bb924029814 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 30 Aug 2024 19:18:11 +0100 Subject: [PATCH 056/107] Ensure block bndrys of precon fields consistent with distibuted MPI --- .../src/electron_kinetic_equation.jl | 68 +++++++++++++++---- moment_kinetics/src/nonlinear_solvers.jl | 2 + 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index b56b99ea0..9e9dd26c1 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -12,7 +12,9 @@ using ..derivatives: derivative_z!, derivative_z_pdf_vpavperpz! using ..boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition!, skip_f_electron_bc_points_in_Jacobian, vpagrid_to_dzdt -using ..calculus: derivative!, second_derivative!, integral +using ..calculus: derivative!, second_derivative!, integral, + reconcile_element_boundaries_MPI!, + reconcile_element_boundaries_MPI_z_pdf_vpavperpz! using ..communication using ..gauss_legendre: gausslegendre_info using ..input_structs @@ -1019,30 +1021,37 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos right_preconditioner = split_precon! 
elseif nl_solver_params.preconditioner_type == "electron_lu" if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 - orig_lu, precon_matrix, input_buffer, output_buffer = nl_solver_params.preconditioners[ir] + orig_lu, precon_matrix, input_buffer, output_buffer, adv_fac_lower, + adv_fac_upper = nl_solver_params.preconditioners[ir] fill_electron_kinetic_equation_Jacobian!( precon_matrix, f_electron_new, electron_ppar_new, moments, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar) + ir, evolve_ppar, adv_fac_lower, adv_fac_upper) begin_serial_region() if block_rank[] == 0 if size(orig_lu) == (1, 1) # Have not properly created the LU decomposition before, so # cannot reuse it. - nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, output_buffer) + nl_solver_params.preconditioners[ir] = + (lu(sparse(precon_matrix)), precon_matrix, input_buffer, + output_buffer, adv_fac_lower, adv_fac_upper) else # LU decomposition was previously created. The Jacobian always # has the same sparsity pattern, so by using `lu!()` we can # reuse some setup. 
lu!(orig_lu, sparse(precon_matrix); check=false) - nl_solver_params.preconditioners[ir] = (orig_lu, precon_matrix, input_buffer, output_buffer) + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer, + adv_fac_lower, adv_fac_upper) end else - nl_solver_params.preconditioners[ir] = (orig_lu, precon_matrix, input_buffer, output_buffer) + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer, + adv_fac_lower, adv_fac_upper) end end @@ -1050,7 +1059,8 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos function lu_precon!(x) precon_ppar, precon_f = x - precon_lu, _, input_buffer, output_buffer = nl_solver_params.preconditioners[ir] + precon_lu, _, input_buffer, output_buffer, adv_fac_lower, + adv_fac_upper = nl_solver_params.preconditioners[ir] begin_serial_region() counter = 1 @@ -1079,6 +1089,30 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos counter += 1 end + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For precon_f take the upwind + # value, and for precon_ppar take the average. 
+ f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] + end + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + precon_f, adv_fac_lower, adv_fac_upper, f_lower_endpoints, + f_upper_endpoints, receive_buffer1, receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = precon_ppar[1] + buffer_2[] = precon_ppar[end] + end + reconcile_element_boundaries_MPI!( + precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) + return nothing end @@ -2845,12 +2879,14 @@ function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, end """ - fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, moments, collisions, - composition, z, vperp, vpa, z_spectral, - vperp_specral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, external_source_settings, + fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, moments, + collisions, composition, z, vperp, vpa, + z_spectral, vperp_specral, + vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar) + ir, evolve_ppar, adv_fac_lower, + adv_fac_upper) Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if `evolve_ppar=true`) the electron energy equation. 
@@ -2861,7 +2897,8 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar) + evolve_ppar, adv_fac_lower, + adv_fac_upper) dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -2915,6 +2952,11 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome end z_speed = @view z_advect[1].speed[:,:,:,ir] + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + adv_fac_lower[ivpa,ivperp] = -z_speed[ivpa,ivperp,1] + adv_fac_upper[ivpa,ivperp] = -z_speed[ivpa,ivperp,end] + end add_electron_z_advection_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 25f0a604b..e9278b39a 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -146,6 +146,8 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size), allocate_shared_float(pdf_plus_ppar_size), allocate_shared_float(pdf_plus_ppar_size), + allocate_shared_float(coords.vpa.n,coords.vperp.n), + allocate_shared_float(coords.vpa.n,coords.vperp.n), ), reverse(outer_coord_sizes)) elseif preconditioner_type == "none" From 25e39ddf0cf473fb39890c0285f6d34efbe5a0ce Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 31 Aug 2024 12:19:47 +0100 Subject: [PATCH 057/107] Tweak backward-Euler solver parameters --- ...ctron_ppar-loworder-PareschiRusso2222.toml | 130 ++++++++++++++++++ ...netic-implicit-electron_ppar-loworder.toml | 9 +- .../src/electron_kinetic_equation.jl | 18 ++- 3 files changed, 144 insertions(+), 13 deletions(-) create mode 100644 examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml diff --git 
a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml new file mode 100644 index 000000000..dda0eebaf --- /dev/null +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml @@ -0,0 +1,130 @@ +#runtime_plots = true +n_ion_species = 1 +n_neutral_species = 1 +electron_physics = "kinetic_electrons" +evolve_moments_density = true +evolve_moments_parallel_flow = true +evolve_moments_parallel_pressure = true +evolve_moments_conservation = true +recycling_fraction = 0.5 +T_e = 1.0 +T_wall = 0.1 +initial_density1 = 1.0 +initial_temperature1 = 1.0 +z_IC_option1 = "sinusoid" +z_IC_density_amplitude1 = 0.1 +z_IC_density_phase1 = 0.0 +z_IC_upar_amplitude1 = 0.1 +z_IC_upar_phase1 = 0.0 +z_IC_temperature_amplitude1 = 0.1 +z_IC_temperature_phase1 = 1.0 +vpa_IC_option1 = "gaussian" +vpa_IC_density_amplitude1 = 1.0 +vpa_IC_density_phase1 = 0.0 +vpa_IC_upar_amplitude1 = 0.0 +vpa_IC_upar_phase1 = 0.0 +vpa_IC_temperature_amplitude1 = 0.0 +vpa_IC_temperature_phase1 = 0.0 +initial_density2 = 1.0 +initial_temperature2 = 1.0 +z_IC_option2 = "sinusoid" +z_IC_density_amplitude2 = 0.001 +z_IC_density_phase2 = 0.0 +z_IC_upar_amplitude2 = 0.0 +z_IC_upar_phase2 = 0.0 +z_IC_temperature_amplitude2 = 0.0 +z_IC_temperature_phase2 = 0.0 +vpa_IC_option2 = "gaussian" +vpa_IC_density_amplitude2 = 1.0 +vpa_IC_density_phase2 = 0.0 +vpa_IC_upar_amplitude2 = 0.0 +vpa_IC_upar_phase2 = 0.0 +vpa_IC_temperature_amplitude2 = 0.0 +vpa_IC_temperature_phase2 = 0.0 +charge_exchange_frequency = 0.75 +ionization_frequency = 0.0 +constant_ionization_rate = false +r_ngrid = 1 +r_nelement = 1 +z_ngrid = 5 +z_nelement = 16 +z_nelement_local = 2 +z_bc = "periodic" +#z_discretization = "chebyshev_pseudospectral" +z_discretization = "gausslegendre_pseudospectral" +vpa_ngrid = 6 +vpa_nelement = 31 +vpa_L = 12.0 
+vpa_bc = "zero" +#vpa_discretization = "chebyshev_pseudospectral" +vpa_discretization = "gausslegendre_pseudospectral" +vpa_element_spacing_option = "coarse_tails" +vz_ngrid = 6 +vz_nelement = 31 +vz_L = 12.0 +vz_bc = "zero" +#vz_discretization = "chebyshev_pseudospectral" +vz_discretization = "gausslegendre_pseudospectral" +vz_element_spacing_option = "coarse_tails" + +[timestepping] +type = "PareschiRusso2(2,2,2)" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 500000 +dt = 2.0e-4 +#nwrite = 50 +#nwrite_dfns = 50 +nwrite = 5 +nwrite_dfns = 5 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +#dt = 2.0e-8 +dt = 5.0e-5 +maximum_dt = 1.0 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-6 +atol = 1.0e-14 +minimum_dt = 1.0e-10 +initialization_residual_value = 2.5 +#converged_residual_value = 0.1 #1.0e-3 +converged_residual_value = 1.0e-2 +#debug_io = 10000 +constraint_forcing_rate = 1.0e-4 + +[nonlinear_solver] +#nonlinear_max_iterations = 20 #100 +nonlinear_max_iterations = 1000 +rtol = 1.0e-8 #1.0e-5 +atol = 1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 #1000 + +[ion_numerical_dissipation] +vpa_dissipation_coefficient = 1.0e0 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +#vpa_dissipation_coefficient = 2.0e2 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +vz_dissipation_coefficient = 1.0e-1 +force_minimum_pdf_value = 0.0 + +[krook_collisions] +use_krook = true + +frequency_option = "reference_parameters" +nuee0 = 1000.0 +nuei0 = 1000.0 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml index 689fa44a3..b5682534e 100644 --- 
a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -106,13 +106,16 @@ initialization_residual_value = 2.5 #converged_residual_value = 0.1 #1.0e-3 converged_residual_value = 1.0e-2 #debug_io = 10000 +constraint_forcing_rate = 1.0e-4 [nonlinear_solver] -nonlinear_max_iterations = 100 -rtol = 1.0e-5 -atol = 1.0e-12 +nonlinear_max_iterations = 20 #100 +rtol = 1.0e-8 #1.0e-5 +atol = 1.0e-16 #linear_restart = 40 #linear_restart = 200 +linear_restart = 5 +preconditioner_update_interval = 100 [ion_numerical_dissipation] vpa_dissipation_coefficient = 1.0e0 diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 9e9dd26c1..527cd5085 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1317,7 +1317,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.previous_dt[] /= t_params.max_increase_factor println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) - elseif nl_solver_params.max_linear_iterations_this_step[] < 2 + elseif nl_solver_params.max_linear_iterations_this_step[] < 4 # Only took a few iterations, so increase initial step size. 
print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) t_params.previous_dt[] *= t_params.max_increase_factor @@ -1331,15 +1331,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # the solver than the nonlinear iteration count, or the linear iterations # per nonlinear iteration #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) - if nl_solver_params.max_linear_iterations_this_step[] > 5 && t_params.dt[] > t_params.previous_dt[] - # Step succeeded, but took a lot of iterations so decrease step size. - t_params.dt[] /= t_params.max_increase_factor - #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.05 * nl_solver_params.nonlinear_max_iterations, 5) - elseif nl_solver_params.max_linear_iterations_this_step[] < 2 - #elseif nl_solver_params.max_nonlinear_iterations_this_step[] < 3 - # Only took a few iterations, so increase step size. - t_params.dt[] *= t_params.max_increase_factor - end + #if nl_solver_params.max_linear_iterations_this_step[] > 10 && t_params.dt[] > t_params.previous_dt[] + # # Step succeeded, but took a lot of iterations so decrease step size. + # t_params.dt[] /= t_params.max_increase_factor + #elseif nl_solver_params.max_linear_iterations_this_step[] < 4 + # # Only took a few iterations, so increase step size. + # t_params.dt[] *= t_params.max_increase_factor + #end end _block_synchronize() From f24379e5b6f4599c484db4697b425760e944e663 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 2 Sep 2024 08:49:58 +0100 Subject: [PATCH 058/107] Try-catch in case sparsity pattern of precon_matrix changes This should only happen rarely, if some element happens to be zero by chance when 'structurally' it should be a non-zero. There are probably better ways to handle this, but it works for now. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 527cd5085..7d77ddaf1 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1043,7 +1043,16 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # LU decomposition was previously created. The Jacobian always # has the same sparsity pattern, so by using `lu!()` we can # reuse some setup. - lu!(orig_lu, sparse(precon_matrix); check=false) + try + lu!(orig_lu, sparse(precon_matrix); check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch") + orig_lu = lu(sparse(precon_matrix)) + end nl_solver_params.preconditioners[ir] = (orig_lu, precon_matrix, input_buffer, output_buffer, adv_fac_lower, adv_fac_upper) From 7156b5ee214475069c3b3ef396ec6193dd4b273f Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 2 Sep 2024 11:11:36 +0100 Subject: [PATCH 059/107] Repurpose stage_counter to solves_since_precon_update The `stage_counter` was only being used to decide when to update the preconditioner(s). Renaming makes this clearer, and avoids confusion if/when the counter is reset to force a preconditioner update. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 8 ++++++-- moment_kinetics/src/initial_conditions.jl | 2 +- moment_kinetics/src/nonlinear_solvers.jl | 8 ++++---- moment_kinetics/src/vpa_advection.jl | 4 +++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 7d77ddaf1..2731d0a64 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -799,7 +799,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos num_diss_params.electron.moment_dissipation_coefficient, ir) if nl_solver_params.preconditioner_type == "electron_split_lu" - if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + nl_solver_params.solves_since_precon_update[] = 0 + dt = t_params.dt[] vth = @view moments.electron.vth[:,ir] me = composition.me_over_mi @@ -1020,7 +1022,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos left_preconditioner = identity right_preconditioner = split_precon! 
elseif nl_solver_params.preconditioner_type == "electron_lu" - if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + nl_solver_params.solves_since_precon_update[] = 0 + orig_lu, precon_matrix, input_buffer, output_buffer, adv_fac_lower, adv_fac_upper = nl_solver_params.preconditioners[ir] diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index f431c73f9..c96215527 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -754,7 +754,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.global_n_solves, nl_solver_params.electron_advance.global_nonlinear_iterations, nl_solver_params.electron_advance.global_linear_iterations, - nl_solver_params.electron_advance.stage_counter, + nl_solver_params.electron_advance.solves_since_precon_update, nl_solver_params.electron_advance.serial_solve, nl_solver_params.electron_advance.max_nonlinear_iterations_this_step, nl_solver_params.electron_advance.preconditioner_update_interval, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index e9278b39a..d44e56d35 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -58,7 +58,7 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} global_n_solves::Ref{mk_int} global_nonlinear_iterations::Ref{mk_int} global_linear_iterations::Ref{mk_int} - stage_counter::Ref{mk_int} + solves_since_precon_update::Ref{mk_int} serial_solve::Bool max_nonlinear_iterations_this_step::Ref{mk_int} max_linear_iterations_this_step::Ref{mk_int} @@ -163,8 +163,8 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol nl_solver_input.linear_rtol, nl_solver_input.linear_atol, linear_restart, 
nl_solver_input.linear_max_restarts, H, V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), - Ref(0), Ref(0), serial_solve, Ref(0), Ref(0), - preconditioner_type, + Ref(0), Ref(nl_solver_input.preconditioner_update_interval), + serial_solve, Ref(0), Ref(0), preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end @@ -184,7 +184,7 @@ function reset_nonlinear_per_stage_counters!(nl_solver_params::Union{nl_solver_i nl_solver_params.max_linear_iterations_this_step[] = 0 # Also increment the stage counter - nl_solver_params.stage_counter[] += 1 + nl_solver_params.solves_since_precon_update[] += 1 return nothing end diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 3d9b5897e..cb0fe073a 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -140,7 +140,9 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_ f_old = vpa.scratch7 .= f_old_no_bc apply_bc!(f_old) - #if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + #if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + # nl_solver_params.solves_since_precon_update[] = 0 + # advection_matrix = allocate_float(vpa.n, vpa.n) # advection_matrix .= 0.0 # for i ∈ 1:vpa.nelement_local From 3565cda88bbe0b0451be4e6b6b79f533fb7e0c2c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 2 Sep 2024 11:14:54 +0100 Subject: [PATCH 060/107] Recalculate electron preconditioner when Newton solve fails When the Newton solve fails due to taking too many iterations, it might be that the preconditioner was too old so recalculating it is useful. More importantly, when the Newton iteration fails, we decrease dt by a factor 0.5 - the large change in dt will make the preconditioner bad, so it needs to be updated. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 2731d0a64..3718ee861 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1362,6 +1362,10 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end _block_synchronize() + # Force the preconditioner to be recalculated, because we have just + # changed `dt` by a fairly large amount. + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + # Swap old_scratch and new_scratch so that the next step restarts from the # same state scratch[1] = new_scratch From 77e8bb53d5d7bfacec77d0e798d2c5c6dcd7b200 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 2 Sep 2024 15:24:50 +0100 Subject: [PATCH 061/107] Make electron bc iteration a bit more robust If epsilonprime has the wrong sign, step vcut in the opposite direction to avoid vcut running off to infinity. --- moment_kinetics/src/electron_kinetic_equation.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 3718ee861..ac64e258a 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2075,6 +2075,13 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp # respect to vcut delta_v = - epsilon / epsilonprime + if vcut > vthe[1,ir] && epsilonprime < 0.0 + # epsilon should be increasing with vcut at epsilon=0, so if + # epsilonprime is negative, the solution is actually at a lower vcut - + # at larger vcut, epsilon will just tend to 0 but never reach it. 
+ delta_v = -0.1 * vthe[1,ir] + end + # Prevent the step size from getting too big, to make Newton iteration # more robust. delta_v = min(delta_v, 0.1 * vthe[1,ir]) @@ -2335,6 +2342,13 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp # respect to vcut delta_v = - epsilon / epsilonprime + if vcut > vthe[1,ir] && epsilonprime > 0.0 + # epsilon should be decreasing with vcut at epsilon=0, so if + # epsilonprime is positive, the solution is actually at a lower vcut - + # at larger vcut, epsilon will just tend to 0 but never reach it. + delta_v = -0.1 * vthe[1,ir] + end + # Prevent the step size from getting too big, to make Newton iteration # more robust. delta_v = min(delta_v, 0.1 * vthe[end,ir]) From 860bc0ba83ebc9fe5235bb8dcf917db71aef602d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 2 Sep 2024 18:02:46 +0100 Subject: [PATCH 062/107] Fix steady state residual calculation in electron_backward_euler!() Need to use the `dt[]` not `previous_dt[]`. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index ac64e258a..3a86981ad 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1407,7 +1407,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos begin_z_vperp_vpa_region() residual = steady_state_residuals(new_scratch.pdf_electron, old_scratch.pdf_electron, - t_params.previous_dt[]; use_mpi=true, + t_params.dt[]; use_mpi=true, only_max_abs=true) if global_rank[] == 0 residual = first(values(residual))[1] @@ -1416,7 +1416,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos ppar_residual = steady_state_residuals(new_scratch.electron_ppar, old_scratch.electron_ppar, - t_params.previous_dt[]; use_mpi=true, + t_params.dt[]; use_mpi=true, only_max_abs=true) if global_rank[] == 0 ppar_residual = first(values(ppar_residual))[1] From e658786b80bd0ba7fac586123749aff4bc86a322 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 3 Sep 2024 21:24:23 +0100 Subject: [PATCH 063/107] Fix Chodura condition diagnostic bzed is now a 2D variable, not a scalar. --- moment_kinetics/src/analysis.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/analysis.jl b/moment_kinetics/src/analysis.jl index fc342e050..7bc3a5037 100644 --- a/moment_kinetics/src/analysis.jl +++ b/moment_kinetics/src/analysis.jl @@ -165,7 +165,7 @@ function check_Chodura_condition(r, z, vperp, vpa, dens, upar, vth, composition, for it ∈ 1:ntime, ir ∈ 1:nr v_parallel = vpagrid_to_dzdt(vpa.grid, vth[1,ir,is,it], upar[1,ir,is,it], evolve_ppar, evolve_upar) - vpabar = @. v_parallel - 0.5 * geometry.rhostar * Er[1,ir,it] / geometry.bzed + vpabar = @. 
v_parallel - 0.5 * geometry.rhostar * Er[1,ir,it] / geometry.bzed[1,ir] # Get rid of a zero if it is there to avoid a blow up - f should be zero at that # point anyway @@ -187,7 +187,7 @@ function check_Chodura_condition(r, z, vperp, vpa, dens, upar, vth, composition, v_parallel = vpagrid_to_dzdt(vpa.grid, vth[end,ir,is,it], upar[end,ir,is,it], evolve_ppar, evolve_upar) - vpabar = @. v_parallel - 0.5 * geometry.rhostar * Er[end,ir,it] / geometry.bzed + vpabar = @. v_parallel - 0.5 * geometry.rhostar * Er[end,ir,it] / geometry.bzed[end,ir] # Get rid of a zero if it is there to avoid a blow up - f should be zero at that # point anyway From 4c77c65957e93544f5baf60d1c0fa46ef2b27bee Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 4 Sep 2024 15:42:01 +0100 Subject: [PATCH 064/107] Option for "compressed" grids By passing `*_element_spacing_option = "compressed"` or `*_element_spacing_option = "compressed_N"` where `N` is a number (if not given, it defaults to 4), a grid is generated that is 'compressed' - the element nearest the target has a width approximately 1/N of what the element width would be in a uniform grid.
--- moment_kinetics/src/coordinates.jl | 89 ++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 976015583..f96d71e29 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -299,6 +299,95 @@ function set_element_boundaries(nelement_global, L, element_spacing_option, coor for j in 1:nsqrt element_boundaries[(nelement_global+1)+ 1 - j] = (L/2.0) - fac*(L/2.0)*((j-1)/(nsqrt-1))^2 end + elseif startswith(element_spacing_option, "compressed") + element_spacing_option_split = split(element_spacing_option, "_") + if length(element_spacing_option_split) == 1 + compression_factor = 4.0 + else + compression_factor = parse(mk_float, element_spacing_option_split[2]) + end + + #shifted_inds = collect(mk_float, 0:nelement_global) .- 0.5 .* nelement_global + ## Choose element boundary positions to be given by + ## s = A*shifted_inds + B*shifted_inds^3 + ## Choose A and B so that, with simin=-nelement_global/2: + ## s(simin) = -L/2 + ## s(simin+1) = -L/2 + L/nelement_global/compression_factor + ## i.e. so that the grid spacing of the element nearest the wall is + ## compression_factor smaller than the elements in a uniformly spaced grid. 
+ ## simin*A + simin^3*B = -L/2 + ## A = -(L/2 + simin^3*B)/simin + ## + ## (simin+1)*A + (simin+1)^3*B = -L/2 + L/nelement_global/compression_factor + ## -(simin+1)*(L/2 + simin^3*B)/simin + (simin+1)^3*B = -L/2 + L/nelement_global/compression_factor + ## -(simin+1)*simin^3*B/simin + (simin+1)^3*B = -L/2 + L/nelement_global/compression_factor + (simin+1)*L/2/simin + ## (simin+1)*simin^2*B - (simin+1)^3*B = L/2 - L/nelement_global/compression_factor - (simin+1)*L/2/simin + ## B = (L/2 - L/nelement_global/compression_factor - (simin+1)*L/2/simin) / ((simin+1)*simin^2 - (simin+1)^3) + + #simin = -nelement_global / 2.0 + #B = (L/2.0 - L/nelement_global/compression_factor - (simin+1.0)*L/2.0/simin) / ((simin+1.0)*simin^2 - (simin+1.0)^3) + #A = -(L/2.0 + simin^3*B)/simin + + #@. element_boundaries = A*shifted_inds + B*shifted_inds^3 + + # To have the grid spacing change as little as possible from one element to the + # next, the function that defines the element boundary positions should have + # constant curvature. The curvature has to change sign at the mid-point of the + # domain, so this means that the function must be defined piecewise - one piece + # for the lower half and one for the upper half. + # An apparently ideal way to do this would be to use a quadratic function, which + # would mean that the ratio of the sizes of adjacent elements is the same + # throughout the grid. However, a quadratic would mean a maximum compression + # factor of 2 before the function becomes non-monotonic, see next: + # We define the quadratic by making the gradient at the boundaries + # `compression_factor` larger than the gradient L of the linear function that + # would give a uniform grid. 
+ # s(a) = A*a + B*a*|a| + # where -0.5≤a≤0.5, and + # s(0.5) = L/2 + # s'(0.5) = compression_factor*L + # so + # A/2 + B/4 = L/2 + # A + B = compression_factor*L + # ⇒ + # B = 2*(compression_factor - 1)*L + # A = L - B/2 = L - (compression_factor-1)*L = (2 - compression_factor)*L + # + # Therefore instead we choose a circular arc which can be monotonic while reaching + # any gradient. To make a circle sensible, normalise s by L for this version. + # (s-s0)^2 + (a-a0)^2 = r^2 + # where -0.5≤a≤0.5, and + # s(0) = 0 + # s(1/2) = 1/2 + # s'(1/2) = 1/compression_factor + # and for a>0, a0<0 and s0>0 while for a<0, a0>0 and s0<0. This gives + # s0^2 + a0^2 = r^2 + # (1/2-s0)^2 + (1/2-a0)^2 = r^2 = s0^2 + a0^2 + # 2*(1/2-s0)/compression_factor + 2*(1/2-a0) = 0 + # solving these + # a0 = (1/2-s0)/compression_factor + 1/2 + # 1/4 - s0 + s0^2 + 1/4 - a0 + a0^2 = s0^2 + a0^2 + # 1/2 - s0 - a0 = 0 + # s0 = 1/2 - a0 = 1/2 - (1/2-s0)/compression_factor - 1/2 + # (1 - 1/compression_factor)*s0 = -1/compression_factor/2 + # s0 = 1/compression_factor/2/(1/compression_factor-1) + if abs(compression_factor - 1.0) < 1.0e-12 + # compression_factor is too close to 1, which would be a singular value where + # s0=∞ and a0=-∞, so just use constant spacing. + for j in 1:nelement_global+1 + element_boundaries[j] = L*((j-1)/(nelement_global) - 0.5) + end + else + s0 = 1.0 / compression_factor / 2.0 / (1.0 / compression_factor - 1.0) + a0 = (0.5 - s0)/compression_factor + 0.5 + a = collect(0:nelement_global) ./ nelement_global .- 0.5 + mid_ind_plus = (nelement_global + 1) ÷ 2 + 1 + mid_ind_minus = nelement_global ÷ 2 + 1 + @. element_boundaries[1:mid_ind_minus] = + -L * (sqrt(s0^2 + a0^2 - (a[1:mid_ind_minus]+a0)^2) + s0) + @.
element_boundaries[mid_ind_plus:end] = + L * (sqrt(s0^2 + a0^2 - (a[mid_ind_plus:end]-a0)^2) + s0) + end elseif element_spacing_option == "coarse_tails" # Element boundaries at # From b77176a1d1cd9d885da3a3e4250969455a77eecf Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 6 Sep 2024 10:42:57 +0100 Subject: [PATCH 065/107] Recalculate preconditioner when dt changes too much --- moment_kinetics/src/electron_kinetic_equation.jl | 9 +++++++++ moment_kinetics/src/nonlinear_solvers.jl | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 3a86981ad..25b453597 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1022,8 +1022,17 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos left_preconditioner = identity right_preconditioner = split_precon! elseif nl_solver_params.preconditioner_type == "electron_lu" + + if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || + t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] + + # dt has changed significantly, so update the preconditioner + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval nl_solver_params.solves_since_precon_update[] = 0 + nl_solver_params.precon_dt[] = t_params.dt[] orig_lu, precon_matrix, input_buffer, output_buffer, adv_fac_lower, adv_fac_upper = nl_solver_params.preconditioners[ir] diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index d44e56d35..62686be1f 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -59,6 +59,7 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} global_nonlinear_iterations::Ref{mk_int} 
global_linear_iterations::Ref{mk_int} solves_since_precon_update::Ref{mk_int} + precon_dt::Ref{mk_float} serial_solve::Bool max_nonlinear_iterations_this_step::Ref{mk_int} max_linear_iterations_this_step::Ref{mk_int} @@ -164,7 +165,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol linear_restart, nl_solver_input.linear_max_restarts, H, V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(nl_solver_input.preconditioner_update_interval), - serial_solve, Ref(0), Ref(0), preconditioner_type, + Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end From d2c5b181a5465ecb9f439041c277a7744e92f07e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 6 Sep 2024 12:21:09 +0100 Subject: [PATCH 066/107] Don't impose constraints as part of boundary condition inside NK solve The constraints are being softly forced as part of the residual, and imposing them, even just in the Newton iterations, seems to very negatively impact the convergence of the solver. 
--- .../src/electron_kinetic_equation.jl | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 25b453597..5b9b8f19e 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1160,7 +1160,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi) + composition.me_over_mi; bc_constraints=false) if evolve_ppar this_dens = moments.electron.dens @@ -1874,7 +1874,7 @@ end function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, - me_over_mi) + me_over_mi; bc_constraints=true) newton_tol = 1.0e-13 @@ -1937,22 +1937,29 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, c1, c1prime, c2, c2prime, d1, d1prime, e1, e1prime, e2, e2prime, u_over_vt) - alpha = a1 + 2.0 * a2 - alphaprime = a1prime + 2.0 * a2prime - beta = c1 + 2.0 * c2 - betaprime = c1prime + 2.0 * c2prime - gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta - gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime - delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 - deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime - - A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) - Aprime = (0.5 * betaprime - deltaprime - - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) - / (beta * gamma - alpha * delta) - ) / (beta * gamma - alpha * delta) - C = (1.0 - alpha * 
A) / beta - Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 + if bc_constraints + alpha = a1 + 2.0 * a2 + alphaprime = a1prime + 2.0 * a2prime + beta = c1 + 2.0 * c2 + betaprime = c1prime + 2.0 * c2prime + gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta + gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime + delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 + deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime + + A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) + Aprime = (0.5 * betaprime - deltaprime + - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) + / (beta * gamma - alpha * delta) + ) / (beta * gamma - alpha * delta) + C = (1.0 - alpha * A) / beta + Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 + else + A = 1.0 + Aprime = 0.0 + C = 0.0 + Cprime = 0.0 + end epsilon = A * b1 + C * d1 - u_over_vt epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime From 9bf4bf44017d372b9a99682d3312175950ff35c0 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 18 Sep 2024 16:33:21 +0100 Subject: [PATCH 067/107] Tweak pseudo-timestep update parameters in electron_backward_euler!() --- .../src/electron_kinetic_equation.jl | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 5b9b8f19e..cf2b04df4 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1332,14 +1332,14 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.previous_dt[] = t_params.dt[] println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * 
nl_solver_params.nonlinear_max_iterations, 5) - elseif nl_solver_params.max_linear_iterations_this_step[] > 10 + elseif nl_solver_params.max_linear_iterations_this_step[] > 100 # Step succeeded, but took a lot of iterations so decrease initial # step size. print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) t_params.previous_dt[] /= t_params.max_increase_factor println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) - elseif nl_solver_params.max_linear_iterations_this_step[] < 4 + elseif nl_solver_params.max_linear_iterations_this_step[] < 20 # Only took a few iterations, so increase initial step size. print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) t_params.previous_dt[] *= t_params.max_increase_factor @@ -1353,13 +1353,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # the solver than the nonlinear iteration count, or the linear iterations # per nonlinear iteration #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) - #if nl_solver_params.max_linear_iterations_this_step[] > 10 && t_params.dt[] > t_params.previous_dt[] - # # Step succeeded, but took a lot of iterations so decrease step size. - # t_params.dt[] /= t_params.max_increase_factor - #elseif nl_solver_params.max_linear_iterations_this_step[] < 4 - # # Only took a few iterations, so increase step size. - # t_params.dt[] *= t_params.max_increase_factor - #end + if nl_solver_params.max_linear_iterations_this_step[] > 100 && t_params.dt[] > t_params.previous_dt[] + # Step succeeded, but took a lot of iterations so decrease step size. + t_params.dt[] /= t_params.max_increase_factor + elseif nl_solver_params.max_linear_iterations_this_step[] < 20 + # Only took a few iterations, so increase step size. 
+ t_params.dt[] *= t_params.max_increase_factor + end end _block_synchronize() @@ -1526,6 +1526,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.max_t_increment_this_ion_step[]) end + initial_dt_scale_factor = 0.1 + if t_params.previous_dt[] < initial_dt_scale_factor * t_params.dt[] + # If dt has increased a lot, we can probably try a larger initial dt for the next + # solve. + t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] + end + if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] # Reset dt in case it was reduced to be less than 0.5*ion_dt begin_serial_region() From 4b9aea6c90fd4e7651f9128bbacd346353827a7b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 18 Sep 2024 17:15:44 +0100 Subject: [PATCH 068/107] Update electron Jacobian to be compatible with 'multiple sources' --- .../src/electron_kinetic_equation.jl | 23 +++++++------ moment_kinetics/src/electron_vpa_advection.jl | 14 +++++--- moment_kinetics/src/external_sources.jl | 34 +++++++++++++------ 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index b33d45e32..c5fb3c7a5 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -36,7 +36,7 @@ using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_v add_electron_vpa_advection_to_Jacobian! using ..em_fields: update_phi! using ..external_sources: total_external_electron_sources!, - add_external_electron_source_to_Jacobian! + add_total_external_electron_source_to_Jacobian! 
using ..file_io: get_electron_io_info, write_electron_state, finish_electron_io using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee, get_collision_frequency_ei, @@ -3026,9 +3026,9 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome add_electron_krook_collisions_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size) - add_external_electron_source_to_Jacobian!( - jacobian_matrix, f, moments, me, z_speed, external_source_settings, z, vperp, vpa, - dt, ir; ppar_offset=pdf_size) + add_total_external_electron_source_to_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, + vperp, vpa, dt, ir; ppar_offset=pdf_size) add_electron_implicit_constraint_forcing_to_Jacobian!( jacobian_matrix, f, z_speed, z, vperp, vpa, t_params.constraint_forcing_rate, dt, ir) @@ -3586,12 +3586,15 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( dt * f[ivpa,ivperp,iz] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry end - if external_source_settings.electron.active - # Source terms from `add_contribution_from_pdf_term!()` - jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz] / dens[iz] - - (0.5 * source_pressure_amplitude[iz] - + source_momentum_amplitude[iz]) / ppar[iz] - ) + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + # Source terms from `add_contribution_from_pdf_term!()` + jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz,ir,index] / dens[iz] + - (0.5 * source_pressure_amplitude[iz,ir,index] + + source_momentum_amplitude[iz,ir,index]) / ppar[iz] + ) + end end jacobian_matrix[row,ppar_offset+iz] += dt * f[ivpa,ivperp,iz] * diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 
0cc28908d..bbeb9e1b9 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -253,11 +253,15 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz] - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]) ) * dpdf_dvpa[ivpa,ivperp,iz] - if external_source_settings.electron.active - jacobian_matrix[row,ppar_offset+iz] += dt * ( - -0.5*source_density_amplitude[iz]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 - + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz] + 2.0*upar[iz]*source_momentum_amplitude[iz])/ppar[iz]^2 - ) * dpdf_dvpa[ivpa,ivperp,iz] + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + jacobian_matrix[row,ppar_offset+iz] += dt * ( + -0.5*source_density_amplitude[iz,ir,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 + + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,ir,index] + + 2.0*upar[iz]*source_momentum_amplitude[iz,ir,index])/ppar[iz]^2 + ) * dpdf_dvpa[ivpa,ivperp,iz] + end end for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col = ppar_offset + icolz diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 0b6cea02e..2a85fb71d 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -949,14 +949,14 @@ Note that this function operates on a single point in `r`, given by `ir`, and `p `pdf_in`, `electron_density`, and `electron_upar` should have no r-dimension. 
""" function external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, - moments, composition, electron_source_settings, vperp, - vpa, dt) + moments, composition, electron_source, index, + vperp, vpa, dt) begin_r_z_vperp_region() me_over_mi = composition.me_over_mi - source_amplitude = moments.electron.external_source_amplitude - source_T = electron_source_settings.source_T + @views source_amplitude = moments.electron.external_source_amplitude[:,ir,index] + source_T = electron_source.source_T if vperp.n == 1 vth_factor = 1.0 / sqrt(source_T / me_over_mi) else @@ -993,9 +993,21 @@ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_u return nothing end -function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, - me, z_speed, external_source_settings, - z, vperp, vpa, dt, ir; f_offset=0, +function add_total_external_electron_source_to_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir; + f_offset=0, ppar_offset=0) + for index ∈ eachindex(electron_sources) + add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me, + z_speed, electron_sources[index], index, + z, vperp, vpa, dt, ir; + f_offset=f_offset, + ppar_offset=ppar_offset) + end +end + +function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me, + z_speed, electron_source, index, z, + vperp, vpa, dt, ir; f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " @@ -1005,12 +1017,12 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n - if !external_source_settings.electron.active + if !electron_source.active return nothing end - source_amplitude = moments.electron.external_source_amplitude - source_T = external_source_settings.electron.source_T + source_amplitude = @view moments.electron.external_source_amplitude[:,ir,index] + source_T = electron_source.source_T dens = @view moments.electron.dens[:,ir] upar = @view moments.electron.upar[:,ir] ppar = @view moments.electron.ppar[:,ir] @@ -1025,7 +1037,7 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, v_size = vperp.n * vpa.n begin_z_vperp_vpa_region() - if external_source_settings.electron.source_type == "energy" + if electron_source.source_type == "energy" @loop_z_vperp_vpa iz ivperp ivpa begin if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) From 48858603eb6e4bd619b23a412f10d9c0e41e06d3 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 18 Sep 2024 18:09:53 +0100 Subject: [PATCH 069/107] Update kinetic electron input files with new input format --- ...ctron_ppar-loworder-PareschiRusso2222.toml | 165 ++++++++++-------- ...netic-implicit-electron_ppar-loworder.toml | 165 ++++++++++-------- 2 files changed, 184 insertions(+), 146 deletions(-) diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml index dda0eebaf..0b302dae3 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml @@ -1,71 +1,97 
@@ -#runtime_plots = true -n_ion_species = 1 -n_neutral_species = 1 +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +ionization_frequency = 0.0 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[krook_collisions] +nuee0 = 1000.0 +use_krook = true +frequency_option = "reference_parameters" +nuei0 = 1000.0 + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 16 +nelement_local = 2 +bc = "periodic" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "sinusoid" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 1.0 electron_physics = "kinetic_electrons" -evolve_moments_density = true -evolve_moments_parallel_flow = true -evolve_moments_parallel_pressure = true -evolve_moments_conservation = true recycling_fraction = 0.5 -T_e = 1.0 -T_wall = 0.1 -initial_density1 = 1.0 -initial_temperature1 = 1.0 -z_IC_option1 = "sinusoid" -z_IC_density_amplitude1 = 0.1 -z_IC_density_phase1 = 0.0 -z_IC_upar_amplitude1 = 0.1 -z_IC_upar_phase1 = 0.0 -z_IC_temperature_amplitude1 = 0.1 -z_IC_temperature_phase1 = 1.0 -vpa_IC_option1 = "gaussian" -vpa_IC_density_amplitude1 = 1.0 -vpa_IC_density_phase1 = 0.0 -vpa_IC_upar_amplitude1 = 0.0 -vpa_IC_upar_phase1 = 0.0 
-vpa_IC_temperature_amplitude1 = 0.0 -vpa_IC_temperature_phase1 = 0.0 -initial_density2 = 1.0 -initial_temperature2 = 1.0 -z_IC_option2 = "sinusoid" -z_IC_density_amplitude2 = 0.001 -z_IC_density_phase2 = 0.0 -z_IC_upar_amplitude2 = 0.0 -z_IC_upar_phase2 = 0.0 -z_IC_temperature_amplitude2 = 0.0 -z_IC_temperature_phase2 = 0.0 -vpa_IC_option2 = "gaussian" -vpa_IC_density_amplitude2 = 1.0 -vpa_IC_density_phase2 = 0.0 -vpa_IC_upar_amplitude2 = 0.0 -vpa_IC_upar_phase2 = 0.0 -vpa_IC_temperature_amplitude2 = 0.0 -vpa_IC_temperature_phase2 = 0.0 -charge_exchange_frequency = 0.75 -ionization_frequency = 0.0 -constant_ionization_rate = false -r_ngrid = 1 -r_nelement = 1 -z_ngrid = 5 -z_nelement = 16 -z_nelement_local = 2 -z_bc = "periodic" -#z_discretization = "chebyshev_pseudospectral" -z_discretization = "gausslegendre_pseudospectral" -vpa_ngrid = 6 -vpa_nelement = 31 -vpa_L = 12.0 -vpa_bc = "zero" -#vpa_discretization = "chebyshev_pseudospectral" -vpa_discretization = "gausslegendre_pseudospectral" -vpa_element_spacing_option = "coarse_tails" -vz_ngrid = 6 -vz_nelement = 31 -vz_L = 12.0 -vz_bc = "zero" -#vz_discretization = "chebyshev_pseudospectral" -vz_discretization = "gausslegendre_pseudospectral" -vz_element_spacing_option = "coarse_tails" +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "sinusoid" +density_amplitude = 0.1 +temperature_amplitude = 0.1 +density_phase = 0.0 +upar_amplitude = 0.1 +temperature_phase = 1.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 [timestepping] type = "PareschiRusso2(2,2,2)" @@ -121,10 +147,3 @@ force_minimum_pdf_value = 0.0 [neutral_numerical_dissipation] vz_dissipation_coefficient = 1.0e-1 force_minimum_pdf_value = 0.0 - -[krook_collisions] 
-use_krook = true - -frequency_option = "reference_parameters" -nuee0 = 1000.0 -nuei0 = 1000.0 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml index b5682534e..b73a980fa 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -1,71 +1,97 @@ -#runtime_plots = true -n_ion_species = 1 -n_neutral_species = 1 +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +ionization_frequency = 0.0 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[krook_collisions] +nuee0 = 1000.0 +use_krook = true +frequency_option = "reference_parameters" +nuei0 = 1000.0 + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 16 +#nelement_local = 16 +bc = "periodic" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "sinusoid" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 1.0 electron_physics = "kinetic_electrons" -evolve_moments_density = true -evolve_moments_parallel_flow = true 
-evolve_moments_parallel_pressure = true -evolve_moments_conservation = true recycling_fraction = 0.5 -T_e = 1.0 -T_wall = 0.1 -initial_density1 = 1.0 -initial_temperature1 = 1.0 -z_IC_option1 = "sinusoid" -z_IC_density_amplitude1 = 0.1 -z_IC_density_phase1 = 0.0 -z_IC_upar_amplitude1 = 0.1 -z_IC_upar_phase1 = 0.0 -z_IC_temperature_amplitude1 = 0.1 -z_IC_temperature_phase1 = 1.0 -vpa_IC_option1 = "gaussian" -vpa_IC_density_amplitude1 = 1.0 -vpa_IC_density_phase1 = 0.0 -vpa_IC_upar_amplitude1 = 0.0 -vpa_IC_upar_phase1 = 0.0 -vpa_IC_temperature_amplitude1 = 0.0 -vpa_IC_temperature_phase1 = 0.0 -initial_density2 = 1.0 -initial_temperature2 = 1.0 -z_IC_option2 = "sinusoid" -z_IC_density_amplitude2 = 0.001 -z_IC_density_phase2 = 0.0 -z_IC_upar_amplitude2 = 0.0 -z_IC_upar_phase2 = 0.0 -z_IC_temperature_amplitude2 = 0.0 -z_IC_temperature_phase2 = 0.0 -vpa_IC_option2 = "gaussian" -vpa_IC_density_amplitude2 = 1.0 -vpa_IC_density_phase2 = 0.0 -vpa_IC_upar_amplitude2 = 0.0 -vpa_IC_upar_phase2 = 0.0 -vpa_IC_temperature_amplitude2 = 0.0 -vpa_IC_temperature_phase2 = 0.0 -charge_exchange_frequency = 0.75 -ionization_frequency = 0.0 -constant_ionization_rate = false -r_ngrid = 1 -r_nelement = 1 -z_ngrid = 5 -z_nelement = 16 -#z_nelement_local = 16 -z_bc = "periodic" -#z_discretization = "chebyshev_pseudospectral" -z_discretization = "gausslegendre_pseudospectral" -vpa_ngrid = 6 -vpa_nelement = 31 -vpa_L = 12.0 -vpa_bc = "zero" -#vpa_discretization = "chebyshev_pseudospectral" -vpa_discretization = "gausslegendre_pseudospectral" -vpa_element_spacing_option = "coarse_tails" -vz_ngrid = 6 -vz_nelement = 31 -vz_L = 12.0 -vz_bc = "zero" -#vz_discretization = "chebyshev_pseudospectral" -vz_discretization = "gausslegendre_pseudospectral" -vz_element_spacing_option = "coarse_tails" +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 
+temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "sinusoid" +density_amplitude = 0.1 +temperature_amplitude = 0.1 +density_phase = 0.0 +upar_amplitude = 0.1 +temperature_phase = 1.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 [timestepping] type = "KennedyCarpenterARK324" @@ -129,10 +155,3 @@ force_minimum_pdf_value = 0.0 [neutral_numerical_dissipation] vz_dissipation_coefficient = 1.0e-1 force_minimum_pdf_value = 0.0 - -[krook_collisions] -use_krook = true - -frequency_option = "reference_parameters" -nuee0 = 1000.0 -nuei0 = 1000.0 From da57d9de8ab52b11a867341e955477cd5b529be7 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 18 Sep 2024 22:45:00 +0100 Subject: [PATCH 070/107] Fix merge of input refactor and multiple sources with implicit electrons --- .../src/electron_fluid_equations.jl | 12 +- moment_kinetics/src/electron_vpa_advection.jl | 24 +- moment_kinetics/src/external_sources.jl | 12 +- moment_kinetics/test/jacobian_matrix_tests.jl | 282 +++++++++--------- .../test/nonlinear_solver_tests.jl | 4 +- 5 files changed, 175 insertions(+), 159 deletions(-) diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 4c177b9a0..cead7d5f5 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -257,12 +257,12 @@ function electron_energy_equation_no_r!(ppar_out, ppar_in, electron_density, for index ∈ eachindex(electron_source_settings) if electron_source_settings[index].active - @views pressure_source_amplitude = moments.external_source_pressure_amplitude[:, :, index] - @views density_source_amplitude = moments.external_source_density_amplitude[:, :, index] + pressure_source_amplitude = @view moments.external_source_pressure_amplitude[:, ir, index] + density_source_amplitude = @view moments.external_source_density_amplitude[:, ir, index] @loop_z iz begin - 
ppar_out[iz,ir] += dt * (2.0 * pressure_source_amplitude[iz,ir] - - T_in[iz,ir] * density_source_amplitude[iz,ir]) / - electron_density[iz,ir] + ppar_out[iz] += dt * (2.0 * pressure_source_amplitude[iz] + - T_in[iz] * density_source_amplitude[iz]) / + electron_density[iz] end end end @@ -343,7 +343,7 @@ function electron_energy_equation_no_r!(ppar_out, ppar_in, electron_density, for index ∈ eachindex(electron_source_settings) if electron_source_settings[index].active - @views source_amplitude = moments.external_source_pressure_amplitude[:, ir, index] + source_amplitude = @view moments.external_source_pressure_amplitude[:, ir, index] @loop_z iz begin ppar_out[iz] += dt * source_amplitude[iz] end diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index bbeb9e1b9..6fa6b34e8 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -67,13 +67,13 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, @views source_density_amplitude = moments.electron.external_source_density_amplitude[:, ir, index] @views source_momentum_amplitude = moments.electron.external_source_momentum_amplitude[:, ir, index] @views source_pressure_amplitude = moments.electron.external_source_pressure_amplitude[:, ir, index] - @loop_r_z ir iz begin - term1 = source_density_amplitude[iz,ir] * upar[iz,ir]/(density[iz,ir]*vth[iz,ir]) + @loop_z iz begin + term1 = source_density_amplitude[iz] * upar[iz]/(density[iz]*vth[iz]) term2_over_vpa = - -0.5 * (source_pressure_amplitude[iz,ir] + - 2.0 * upar[iz,ir] * source_momentum_amplitude[iz,ir]) / - ppar[iz,ir] + - 0.5 * source_density_amplitude[iz,ir] / density[iz,ir] + -0.5 * (source_pressure_amplitude[iz] + + 2.0 * upar[iz] * source_momentum_amplitude[iz]) / + ppar[iz] + + 0.5 * source_density_amplitude[iz] / density[iz] @loop_vperp_vpa ivperp ivpa begin advect.speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * 
term2_over_vpa end @@ -108,9 +108,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n v_size = vperp.n * vpa.n - source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir] - source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir] - source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir] + source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir,:] + source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir,:] + source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir,:] dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] begin_z_vperp_region() @@ -257,9 +257,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, electron_source = external_source_settings.electron[index] if electron_source.active jacobian_matrix[row,ppar_offset+iz] += dt * ( - -0.5*source_density_amplitude[iz,ir,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 - + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,ir,index] - + 2.0*upar[iz]*source_momentum_amplitude[iz,ir,index])/ppar[iz]^2 + -0.5*source_density_amplitude[iz,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 + + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,index] + + 2.0*upar[iz]*source_momentum_amplitude[iz,index])/ppar[iz]^2 ) * dpdf_dvpa[ivpa,ivperp,iz] end end diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 2a85fb71d..7b60f7b6a 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -921,18 +921,20 @@ function external_ion_source!(pdf, fvec, moments, ion_source, index, vperp, vpa, end """ - total_external_electron_sources!(pdf, fvec, moments, electron_sources, vperp, vpa, dt, scratch_dummy) + 
total_external_electron_sources!(pdf_out, pdf_in, electron_density, electron_upar, + moments, composition, electron_sources, vperp, + vpa, dt, ir) Contribute all of the electron sources to the electron pdf, one by one. """ function total_external_electron_sources!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_sources, vperp, - vpa, dt) + vpa, dt, ir) for index ∈ eachindex(electron_sources) if electron_sources[index].active external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_sources[index], index, - vperp, vpa, dt) + vperp, vpa, dt, ir) end end return nothing @@ -941,7 +943,7 @@ end """ external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_source, index, vperp, - vpa, dt) + vpa, dt, ir) Add external source term to the electron kinetic equation. @@ -950,7 +952,7 @@ Note that this function operates on a single point in `r`, given by `ir`, and `p """ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_source, index, - vperp, vpa, dt) + vperp, vpa, dt, ir) begin_r_z_vperp_region() me_over_mi = composition.me_over_mi diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index c0a26b9f4..419eef809 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -25,8 +25,8 @@ using moment_kinetics.electron_vpa_advection: electron_vpa_advection!, using moment_kinetics.electron_z_advection: electron_z_advection!, update_electron_speed_z!, add_electron_z_advection_to_Jacobian! -using moment_kinetics.external_sources: external_electron_source!, - add_external_electron_source_to_Jacobian! +using moment_kinetics.external_sources: total_external_electron_sources!, + add_total_external_electron_source_to_Jacobian! 
using moment_kinetics.krook_collisions: electron_krook_collisions!, add_electron_krook_collisions_to_Jacobian! using moment_kinetics.looping @@ -54,122 +54,135 @@ zero = 1.0e-14 # * For `z_bc = "periodic"`, the Jacobian matrices (by design) do not account for the # periodicity. This should be fine when they are used as preconditioners, but does # introduce errors at the periodic boundaries which would complicate testing. -test_input = Dict("run_name" => "jacobian_matrix", - "n_ion_species" => 1, - "n_neutral_species" => 1, - "electron_physics" => "kinetic_electrons", - "evolve_moments_density" => true, - "evolve_moments_parallel_flow" => true, - "evolve_moments_parallel_pressure" => true, - "evolve_moments_conservation" => true, - "recycling_fraction" => 0.5, - "T_e" => 1.0, - "T_wall" => 0.1, - "initial_density1" => 1.0, - "initial_temperature1" => 1.0, - "z_IC_option1" => "sinusoid", - "z_IC_density_amplitude1" => 0.1, - "z_IC_density_phase1" => mk_float(π), - "z_IC_upar_amplitude1" => 0.1, - "z_IC_upar_phase1" => mk_float(π), - "z_IC_temperature_amplitude1" => 0.1, - "z_IC_temperature_phase1" => mk_float(π), - "vpa_IC_option1" => "gaussian", - "vpa_IC_density_amplitude1" => 1.0, - "vpa_IC_density_phase1" => 0.0, - "vpa_IC_upar_amplitude1" => 0.0, - "vpa_IC_upar_phase1" => 0.0, - "vpa_IC_temperature_amplitude1" => 0.0, - "vpa_IC_temperature_phase1" => 0.0, - "initial_density2" => 1.0, - "initial_temperature2" => 1.0, - "z_IC_option2" => "sinusoid", - "z_IC_density_amplitude2" => 0.001, - "z_IC_density_phase2" => mk_float(π), - "z_IC_upar_amplitude2" => 0.0, - "z_IC_upar_phase2" => mk_float(π), - "z_IC_temperature_amplitude2" => 0.0, - "z_IC_temperature_phase2" => mk_float(π), - "vpa_IC_option2" => "gaussian", - "vpa_IC_density_amplitude2" => 1.0, - "vpa_IC_density_phase2" => 0.0, - "vpa_IC_upar_amplitude2" => 0.0, - "vpa_IC_upar_phase2" => 0.0, - "vpa_IC_temperature_amplitude2" => 0.0, - "vpa_IC_temperature_phase2" => 0.0, - "charge_exchange_frequency" => 0.75, - 
"ionization_frequency" => 0.0, - "constant_ionization_rate" => false, - "r_ngrid" => 1, - "r_nelement" => 1, - "z_ngrid" => 9, - "z_nelement" => 16, - "z_bc" => "constant", - "z_discretization" => "gausslegendre_pseudospectral", - "vpa_ngrid" => 6, - "vpa_nelement" => 31, - "vpa_L" => 12.0, - "vpa_bc" => "zero", - "vpa_discretization" => "gausslegendre_pseudospectral", - "vpa_element_spacing_option" => "coarse_tails", - "vz_ngrid" => 6, - "vz_nelement" => 31, - "vz_L" => 12.0, - "vz_bc" => "zero", - "vz_discretization" => "gausslegendre_pseudospectral", - "vz_element_spacing_option" => "coarse_tails", - "timestepping" => Dict{String,Any}("type" => "KennedyCarpenterARK324", - "implicit_electron_advance" => false, - "implicit_electron_ppar" => true, - "implicit_ion_advance" => false, - "implicit_vpa_advection" => false, - "nstep" => 1, - "dt" => ion_dt, - "minimum_dt" => 1.0e-7, - "rtol" => 1.0e-4, - "max_increase_factor_near_last_fail" => 1.001, - "last_fail_proximity_factor" => 1.1, - "max_increase_factor" => 1.05, - "nwrite" => 10000, - "nwrite_dfns" => 10000, - "steady_state_residual" => true, - "converged_residual_value" => 1.0e-3, - ), - "electron_timestepping" => Dict{String,Any}("nstep" => 1, - "dt" => dt, - "maximum_dt" => 1.0, - "nwrite" => 10000, - "nwrite_dfns" => 100000, - "type" => "Fekete4(3)", - "rtol" => 1.0e-6, - "atol" => 1.0e-14, - "minimum_dt" => 1.0e-10, - "initialization_residual_value" => 2.5, - "converged_residual_value" => 1.0e-2, - "constraint_forcing_rate" => 2.321, - ), - "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100, - "rtol" => 1.0e-5, - "atol" => 1.0e-15, - "preconditioner_update_interval" => 1, +test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix", + ), + "composition" => OptionsDict("n_ion_species" => 1, + "n_neutral_species" => 1, + "electron_physics" => "kinetic_electrons", + "recycling_fraction" => 0.5, + "T_e" => 1.0, + "T_wall" => 0.1, + ), + "evolve_moments" => 
OptionsDict("density" => true, + "parallel_flow" => true, + "parallel_pressure" => true, + "moments_conservation" => true, ), - "ion_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 1.0e0, - "force_minimum_pdf_value" => 0.0, - ), - "electron_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 2.0, - "force_minimum_pdf_value" => 0.0, - ), - "neutral_numerical_dissipation" => Dict{String,Any}("vz_dissipation_coefficient" => 1.0e-1, - "force_minimum_pdf_value" => 0.0, - ), - "ion_source" => Dict{String,Any}("active" => true, - "z_profile" => "gaussian", - "z_width" => 0.125, - "source_strength" => 0.1, - "source_T" => 2.0, - ), - "krook_collisions" => Dict{String,Any}("use_krook" => true), - ) + "ion_species_1" => OptionsDict("initial_density" => 1.0, + "initial_temperature" => 1.0, + ), + "z_IC_ion_species_1" => OptionsDict("initialization_option" => "sinusoid", + "density_amplitude" => 0.1, + "density_phase" => mk_float(π), + "upar_amplitude" => 0.1, + "upar_phase" => mk_float(π), + "temperature_amplitude" => 0.1, + "temperature_phase" => mk_float(π), + ), + "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "upar_phase" => 0.0, + "temperature_amplitude" => 0.0, + "temperature_phase" => 0.0, + ), + "neutral_species_1" => OptionsDict("initial_density" => 1.0, + "initial_temperature" => 1.0, + ), + "z_IC_neutral_species_1" => OptionsDict("initialization_option" => "sinusoid", + "density_amplitude" => 0.001, + "density_phase" => mk_float(π), + "upar_amplitude" => 0.0, + "upar_phase" => mk_float(π), + "temperature_amplitude" => 0.0, + "temperature_phase" => mk_float(π), + ), + "vz_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "upar_phase" => 0.0, + "temperature_amplitude" => 0.0, + 
"temperature_phase" => 0.0, + ), + "reactions" => OptionsDict("charge_exchange_frequency" => 0.75, + "ionization_frequency" => 0.0, + ), + "r" => OptionsDict("ngrid" => 1, + "nelement" => 1, + ), + "z" => OptionsDict("ngrid" => 9, + "nelement" => 16, + "bc" => "constant", + "discretization" => "gausslegendre_pseudospectral", + ), + "vpa" => OptionsDict("ngrid" => 6, + "nelement" => 31, + "L" => 12.0, + "bc" => "zero", + "discretization" => "gausslegendre_pseudospectral", + "element_spacing_option" => "coarse_tails", + ), + "vz" => OptionsDict("ngrid" => 6, + "nelement" => 31, + "L" => 12.0, + "bc" => "zero", + "discretization" => "gausslegendre_pseudospectral", + "element_spacing_option" => "coarse_tails", + ), + "timestepping" => Dict{String,Any}("type" => "KennedyCarpenterARK324", + "implicit_electron_advance" => false, + "implicit_electron_ppar" => true, + "implicit_ion_advance" => false, + "implicit_vpa_advection" => false, + "nstep" => 1, + "dt" => ion_dt, + "minimum_dt" => 1.0e-7, + "rtol" => 1.0e-4, + "max_increase_factor_near_last_fail" => 1.001, + "last_fail_proximity_factor" => 1.1, + "max_increase_factor" => 1.05, + "nwrite" => 10000, + "nwrite_dfns" => 10000, + "steady_state_residual" => true, + "converged_residual_value" => 1.0e-3, + ), + "electron_timestepping" => Dict{String,Any}("nstep" => 1, + "dt" => dt, + "maximum_dt" => 1.0, + "nwrite" => 10000, + "nwrite_dfns" => 100000, + "type" => "Fekete4(3)", + "rtol" => 1.0e-6, + "atol" => 1.0e-14, + "minimum_dt" => 1.0e-10, + "initialization_residual_value" => 2.5, + "converged_residual_value" => 1.0e-2, + "constraint_forcing_rate" => 2.321, + ), + "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100, + "rtol" => 1.0e-5, + "atol" => 1.0e-15, + "preconditioner_update_interval" => 1, + ), + "ion_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 1.0e0, + "force_minimum_pdf_value" => 0.0, + ), + "electron_numerical_dissipation" => 
Dict{String,Any}("vpa_dissipation_coefficient" => 2.0, + "force_minimum_pdf_value" => 0.0, + ), + "neutral_numerical_dissipation" => Dict{String,Any}("vz_dissipation_coefficient" => 1.0e-1, + "force_minimum_pdf_value" => 0.0, + ), + "ion_source_1" => Dict{String,Any}("active" => true, + "z_profile" => "gaussian", + "z_width" => 0.125, + "source_strength" => 0.1, + "source_T" => 2.0, + ), + "krook_collisions" => Dict{String,Any}("use_krook" => true), + ) function get_mk_state(test_input) mk_state = nothing @@ -211,7 +224,7 @@ end # error (∼epsilon^2) could be multiplied by ∼vth*vpa.L/2∼sqrt(2)*60*6≈500. function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_z_advection" + test_input["output"]["run_name"] *= "_electron_z_advection" println(" electron_z_advection") @testset "electron_z_advection" begin @@ -439,7 +452,7 @@ end function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_vpa_advection" + test_input["output"]["run_name"] *= "_electron_vpa_advection" println(" electron_vpa_advection") @testset "electron_vpa_advection" begin @@ -707,7 +720,7 @@ end function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_contribution_from_electron_pdf_term" + test_input["output"]["run_name"] *= "_contribution_from_electron_pdf_term" println(" contribution_from_electron_pdf_term") @testset "contribution_from_electron_pdf_term" begin @@ -961,7 +974,7 @@ end function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_dissipation_term" + test_input["output"]["run_name"] *= "_electron_dissipation_term" println(" electron_dissipation_term") @testset "electron_dissipation_term" begin @@ -1193,7 +1206,7 @@ end function 
test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_krook_collisions" + test_input["output"]["run_name"] *= "_electron_krook_collisions" println(" electron_krook_collisions") @testset "electron_krook_collisions" begin @@ -1431,7 +1444,7 @@ end function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_external_electron_source" + test_input["output"]["run_name"] *= "_external_electron_source" println(" external_electron_source") @testset "external_electron_source" begin @@ -1513,9 +1526,9 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) end end - add_external_electron_source_to_Jacobian!( - jacobian_matrix, f, moments, me, z_speed, external_source_settings, z, vperp, - vpa, dt, ir; ppar_offset=pdf_size) + add_total_external_electron_source_to_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, + z, vperp, vpa, dt, ir; ppar_offset=pdf_size) function residual_func!(residual, this_f, this_p) begin_z_region() @@ -1544,9 +1557,10 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) @loop_z_vperp_vpa iz ivperp ivpa begin residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] end - external_electron_source!(residual, this_f, dens, upar, moments, composition, - external_source_settings.electron, vperp, vpa, dt, - ir) + total_external_electron_sources!(residual, this_f, dens, upar, moments, + composition, + external_source_settings.electron, vperp, + vpa, dt, ir) # Now # residual = f_electron_old + dt*RHS(f_electron_newvar) # so update to desired residual @@ -1703,7 +1717,7 @@ end # correct. 
function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsilon)) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_implicit_constraint_forcing" + test_input["output"]["run_name"] *= "_electron_implicit_constraint_forcing" println(" electron_implicit_constraint_forcing") @testset "electron_implicit_constraint_forcing" begin @@ -1938,7 +1952,7 @@ end function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_energy_equation" + test_input["output"]["run_name"] *= "_electron_energy_equation" println(" electron_energy_equation") @testset "electron_energy_equation" begin @@ -2147,7 +2161,7 @@ end function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_ion_dt_forcing_of_electron_ppar" + test_input["output"]["run_name"] *= "_ion_dt_forcing_of_electron_ppar" println(" ion_dt_forcing_of_electron_ppar") @testset "ion_dt_forcing_of_electron_ppar" begin @@ -2342,7 +2356,7 @@ end function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) test_input = deepcopy(test_input) - test_input["run_name"] *= "_electron_kinetic_equation" + test_input["output"]["run_name"] *= "_electron_kinetic_equation" println(" electron_kinetic_equation") @testset "electron_kinetic_equation" begin @@ -2590,7 +2604,7 @@ end function runtests() # Create a temporary directory for test output test_output_directory = get_MPI_tempdir() - test_input["base_directory"] = test_output_directory + test_input["output"]["base_directory"] = test_output_directory @testset "Jacobian matrix" verbose=use_verbose begin println(" Jacobian matrix") diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index 29fe633c5..2d609398f 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ 
b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -58,7 +58,7 @@ function linear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, @@ -171,7 +171,7 @@ function nonlinear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, From 5eaeae860ee3f86139cd5c97e57e3f1cf9589d00 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 18 Sep 2024 22:53:40 +0100 Subject: [PATCH 071/107] Fix D_matrix after merging gauss_legendre fixes See \#249. 
--- moment_kinetics/src/gauss_legendre.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 0d6e6addc..a2ad153af 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -134,7 +134,7 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; periodic_bc=periodic_bc) setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; periodic_bc=periodic_bc) setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms") - setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; periodic_bc=periodic_bc) dense_second_deriv_matrix = inv(mass_matrix) * K_matrix mass_matrix_lu = lu(sparse(mass_matrix)) if dirichlet_bc || periodic_bc From 55fae4e9f40f79154a1d14d3d607ecd4d480eb9b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 18 Sep 2024 23:06:49 +0100 Subject: [PATCH 072/107] Add function to assemble strong-form matrix in gauss_legendre ...and use this to assemble D_matrix. This replicates what was previously done in setup_global_weak_form_matrix!() before the fixes in \#249, as the old form was correct for a 'strong form' matrix like D_matrix, which is used to calculate a first derivative (with no mass matrix, etc.). Without this change, the Jacobian matrix tests fail because D_matrix is inconsistent with how the derivatives are calculated normally. 
--- moment_kinetics/src/gauss_legendre.jl | 75 ++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index a2ad153af..97c31d54e 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -134,7 +134,7 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; periodic_bc=periodic_bc) setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; periodic_bc=periodic_bc) setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms") - setup_global_weak_form_matrix!(D_matrix, lobatto, radau, coord, "D"; periodic_bc=periodic_bc) + setup_global_strong_form_matrix!(D_matrix, lobatto, radau, coord, "D"; periodic_bc=periodic_bc) dense_second_deriv_matrix = inv(mass_matrix) * K_matrix mass_matrix_lu = lu(sparse(mass_matrix)) if dirichlet_bc || periodic_bc @@ -951,6 +951,79 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, return nothing end +""" +A function that assigns the local matrices to +a global array QQ_global for later evaluating strong form of required 1D equation. + +The 'option' variable is a flag for +choosing the type of matrix to be constructed. +Currently the function is set up to assemble the +elemental matrices without imposing boundary conditions on the +first and final rows of the matrix. This means that +the operators constructed from this function can only be used +for differentiation, and not solving 1D ODEs. +The shared points in the element assembly are +averaged (instead of simply added) to be consistent with the +derivative_elements_to_full_grid!() function in calculus.jl. 
+""" +function setup_global_strong_form_matrix!(QQ_global::Array{mk_float,2}, + lobatto::gausslegendre_base_info, + radau::gausslegendre_base_info, + coord,option; periodic_bc=false) + QQ_j = allocate_float(coord.ngrid,coord.ngrid) + QQ_jp1 = allocate_float(coord.ngrid,coord.ngrid) + + ngrid = coord.ngrid + imin = coord.imin + imax = coord.imax + @. QQ_global = 0.0 + + # fill in first element + j = 1 + # N.B. QQ varies with ielement for vperp, but not vpa + # a radau element is used for the vperp grid (see get_QQ_local!()) + get_QQ_local!(QQ_j,j,lobatto,radau,coord,option) + if periodic_bc && coord.nrank != 1 + error("periodic boundary conditions not supported when dimension is distributed") + end + if periodic_bc && coord.nrank == 1 + QQ_global[imax[end], imin[j]:imax[j]] .+= QQ_j[1,:] ./ 2.0 + QQ_global[1,1] += 1.0 + QQ_global[1,end] += -1.0 + else + QQ_global[imin[j],imin[j]:imax[j]] .+= QQ_j[1,:] + end + for k in 2:imax[j]-imin[j] + QQ_global[k,imin[j]:imax[j]] .+= QQ_j[k,:] + end + if coord.nelement_local > 1 + QQ_global[imax[j],imin[j]:imax[j]] .+= QQ_j[ngrid,:]./2.0 + else + QQ_global[imax[j],imin[j]:imax[j]] .+= QQ_j[ngrid,:] + end + # remaining elements recalling definitions of imax and imin + for j in 2:coord.nelement_local + get_QQ_local!(QQ_j,j,lobatto,radau,coord,option) + #lower boundary assembly on element + QQ_global[imin[j]-1,imin[j]-1:imax[j]] .+= QQ_j[1,:]./2.0 + for k in 2:imax[j]-imin[j]+1 + QQ_global[k+imin[j]-2,imin[j]-1:imax[j]] .+= QQ_j[k,:] + end + # upper boundary assembly on element + if j == coord.nelement_local + if periodic_bc && coord.nrank == 1 + QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] / 2.0 + else + QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] + end + else + QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:]./2.0 + end + end + + return nothing +end + function get_QQ_local!(QQ::Array{mk_float,2},ielement, lobatto::gausslegendre_base_info, radau::gausslegendre_base_info, From 
13843456d2edefcf726362917f4fe63453f87fe5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 19 Sep 2024 09:06:32 +0100 Subject: [PATCH 073/107] Limit maximum electron pseudo-timestep size relative to ion_dt This may help to reduce iteration counts to reach a steady-state-enough solution for the electrons. --- .../src/electron_kinetic_equation.jl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index c5fb3c7a5..bd62b7434 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1312,6 +1312,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos right_preconditioner=right_preconditioner, coords=(z=z, vperp=vperp, vpa=vpa)) if newton_success + cap_factor_ion_dt = 10 #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) begin_serial_region() @serial_region begin @@ -1339,10 +1340,14 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.previous_dt[] /= t_params.max_increase_factor println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) - elseif nl_solver_params.max_linear_iterations_this_step[] < 20 + elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && t_params.previous_dt[] < cap_factor_ion_dt * ion_dt # Only took a few iterations, so increase initial step size. 
print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) - t_params.previous_dt[] *= t_params.max_increase_factor + if ion_dt === nothing + t_params.previous_dt[] *= t_params.max_increase_factor + else + t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, cap_factor_ion_dt * ion_dt) + end println(" -> ", t_params.previous_dt[]) end end @@ -1356,9 +1361,13 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos if nl_solver_params.max_linear_iterations_this_step[] > 100 && t_params.dt[] > t_params.previous_dt[] # Step succeeded, but took a lot of iterations so decrease step size. t_params.dt[] /= t_params.max_increase_factor - elseif nl_solver_params.max_linear_iterations_this_step[] < 20 + elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && t_params.dt[] < cap_factor_ion_dt * ion_dt # Only took a few iterations, so increase step size. - t_params.dt[] *= t_params.max_increase_factor + if ion_dt === nothing + t_params.dt[] *= t_params.max_increase_factor + else + t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, cap_factor_ion_dt * ion_dt) + end end end _block_synchronize() From 071471a6b33f4641f942bed0e32530f50193b12b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 19 Sep 2024 14:39:03 +0100 Subject: [PATCH 074/107] Fix loading of `restart_electron_physics` option after inputs refactor `electron_physics` is now in the `[composition]` section, and should always exist in the 'input' that is written to the output file (as `input_dict` has all the defaults filled in before it is written). 
--- moment_kinetics/src/load_data.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 14df0156c..ae0b091ab 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -875,12 +875,13 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_electron_evolve_ppar = true, true, true electron_evolve_density, electron_evolve_upar, electron_evolve_ppar = true, true, true - if "electron_physics" ∈ keys(restart_input) - restart_electron_physics = enum_from_string(electron_physics_type, - restart_input["electron_physics"]) - else - restart_electron_physics = boltzmann_electron_response - end + # Input is written to output files with all defaults filled in, and + # restart_input is read from a previous output file. + # restart_input["composition"]["electron_physics"] should always exist, even + # if it was set from a default, so we do not have to check the keys to see + # whether it exists. + restart_electron_physics = enum_from_string(electron_physics_type, + restart_input["composition"]["electron_physics"]) if pdf.electron !== nothing && restart_electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) From 8fc99150eb8408f6338fbd3d815c11d13d5bda29 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 19 Sep 2024 14:45:44 +0100 Subject: [PATCH 075/107] Do reload electron timestep when restarting a kinetic electron sim Need special handling for this, because electron_backward_euler!() is a special case (it is not an adaptive RK scheme, so does not have `adaptive = true`). 
--- moment_kinetics/src/time_advance.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 9ac2cbade..71df91ea8 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -319,9 +319,14 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, t_input["split_operators"]) if !adaptive - # No adaptive timestep, want to use the value from the input file even when we are - # restarting - dt_reload = nothing + if electron !== nothing + # No adaptive timestep, want to use the value from the input file even when we are + # restarting. + # Do not want to do this for electrons, because electron_backward_euler!() + # uses an adaptive timestep (based on nonlinear solver iteration counts) even + # though it does not use an adaptive RK scheme. + dt_reload = nothing + end # Makes no sense to use write_error_diagnostics because non-adaptive schemes have # no error estimate From b6d6332eef0cfa7e39c85dcd108547bf1a4fdf2b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 20 Sep 2024 23:38:32 +0100 Subject: [PATCH 076/107] Bndry adv_fac in `fill_electron_kinetic_equation_Jacobian!()` --- moment_kinetics/src/electron_kinetic_equation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index bd62b7434..25e8cf04a 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -3014,8 +3014,8 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome z_speed = @view z_advect[1].speed[:,:,:,ir] begin_vperp_vpa_region() @loop_vperp_vpa ivperp ivpa begin - adv_fac_lower[ivpa,ivperp] = -z_speed[ivpa,ivperp,1] - adv_fac_upper[ivpa,ivperp] = -z_speed[ivpa,ivperp,end] + adv_fac_lower[ivpa,ivperp] = -z_speed[1,ivpa,ivperp] + 
adv_fac_upper[ivpa,ivperp] = -z_speed[end,ivpa,ivperp] end add_electron_z_advection_to_Jacobian!( From 6cd036c0a5348268f26754bfe74abce5159163bf Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 20 Sep 2024 23:49:03 +0100 Subject: [PATCH 077/107] Fix precompile runs for kinetic electrons Need to use "gausslegendre_pseudospectral" so that we can build the Jacobian. --- util/precompile_run.jl | 3 +++ util/precompile_run_kinetic-electrons.jl | 14 +++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/util/precompile_run.jl b/util/precompile_run.jl index 09158e055..40ed0e57b 100644 --- a/util/precompile_run.jl +++ b/util/precompile_run.jl @@ -89,6 +89,9 @@ geo_input1 = recursive_merge(wall_bc_cheb_input, OptionsDict("composition" => Op kinetic_electron_input = recursive_merge(cheb_input, OptionsDict("evolve_moments" => OptionsDict("density" => true, "parallel_flow" => true, "parallel_pressure" => true), + "z" => OptionsDict("discretization" => "gausslegendre_pseudospectral"), + "vpa" => OptionsDict("discretization" => "gausslegendre_pseudospectral"), + "vz" => OptionsDict("discretization" => "gausslegendre_pseudospectral"), "r" => OptionsDict("ngrid" => 1, "nelement" => 1), "vperp" => OptionsDict("ngrid" => 1, diff --git a/util/precompile_run_kinetic-electrons.jl b/util/precompile_run_kinetic-electrons.jl index 28e8957e7..fd3e54ca7 100644 --- a/util/precompile_run_kinetic-electrons.jl +++ b/util/precompile_run_kinetic-electrons.jl @@ -18,36 +18,36 @@ input = OptionsDict("output" => OptionsDict("run_name" => "precompilation", "r" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "periodic", - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "z" => OptionsDict("ngrid" => 5, "nelement" => 4, "bc" => "wall", - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vperp" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "zero", "L" => 
4.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vpa" => OptionsDict("ngrid" => 7, "nelement" => 8, "bc" => "zero", "L" => 8.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vzeta" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "zero", "L" => 4.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vr" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "zero", "L" => 4.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vz" => OptionsDict("ngrid" => 7, "nelement" => 8, "bc" => "zero", "L" => 8.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "timestepping" => OptionsDict("nstep" => 1, "dt" => 2.0e-11), "electron_timestepping" => OptionsDict("nstep" => 1, From e4c0f6590084f3fe1560b83a5ac55307c572e46e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 20 Sep 2024 23:54:29 +0100 Subject: [PATCH 078/107] Fix capping of electron dt relative to ion_dt --- moment_kinetics/src/electron_kinetic_equation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 25e8cf04a..5a02e0786 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1340,7 +1340,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.previous_dt[] /= t_params.max_increase_factor println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) - elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && t_params.previous_dt[] < cap_factor_ion_dt * ion_dt + elseif 
nl_solver_params.max_linear_iterations_this_step[] < 20 && (ion_dt === nothing || t_params.previous_dt[] < cap_factor_ion_dt * ion_dt) # Only took a few iterations, so increase initial step size. print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) if ion_dt === nothing @@ -1361,7 +1361,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos if nl_solver_params.max_linear_iterations_this_step[] > 100 && t_params.dt[] > t_params.previous_dt[] # Step succeeded, but took a lot of iterations so decrease step size. t_params.dt[] /= t_params.max_increase_factor - elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && t_params.dt[] < cap_factor_ion_dt * ion_dt + elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && (ion_dt === nothing || t_params.dt[] < cap_factor_ion_dt * ion_dt) # Only took a few iterations, so increase step size. if ion_dt === nothing t_params.dt[] *= t_params.max_increase_factor From 88035a30ffbe735ac699ead4c8cb77826fec077f Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 21 Sep 2024 00:12:55 +0100 Subject: [PATCH 079/107] Activate bounds checking in CI debug checks Previously this was mistakenly deactivated when precompiling, because `precompile.jl` forces `--check-bounds=no`. 
--- .github/workflows/debug_checks.yml | 2 +- .../debug_test/recycling_fraction_inputs.jl | 4 ++-- precompile-with-check-bounds.jl | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 precompile-with-check-bounds.jl diff --git a/.github/workflows/debug_checks.yml b/.github/workflows/debug_checks.yml index bc534eb24..fc2d1edec 100644 --- a/.github/workflows/debug_checks.yml +++ b/.github/workflows/debug_checks.yml @@ -34,7 +34,7 @@ jobs: julia --project -O3 --check-bounds=yes -e 'using Pkg; Pkg.add(["MPI", "MPIPreferences", "PackageCompiler", "Symbolics"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' julia --project -O3 --check-bounds=yes -e 'using Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.precompile()' - julia --project -O3 --check-bounds=yes precompile.jl --debug 2 + julia --project -O3 --check-bounds=yes precompile-with-check-bounds.jl --debug 2 # Need to use openmpi so that the following arguments work: # * `--mca rmaps_base_oversubscribe 1` allows oversubscription (more processes diff --git a/moment_kinetics/debug_test/recycling_fraction_inputs.jl b/moment_kinetics/debug_test/recycling_fraction_inputs.jl index fec866af1..2d31efef4 100644 --- a/moment_kinetics/debug_test/recycling_fraction_inputs.jl +++ b/moment_kinetics/debug_test/recycling_fraction_inputs.jl @@ -65,12 +65,12 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "discretization" => "chebyshev_pseudospectral", "element_spacing_option" => "sqrt"), "vpa" => OptionsDict("ngrid" => 3, - "nelement" => 2, + "nelement" => 3, "L" => 6.0, "bc" => "zero", "discretization" => "chebyshev_pseudospectral"), "vz" => OptionsDict("ngrid" => 3, - "nelement" => 2, + "nelement" => 4, "L" => 6.0, "bc" => "zero", "discretization" => "chebyshev_pseudospectral"), diff --git a/precompile-with-check-bounds.jl 
b/precompile-with-check-bounds.jl new file mode 100644 index 000000000..b7be917bb --- /dev/null +++ b/precompile-with-check-bounds.jl @@ -0,0 +1,16 @@ +using Pkg + +# Activate the moment_kinetics package +Pkg.activate(".") + +using PackageCompiler + +# Create the sysimage 'moment_kinetics.so' in the base moment_kinetics source directory +# with both moment_kinetics and the dependencies listed above precompiled. +# Warning: editing the code will not affect what runs when using this .so, you +# need to re-precompile if you change anything. +create_sysimage(; sysimage_path="moment_kinetics.so", + precompile_execution_file="util/precompile_run.jl", + include_transitive_dependencies=false, # This is needed to make MPI work, see https://github.com/JuliaParallel/MPI.jl/issues/518 + sysimage_build_args=`-O3 --check-bounds=yes`, + ) From bd9f612d7ab2aa457adecc01ac7bd97774935de7 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 21 Sep 2024 01:21:46 +0100 Subject: [PATCH 080/107] Make timestep control parameters for kinetic electrons settable ...using the input file, instead of their values being hard-coded. 
--- .../periodic_split3_kinetic-IMEX.toml | 3 +++ ...it-electron_ppar-loworder-PareschiRusso2222.toml | 3 +++ ...it3_kinetic-implicit-electron_ppar-loworder.toml | 3 +++ .../kinetic-electrons/periodic_split3_kinetic.toml | 3 +++ ...periodic_split3_kinetic_high-collisionality.toml | 3 +++ ..._recyclefraction0.5_split3_kinetic-vpadiss0.toml | 3 +++ moment_kinetics/src/electron_kinetic_equation.jl | 13 ++++++------- moment_kinetics/src/input_structs.jl | 3 +++ moment_kinetics/src/moment_kinetics_input.jl | 3 +++ moment_kinetics/src/time_advance.jl | 12 +++++++++++- 10 files changed, 41 insertions(+), 8 deletions(-) diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml index a67591698..a0d6321d6 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml @@ -116,6 +116,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 #debug_io = 10000 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml index 0b302dae3..3a6c18ad7 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml @@ -121,6 +121,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 #converged_residual_value = 0.1 #1.0e-3 converged_residual_value = 1.0e-2 
diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml index b73a980fa..68b355bed 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -128,6 +128,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 #converged_residual_value = 0.1 #1.0e-3 converged_residual_value = 1.0e-2 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic.toml b/examples/kinetic-electrons/periodic_split3_kinetic.toml index b99a5afb0..56d532c32 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic.toml @@ -134,6 +134,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 #debug_io = 10000 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml b/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml index 8a7e9ea3a..9f3452eb9 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml @@ -137,6 +137,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 #debug_io = 10000 diff --git 
a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml index dc32eac73..be7bcb0a1 100644 --- a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml @@ -110,6 +110,9 @@ type = "Fekete4(3)" rtol = 1.0e-3 atol = 1.0e-14 minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 5a02e0786..6655d6a03 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1312,7 +1312,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos right_preconditioner=right_preconditioner, coords=(z=z, vperp=vperp, vpa=vpa)) if newton_success - cap_factor_ion_dt = 10 #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) begin_serial_region() @serial_region begin @@ -1333,20 +1332,20 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.previous_dt[] = t_params.dt[] println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) - elseif nl_solver_params.max_linear_iterations_this_step[] > 100 + elseif nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold # Step succeeded, but took a lot of iterations so decrease initial # step size. 
print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) t_params.previous_dt[] /= t_params.max_increase_factor println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) - elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && (ion_dt === nothing || t_params.previous_dt[] < cap_factor_ion_dt * ion_dt) + elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.previous_dt[] < t_params.cap_factor_ion_dt * ion_dt) # Only took a few iterations, so increase initial step size. print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) if ion_dt === nothing t_params.previous_dt[] *= t_params.max_increase_factor else - t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, cap_factor_ion_dt * ion_dt) + t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) end println(" -> ", t_params.previous_dt[]) end @@ -1358,15 +1357,15 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # the solver than the nonlinear iteration count, or the linear iterations # per nonlinear iteration #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) - if nl_solver_params.max_linear_iterations_this_step[] > 100 && t_params.dt[] > t_params.previous_dt[] + if nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold && t_params.dt[] > t_params.previous_dt[] # Step succeeded, but took a lot of iterations so decrease step size. 
t_params.dt[] /= t_params.max_increase_factor - elseif nl_solver_params.max_linear_iterations_this_step[] < 20 && (ion_dt === nothing || t_params.dt[] < cap_factor_ion_dt * ion_dt) + elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.dt[] < t_params.cap_factor_ion_dt * ion_dt) # Only took a few iterations, so increase step size. if ion_dt === nothing t_params.dt[] *= t_params.max_increase_factor else - t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, cap_factor_ion_dt * ion_dt) + t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) end end end diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index dae3ec23e..641276e33 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -80,6 +80,9 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero implicit_vpa_advection::Bool implicit_electron_ppar::Bool constraint_forcing_rate::mk_float + decrease_dt_iteration_threshold::mk_int + increase_dt_iteration_threshold::mk_int + cap_factor_ion_dt::mk_float write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 85e2151e7..b04bbd9a4 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -207,6 +207,9 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI write_steady_state_diagnostics=false, high_precision_error_sum=timestepping_section["high_precision_error_sum"], initialization_residual_value=1.0, + decrease_dt_iteration_threshold=100, + increase_dt_iteration_threshold=20, + cap_factor_ion_dt=10.0, no_restart=false, debug_io=false, ) diff --git a/moment_kinetics/src/time_advance.jl 
b/moment_kinetics/src/time_advance.jl index 71df91ea8..1b220e97a 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -434,12 +434,21 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, else debug_io = nothing end + decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"] + increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"] + cap_factor_ion_dt = t_input["cap_factor_ion_dt"] electron_t_params = nothing elseif electron === false debug_io = nothing + decrease_dt_iteration_threshold = -1 + increase_dt_iteration_threshold = typemax(mk_int) + cap_factor_ion_dt = Inf electron_t_params = nothing else debug_io = nothing + decrease_dt_iteration_threshold = -1 + increase_dt_iteration_threshold = typemax(mk_int) + cap_factor_ion_dt = Inf electron_t_params = electron end return time_info(n_variables, t_input["nstep"], end_time, t_shared, dt_shared, @@ -461,7 +470,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron !== nothing && t_input["implicit_ion_advance"], electron !== nothing && t_input["implicit_vpa_advection"], electron !== nothing && t_input["implicit_electron_ppar"], - t_input["constraint_forcing_rate"], + t_input["constraint_forcing_rate"], decrease_dt_iteration_threshold, + increase_dt_iteration_threshold, cap_factor_ion_dt, t_input["write_after_fixed_step_count"], error_sum_zero, t_input["split_operators"], t_input["steady_state_residual"], t_input["converged_residual_value"], From dbc123cce80931529715e034dd64b80150ba024a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 21 Sep 2024 01:23:07 +0100 Subject: [PATCH 081/107] Make advance_info struct immutable This is allowed now, and may be slightly more efficient. 
--- moment_kinetics/src/input_structs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 641276e33..ac6ecf192 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -96,7 +96,7 @@ end """ """ -mutable struct advance_info +struct advance_info vpa_advection::Bool vperp_advection::Bool z_advection::Bool From e94efb78b3988a4382e7b73213e1edf075e46624 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 21 Sep 2024 01:25:17 +0100 Subject: [PATCH 082/107] Input file(s) for kinetic electron run with implicit pseudo-timestep --- examples/kinetic-electrons/README.md | 10 ++ .../periodic_split3_boltzmann.toml | 3 - .../periodic_split3_kinetic-IMEX.toml | 13 +- .../periodic_split3_kinetic.toml | 13 +- ...ic_split3_kinetic_high-collisionality.toml | 13 +- .../wall+sheath-bc_boltzmann_loworder.toml | 3 - .../wall+sheath-bc_kinetic.toml | 23 ++- ...wall+sheath-bc_kinetic_krook_loworder.toml | 22 +-- .../wall+sheath-bc_kinetic_loworder.toml | 20 +-- ...boltzmann-coarse_tails-uniform-z-init.toml | 129 ++++++++++++++ ...lit3_boltzmann-coarse_tails-uniform-z.toml | 127 ++++++++++++++ ...rse_tails-uniform-z-PareschiRusso2222.toml | 160 ++++++++++++++++++ ...lefraction0.5_split3_kinetic-vpadiss0.toml | 13 +- 13 files changed, 479 insertions(+), 70 deletions(-) create mode 100644 examples/kinetic-electrons/README.md create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml diff --git a/examples/kinetic-electrons/README.md b/examples/kinetic-electrons/README.md new file mode 100644 index 000000000..eaaa03310 --- /dev/null +++ 
b/examples/kinetic-electrons/README.md @@ -0,0 +1,10 @@ +This directory contains input files for some kinetic electron simulations that +are known to run (and probably some other experimental input files too). Inputs +that are expected to work: +* Wall bc with uniform grid. First converge a Boltzmann-electron simulation to + steady state, then restart kinetic electron simulation from that, e.g. + ```julia + run_moment_kinetics("wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml") + run_moment_kinetics("wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml"; restart="runs/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.dfns.h5") + run_moment_kinetics("wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml"; restart="runs/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.dfns.h5") + ``` diff --git a/examples/kinetic-electrons/periodic_split3_boltzmann.toml b/examples/kinetic-electrons/periodic_split3_boltzmann.toml index b0bbdc4ee..1b7715688 100644 --- a/examples/kinetic-electrons/periodic_split3_boltzmann.toml +++ b/examples/kinetic-electrons/periodic_split3_boltzmann.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml index a0d6321d6..d7f5461ff 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = 
true @@ -16,25 +13,25 @@ ngrid = 1 nelement = 1 [z] -ngrid = 17 -nelement = 16 +ngrid = 5 +nelement = 32 #nelement_local = 16 bc = "periodic" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vpa] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic.toml b/examples/kinetic-electrons/periodic_split3_kinetic.toml index 56d532c32..ad33440e0 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true @@ -16,25 +13,25 @@ ngrid = 1 nelement = 1 [z] -ngrid = 17 -nelement = 16 +ngrid = 5 +nelement = 32 #nelement_local = 16 bc = "periodic" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vpa] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml b/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml index 9f3452eb9..a16f23b0c 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 
ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true @@ -16,25 +13,25 @@ ngrid = 1 nelement = 1 [z] -ngrid = 17 -nelement = 16 +ngrid = 5 +nelement = 32 #nelement_local = 16 bc = "periodic" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vpa] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml b/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml index a06f36b1e..ca8ebbf38 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml @@ -5,9 +5,6 @@ ionization_frequency = 2.0 #electron_ionization_frequency = 2.0 #ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false parallel_flow = false diff --git a/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml b/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml index 84fb66008..6270dc514 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml @@ -2,12 +2,9 @@ charge_exchange_frequency = 2.0 electron_charge_exchange_frequency = 0.0 ionization_frequency = 2.0 -electron_ionization_frequency = 2.0 +#electron_ionization_frequency = 2.0 ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false parallel_flow = false @@ -20,29 +17,29 @@ nelement = 1 [z] ngrid = 9 -#nelement = 16 -nelement = 32 +nelement = 16 +#nelement = 32 #nelement = 64 bc = "wall" -discretization = "chebyshev_pseudospectral" 
+discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 17 -#nelement = 10 -nelement = 20 +nelement = 10 +#nelement = 20 L = 12.0 #8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" #discretization = "gausslegendre_pseudospectral" [vz] ngrid = 17 -#nelement = 10 -nelement = 20 +nelement = 10 +#nelement = 20 L = 8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml b/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml index 843b7696a..6064b6347 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml @@ -2,12 +2,9 @@ charge_exchange_frequency = 2.0 electron_charge_exchange_frequency = 0.0 ionization_frequency = 2.0 -electron_ionization_frequency = 2.0 +#electron_ionization_frequency = 2.0 ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false parallel_flow = false @@ -20,28 +17,31 @@ nelement = 1 [z] ngrid = 5 -#nelement = 32 -nelement = 64 +nelement = 32 +#nelement = 64 #nelement = 128 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 5 +nelement = 31 #nelement = 40 -nelement = 80 +#nelement = 80 L = 12.0 #8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" #discretization = "gausslegendre_pseudospectral" [vz] ngrid = 5 -nelement = 80 +nelement = 31 +#nelement = 40 +#nelement = 80 L = 8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git 
a/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml b/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml index 6abaa537c..b09041af1 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml @@ -5,9 +5,6 @@ ionization_frequency = 2.0 #electron_ionization_frequency = 2.0 #ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false parallel_flow = false @@ -20,28 +17,31 @@ nelement = 1 [z] ngrid = 5 -#nelement = 32 -nelement = 64 +nelement = 32 +#nelement = 64 #nelement = 128 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 5 +nelement = 31 #nelement = 40 -nelement = 80 +#nelement = 80 L = 12.0 #8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" #discretization = "gausslegendre_pseudospectral" [vz] ngrid = 5 -nelement = 80 +nelement = 31 +#nelement = 40 +#nelement = 80 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml new file mode 100644 index 000000000..528b2d80a --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml @@ -0,0 +1,129 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L 
= 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "Fekete4(3)" +nstep = 2000000 +dt = 1.0e-5 +minimum_dt = 1.0e-6 +#maximum_dt = 5.0e-5 +#rtol = 1.0e-5 +#atol = 1.0e-12 +rtol = 1.0 +atol = 1.0 +nwrite = 25000 +nwrite_dfns = 25000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + 
+[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml new file mode 100644 index 000000000..c3dea0ad4 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml @@ -0,0 +1,127 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + 
+[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "Fekete4(3)" +nstep = 10000000 +dt = 1.0e-5 +minimum_dt = 1.0e-6 +#maximum_dt = 5.0e-5 +rtol = 1.0e-5 +atol = 1.0e-12 +nwrite = 25000 +nwrite_dfns = 25000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml new file mode 100644 index 000000000..edbc18d7c --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml @@ -0,0 +1,160 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 
0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "PareschiRusso2(2,2,2)" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 100000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 
+#nstep = 1 +dt = 2.0e-8 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml index be7bcb0a1..2fbd82e81 100644 --- a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml @@ -17,27 +17,28 @@ ngrid = 5 nelement = 32 #nelement_local = 16 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 6 -nelement = 63 +nelement = 31 #63 L = 48.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 -nelement = 63 +nelement = 31 #63 L = 36.0 bc = "zero" -discretization = "chebyshev_pseudospectral" 
+discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 n_neutral_species = 1 -electron_physics = "kinetic_electrons_with_temperature_equation" +#electron_physics = "kinetic_electrons_with_temperature_equation" +electron_physics = "kinetic_electrons" recycling_fraction = 0.5 T_e = 0.2 # 1.0 T_wall = 0.1 From 16256d6fde078c184287997d6ca8b059958acd24 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 21 Sep 2024 13:35:56 +0100 Subject: [PATCH 083/107] Update examples CI job with new input format Also exclude the PareschiRusso IMEX methods and any kinetic electron solve on macOS, as the least-squares solver will not work. --- .github/workflows/examples.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index d819ef9fd..b4332f3f4 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -27,4 +27,4 @@ jobs: # because the way we use MINPACK.jl (needed for nonlinear solvers # used for implicit parts of timestep) doesn't currently work on # macOS. 
- julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); if (occursin("ARK", get(t_input, "type", "")) && Sys.isapple()) continue end; t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(input, "z_nelement_local", ""); pop!(input, "r_nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' + julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); if ((occursin("ARK", get(t_input, "type", "")) || occursin("PareschiRusso", get(t_input, "type", "")) || occursin("kinetic_electrons", get(get(input, "composition", Dict{String,Any}()), "electron_physics", "boltzmann_electron_response"))) && Sys.isapple()) continue end; t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; 
electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' From f16d9317f95cb5e485eb2cb48600989fb5a7cc42 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 21 Sep 2024 13:37:19 +0100 Subject: [PATCH 084/107] Allow different vpa and vz grids for 1V reactions for 'split1' Previously in 1V, evolving density (but not upar or ppar) mode it was assumed that `vpa.grid` and `vz.grid` are identical. It might be useful to be allowed to choose different grids for vpa and vz, so enable this. --- moment_kinetics/src/charge_exchange.jl | 61 +++++++++++++------------- moment_kinetics/src/ionization.jl | 59 ++++++++++++------------- 2 files changed, 58 insertions(+), 62 deletions(-) diff --git a/moment_kinetics/src/charge_exchange.jl b/moment_kinetics/src/charge_exchange.jl index ac460dba0..3f6095a22 100644 --- a/moment_kinetics/src/charge_exchange.jl +++ b/moment_kinetics/src/charge_exchange.jl @@ -120,39 +120,38 @@ function charge_exchange_collisions_single_species!(f_out, pdf_in, pdf_other, # values of dz/dt; as charge exchange and ionization collisions require # the evaluation of the pdf for species s' to obtain the update for species s, # will thus have to interpolate between the different vpa grids - if moments.evolve_ppar || moments.evolve_upar - if !moments.evolve_upar - # if evolve_ppar = true and evolve_upar = false, vpa coordinate is - # vpahat_s = vpa/vth_s; - # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); - # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations - # in terms of the vpahat_{s'} coordinate: - # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} - @. 
vpa.scratch = vpa.grid / vth_ratio - elseif !moments.evolve_ppar - # if evolve_ppar = false and evolve_upar = true, vpa coordinate is - # wpa_s = vpa-upar_s; - # we have f_{s'}(wpa_{s'}) = f_{s'}((wpa_s + upar_s - upar_{s'}; - # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations - # in terms of the wpa_{s'} coordinate: - # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} - @. vpa.scratch = vpa.grid + upar[iz,ir] - upar_other[iz,ir] - else - # if evolve_ppar = true and evolve_upar = true, vpa coordinate is - # wpahat_s = (vpa-upar_s)/vth_s; - # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); - # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations - # in terms of the wpahat_{s'} coordinate: - # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} - @. vpa.scratch = (vpa.grid * vth[iz,ir] + upar[iz,ir] - upar_other[iz,ir]) / vth_other[iz,ir] - end - # interpolate to the new grid (passed in as vpa.scratch) - # and return interpolated values in vpa.scratch2 - @views interpolate_to_grid_vpa!(vpa.scratch2, vpa.scratch, pdf_other[:,iz,ir], vpa_other, spectral_other) + if moments.evolve_upar && moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = true, vpa coordinate is + # wpahat_s = (vpa-upar_s)/vth_s; + # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); + # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations + # in terms of the wpahat_{s'} coordinate: + # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} + new_grid = @. 
vpa.scratch = (vpa.grid * vth[iz,ir] + upar[iz,ir] - upar_other[iz,ir]) / vth_other[iz,ir] + elseif moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = false, vpa coordinate is + # vpahat_s = vpa/vth_s; + # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); + # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations + # in terms of the vpahat_{s'} coordinate: + # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} + new_grid = @. vpa.scratch = vpa.grid / vth_ratio + elseif moments.evolve_upar + # if evolve_ppar = false and evolve_upar = true, vpa coordinate is + # wpa_s = vpa-upar_s; + # we have f_{s'}(wpa_{s'}) = f_{s'}(wpa_s + upar_s - upar_{s'}); + # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations + # in terms of the wpa_{s'} coordinate: + # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} + new_grid = @. vpa.scratch = vpa.grid + upar[iz,ir] - upar_other[iz,ir] else + # Neither upar nor ppar evolved ('drift-kinetic' mode): still interpolate, so that vpa and vz + # coordinates can be different.
+ new_grid = vpa.grid end + # interpolate to new_grid and return interpolated values in vpa.scratch2 + @views interpolate_to_grid_vpa!(vpa.scratch2, new_grid, pdf_other[:,iz,ir], vpa_other, spectral_other) + if neutrals @loop_vz ivz begin f_out[ivz,iz,ir] += dt * charge_exchange_frequency * density_other[iz,ir] * diff --git a/moment_kinetics/src/ionization.jl b/moment_kinetics/src/ionization.jl index 0620f0717..c4380c208 100644 --- a/moment_kinetics/src/ionization.jl +++ b/moment_kinetics/src/ionization.jl @@ -42,39 +42,36 @@ function ion_ionization_collisions_1V!(f_out, fvec_in, vz, vpa, vperp, z, r, vz_ # values of dz/dt; as charge exchange and ionization collisions require # the evaluation of the pdf for species s' to obtain the update for species s, # will thus have to interpolate between the different vpa grids - if moments.evolve_ppar || moments.evolve_upar - if !moments.evolve_upar - # if evolve_ppar = true and evolve_upar = false, vpa coordinate is - # vpahat_s = vpa/vth_s; - # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); - # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations - # in terms of the vpahat_{s'} coordinate: - # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} - @. vpa.scratch = vpa.grid / vth_ratio - elseif !moments.evolve_ppar - # if evolve_ppar = false and evolve_upar = true, vpa coordinate is - # wpa_s = vpa-upar_s; - # we have f_{s'}(wpa_{s'}) = f_{s'}((wpa_s + upar_s - upar_{s'}; - # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations - # in terms of the wpa_{s'} coordinate: - # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} - @. 
vpa.scratch = vpa.grid + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn] - else - # if evolve_ppar = true and evolve_upar = true, vpa coordinate is - # wpahat_s = (vpa-upar_s)/vth_s; - # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); - # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations - # in terms of the wpahat_{s'} coordinate: - # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} - @. vpa.scratch = (vpa.grid * moments.ion.vth[iz,ir,is] + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn]) / moments.neutral.vth[iz,ir,isn] - end - # interpolate to the new grid (passed in as vpa.scratch) - # and return interpolated values in vpa.scratch2 - @views interpolate_to_grid_vpa!(vpa.scratch2, vpa.scratch, fvec_in.pdf_neutral[:,1,1,iz,ir,isn], vz, vz_spectral) + if moments.evolve_upar && moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = true, vpa coordinate is + # wpahat_s = (vpa-upar_s)/vth_s; + # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); + # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations + # in terms of the wpahat_{s'} coordinate: + # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} + new_grid = @. vpa.scratch = (vpa.grid * moments.ion.vth[iz,ir,is] + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn]) / moments.neutral.vth[iz,ir,isn] + elseif moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = false, vpa coordinate is + # vpahat_s = vpa/vth_s; + # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); + # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations + # in terms of the vpahat_{s'} coordinate: + # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} + new_grid = @.
vpa.scratch = vpa.grid / vth_ratio + elseif moments.evolve_upar + # if evolve_ppar = false and evolve_upar = true, vpa coordinate is + # wpa_s = vpa-upar_s; + # we have f_{s'}(wpa_{s'}) = f_{s'}(wpa_s + upar_s - upar_{s'}); + # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations + # in terms of the wpa_{s'} coordinate: + # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} + new_grid = @. vpa.scratch = vpa.grid + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn] else - # no need to interpolate if neither upar or ppar evolved separately from pdf - vpa.scratch2 .= fvec_in.pdf_neutral[:,1,1,iz,ir,isn] + new_grid = vpa.grid end + # interpolate to new_grid (vpa.grid itself when no moments are evolved) + # and return interpolated values in vpa.scratch2 + @views interpolate_to_grid_vpa!(vpa.scratch2, new_grid, fvec_in.pdf_neutral[:,1,1,iz,ir,isn], vz, vz_spectral) ionization = collisions.reactions.ionization_frequency @loop_vpa ivpa begin f_out[ivpa,1,iz,ir,is] += From 05faeefac91c1cd6b2372df0b3a0f86d7bcdca2c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 22 Sep 2024 16:40:39 +0100 Subject: [PATCH 085/107] Don't use z-upwinding at distributed-MPI boundaries in Jacobian Would have expected including upwinding to be a good idea, to be consistent with `electron_z_advection!()`, but upwinding makes convergence for the implicit solve in `electron_backward_euler!()` much slower (~10x more iterations). Maybe this could indicate that it is more important to have a fully self-consistent Jacobian inversion for the `electron_vpa_advection()` part rather than taking half(ish) of the values from one block and the other half(ish) from the other.
--- .../src/electron_kinetic_equation.jl | 46 ++++++++++--------- moment_kinetics/src/nonlinear_solvers.jl | 2 - moment_kinetics/test/jacobian_matrix_tests.jl | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 6655d6a03..5a9860f70 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1031,18 +1031,19 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval +println("recalculating precon") nl_solver_params.solves_since_precon_update[] = 0 nl_solver_params.precon_dt[] = t_params.dt[] - orig_lu, precon_matrix, input_buffer, output_buffer, adv_fac_lower, - adv_fac_upper = nl_solver_params.preconditioners[ir] + orig_lu, precon_matrix, input_buffer, output_buffer = + nl_solver_params.preconditioners[ir] fill_electron_kinetic_equation_Jacobian!( precon_matrix, f_electron_new, electron_ppar_new, moments, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar, adv_fac_lower, adv_fac_upper) + ir, evolve_ppar) begin_serial_region() if block_rank[] == 0 @@ -1051,7 +1052,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos # cannot reuse it. nl_solver_params.preconditioners[ir] = (lu(sparse(precon_matrix)), precon_matrix, input_buffer, - output_buffer, adv_fac_lower, adv_fac_upper) + output_buffer) else # LU decomposition was previously created. 
The Jacobian always # has the same sparsity pattern, so by using `lu!()` we can @@ -1067,13 +1068,11 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos orig_lu = lu(sparse(precon_matrix)) end nl_solver_params.preconditioners[ir] = - (orig_lu, precon_matrix, input_buffer, output_buffer, - adv_fac_lower, adv_fac_upper) + (orig_lu, precon_matrix, input_buffer, output_buffer) end else nl_solver_params.preconditioners[ir] = - (orig_lu, precon_matrix, input_buffer, output_buffer, - adv_fac_lower, adv_fac_upper) + (orig_lu, precon_matrix, input_buffer, output_buffer) end end @@ -1081,8 +1080,8 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos function lu_precon!(x) precon_ppar, precon_f = x - precon_lu, _, input_buffer, output_buffer, adv_fac_lower, - adv_fac_upper = nl_solver_params.preconditioners[ir] + precon_lu, _, input_buffer, output_buffer = + nl_solver_params.preconditioners[ir] begin_serial_region() counter = 1 @@ -1123,9 +1122,21 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. + # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side. + # Neither direction is special, so taking the average seems most + # sensible (although in an initial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other.
reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( - precon_f, adv_fac_lower, adv_fac_upper, f_lower_endpoints, - f_upper_endpoints, receive_buffer1, receive_buffer2, z) + precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) begin_serial_region() @serial_region begin @@ -2944,8 +2955,7 @@ end vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar, adv_fac_lower, - adv_fac_upper) + ir, evolve_ppar) Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if `evolve_ppar=true`) the electron energy equation. @@ -2956,8 +2966,7 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, adv_fac_lower, - adv_fac_upper) + evolve_ppar) dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -3011,11 +3020,6 @@ function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, mome end z_speed = @view z_advect[1].speed[:,:,:,ir] - begin_vperp_vpa_region() - @loop_vperp_vpa ivperp ivpa begin - adv_fac_lower[ivpa,ivperp] = -z_speed[1,ivpa,ivperp] - adv_fac_upper[ivpa,ivperp] = -z_speed[end,ivpa,ivperp] - end add_electron_z_advection_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 0e4230a02..8b6dad4a6 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -154,8 +154,6 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size), allocate_shared_float(pdf_plus_ppar_size), allocate_shared_float(pdf_plus_ppar_size), - allocate_shared_float(coords.vpa.n,coords.vperp.n), - 
allocate_shared_float(coords.vpa.n,coords.vperp.n), ), reverse(outer_coord_sizes)) elseif preconditioner_type == "none" diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 419eef809..599ba9e9e 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -2421,7 +2421,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, - true, scratch_dummy.buffer_vpavperp_1, scratch_dummy.buffer_vpavperp_2) + true) function residual_func!(residual_f, residual_p, this_f, this_p) begin_z_region() From 1cb227ea27505127acafd8c3cbf34b5c32e16908 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 22 Sep 2024 16:53:29 +0100 Subject: [PATCH 086/107] Fix dependencies in parallel CI job --- .github/workflows/parallel_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/parallel_test.yml b/.github/workflows/parallel_test.yml index 405c5777e..99e2f6d80 100644 --- a/.github/workflows/parallel_test.yml +++ b/.github/workflows/parallel_test.yml @@ -26,7 +26,7 @@ jobs: touch Project.toml julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "Test"]); Pkg.develop(path="moment_kinetics/")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; 
Pkg.precompile()' # Need to use openmpi so that the following arguments work: # * `--mca rmaps_base_oversubscribe 1` allows oversubscription (more processes @@ -58,7 +58,7 @@ jobs: touch Project.toml julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "Test"]); Pkg.develop(path="moment_kinetics/")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' # Need to use openmpi so that the following arguments work: # * `--mca rmaps_base_oversubscribe 1` allows oversubscription (more processes From 7025661e3de82cdd824f19926a9404c5a61d6495 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 22 Sep 2024 18:05:03 +0100 Subject: [PATCH 087/107] Use gausslegendre_pseudospectral in kinetic electron debug check --- moment_kinetics/debug_test/kinetic_electron_inputs.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/moment_kinetics/debug_test/kinetic_electron_inputs.jl b/moment_kinetics/debug_test/kinetic_electron_inputs.jl index 0d4e8d042..a66e91684 100644 --- a/moment_kinetics/debug_test/kinetic_electron_inputs.jl +++ b/moment_kinetics/debug_test/kinetic_electron_inputs.jl @@ -69,18 +69,18 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "z" => OptionsDict("ngrid" => 3, "nelement" => 24, "bc" => "wall", - "discretization" => "chebyshev_pseudospectral", + "discretization" => "gausslegendre_pseudospectral", "element_spacing_option" => "sqrt"), "vpa" => OptionsDict("ngrid" => 3, "nelement" => 16, "L" => 6.0, "bc" => "zero", - "discretization" => 
"chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vz" => OptionsDict("ngrid" => 3, "nelement" => 6, "L" => 6.0, "bc" => "zero", - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "ion_source_1" => OptionsDict("active" => true, "z_profile" => "gaussian", "z_width" => 0.125, From 9f36b89b026a6b5b4e7cdf02c575ce0515103cf6 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 22 Sep 2024 18:41:52 +0100 Subject: [PATCH 088/107] Fix nl_solver_params update for 'implicit_electron_advance' --- moment_kinetics/src/electron_kinetic_equation.jl | 16 ++++++++++------ moment_kinetics/src/initial_conditions.jl | 7 +++++-- moment_kinetics/src/time_advance.jl | 3 ++- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 5a9860f70..9164e1de0 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1586,7 +1586,7 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo external_source_settings, num_diss_params, r, z, vperp, vpa, r_spectral, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, gyroavs, - scratch_dummy, dt, nl_solver_params) + scratch_dummy, t_params, ion_dt, nl_solver_params) electron_ppar_out = fvec_out.electron_ppar # Store the solved-for pdf in n_rk_stages+1, because this was the version that gets @@ -1612,9 +1612,10 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo fvec_in.density, fvec_in.electron_upar, fvec_in.density, fvec_in.upar, fvec_in.ppar, fvec_in.density_neutral, fvec_in.uz_neutral, fvec_in.pz_neutral, moments.electron, - collisions, dt, composition, + collisions, ion_dt, composition, external_source_settings.electron, num_diss_params, r, z) + newton_success = false for ir ∈ 1:r.n function residual_func!(residual, 
new_variables; debug=false) electron_ppar_residual, f_electron_residual = residual @@ -1658,7 +1659,7 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo # fvec_in.ppar[:,ir], fvec_in, moments, # collisions, composition, # external_source_settings, num_diss_params, - # z, dt, ir) + # z, ion_dt, ir) # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the # electron_pdf member of the first argument, so if we set the electron_pdf member @@ -1668,12 +1669,12 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo @loop_z_vperp_vpa iz ivperp ivpa begin f_electron_residual[ivpa,ivperp,iz] = 0.0 end + t_params.dt[] = pdf_electron_normalisation_factor electron_kinetic_equation_euler_update!( f_electron_residual, electron_ppar_residual, f_electron_new, electron_ppar_new, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, pdf_electron_normalisation_factor, t_params, ir; - soft_force_constraints=true) + num_diss_params, t_params, ir; soft_force_constraints=true) @loop_z_vperp_vpa iz ivperp ivpa begin f_electron_residual[ivpa,ivperp,iz] /= sqrt(1.0 + vpa.grid[ivpa]^2) end @@ -1754,7 +1755,10 @@ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, mo rhs_delta, v, w, nl_solver_params; left_preconditioner=nothing, right_preconditioner=nothing, - coords=(r=r, z=z, vperp=vperp, vpa=vpa)) + coords=(z=z, vperp=vperp, vpa=vpa)) + if !newton_success + break + end end # Fill pdf.electron.norm diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 1b548d2b0..321a79cd5 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -757,8 +757,11 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.global_nonlinear_iterations, 
nl_solver_params.electron_advance.global_linear_iterations, nl_solver_params.electron_advance.solves_since_precon_update, + nl_solver_params.electron_advance.precon_dt, nl_solver_params.electron_advance.serial_solve, nl_solver_params.electron_advance.max_nonlinear_iterations_this_step, + nl_solver_params.electron_advance.max_linear_iterations_this_step, + nl_solver_params.electron_advance.preconditioner_type, nl_solver_params.electron_advance.preconditioner_update_interval, nl_solver_params.electron_advance.preconditioners, ) @@ -772,8 +775,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field num_diss_params, r, z, vperp, vpa, r_spectral, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - gyroavs, scratch_dummy, 0.0, - initialisation_nl_solver_params) + gyroavs, scratch_dummy, t_params.electron, + 0.0, initialisation_nl_solver_params) else success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 1b220e97a..6a2a7905e 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3533,7 +3533,8 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen r_spectral, z_spectral, vperp_spectral, vpa_spectral, electron_z_advect, electron_vpa_advect, gyroavs, scratch_dummy, - dt, nl_solver_params.electron_advance) + t_params.electron, t_params.dt[], + nl_solver_params.electron_advance) elseif t_params.implicit_electron_ppar max_electron_pdf_iterations = 1000 max_electron_sim_time = 1.0e-3 From 1c23c01fab0b4aedbce9679b12d9ddf528efa5ca Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 23 Sep 2024 09:48:15 +0100 Subject: [PATCH 089/107] Better defaults for implicit electron solve `implicit_electron_advance` tries to do a single non-linear solve for the steady state, and does not work (yet). 
`implicit_electron_ppar` uses pseudo-timestepping to find the steady state, with a backward-Euler pseudo-timestep, and does work (at least sometimes), so should be the default method. --- examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml | 1 + moment_kinetics/src/moment_kinetics_input.jl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml index d7f5461ff..a299f73f1 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml @@ -86,6 +86,7 @@ upar_phase = 0.0 [timestepping] type = "KennedyCarpenterARK324" implicit_electron_advance = true +implicit_electron_ppar = false implicit_ion_advance = false implicit_vpa_advection = false nstep = 1000000 diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index b04bbd9a4..9f49aef52 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -160,10 +160,10 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI minimum_dt=0.0, maximum_dt=Inf, implicit_braginskii_conduction=true, - implicit_electron_advance=true, + implicit_electron_advance=false, implicit_ion_advance=false, implicit_vpa_advection=false, - implicit_electron_ppar=false, + implicit_electron_ppar=true, constraint_forcing_rate=0.0, write_after_fixed_step_count=false, write_error_diagnostics=false, From dd4fe1914880af6dd708a2435579524d70683d04 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 23 Sep 2024 13:00:37 +0100 Subject: [PATCH 090/107] Fix shared memory parallelisation in `electron_backward_euler!()` Also fix up the debug checks for the kinetic electron run. 
--- .../debug_test/kinetic_electron_inputs.jl | 69 +++++++++---------- .../src/electron_kinetic_equation.jl | 11 ++- moment_kinetics/src/electron_z_advection.jl | 3 +- moment_kinetics/src/external_sources.jl | 2 +- moment_kinetics/src/time_advance.jl | 2 +- 5 files changed, 47 insertions(+), 40 deletions(-) diff --git a/moment_kinetics/debug_test/kinetic_electron_inputs.jl b/moment_kinetics/debug_test/kinetic_electron_inputs.jl index a66e91684..4f834c319 100644 --- a/moment_kinetics/debug_test/kinetic_electron_inputs.jl +++ b/moment_kinetics/debug_test/kinetic_electron_inputs.jl @@ -2,7 +2,7 @@ test_type = "Kinetic electron" using moment_kinetics.type_definitions: OptionsDict test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, - "n_neutral_species" => 1, + "n_neutral_species" => 0, #1, "electron_physics" => "kinetic_electrons", "recycling_fraction" => 0.5, "T_e" => 0.2, @@ -29,57 +29,54 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "upar_phase" => 0.0, "temperature_amplitude" => 0.0, "temperature_phase" => 0.0), - "neutral_species_1" => OptionsDict("initial_density" => 1.0, - "initial_temperature" => 1.0), - "z_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 0.001, - "density_phase" => 0.0, - "upar_amplitude" => -1.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), - "vz_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 1.0, - "density_phase" => 0.0, - "upar_amplitude" => 0.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), + #"neutral_species_1" => OptionsDict("initial_density" => 1.0, + # "initial_temperature" => 1.0), + #"z_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", + # "density_amplitude" => 0.001, + # "density_phase" => 0.0, + # "upar_amplitude" => -1.0, + # "upar_phase" => 0.0, + # 
"temperature_amplitude" => 0.0, + # "temperature_phase" => 0.0), + #"vz_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", + # "density_amplitude" => 1.0, + # "density_phase" => 0.0, + # "upar_amplitude" => 0.0, + # "upar_phase" => 0.0, + # "temperature_amplitude" => 0.0, + # "temperature_phase" => 0.0), "reactions" => OptionsDict("charge_exchange_frequency" => 0.75, "ionization_frequency" => 0.5), - "timestepping" => OptionsDict("type" => "Fekete4(3)", + "timestepping" => OptionsDict("type" => "PareschiRusso2(2,2,2)", "nstep" => 3, - "dt" => 2.0e-8, - "minimum_dt" => 1.0e-8, - "CFL_prefactor" => 1.0, - "step_update_prefactor" => 0.4, - "nwrite" => 2, - "split_operators" => false), - "electron_timestepping" => OptionsDict("type" => "Fekete4(3)", - "nstep" => 10, - "dt" => 4.0e-11, - "minimum_dt" => 2.0e-11, - "initialization_residual_value" => 1.e10, - "converged_residual_value" => 1.e10, + "dt" => 1.0e-9, + "nwrite" => 2,), + "electron_timestepping" => OptionsDict("dt" => 1.0e-6, + "initialization_residual_value" => 2.e3, + "converged_residual_value" => 1.e3, "nwrite" => 10000, "nwrite_dfns" => 10000, "no_restart" => true), + #"nonlinear_solver" => OptionsDict("rtol" => 1.0e-2, + # "atol" => 1.0e-3,), "r" => OptionsDict("ngrid" => 1, "nelement" => 1), "z" => OptionsDict("ngrid" => 3, - "nelement" => 24, + "nelement" => 1, "bc" => "wall", "discretization" => "gausslegendre_pseudospectral", - "element_spacing_option" => "sqrt"), - "vpa" => OptionsDict("ngrid" => 3, - "nelement" => 16, + "element_spacing_option" => "uniform"), + "vpa" => OptionsDict("ngrid" => 4, + "nelement" => 5, "L" => 6.0, "bc" => "zero", + "element_spacing_option" => "coarse_tails", "discretization" => "gausslegendre_pseudospectral"), - "vz" => OptionsDict("ngrid" => 3, - "nelement" => 6, + "vz" => OptionsDict("ngrid" => 4, + "nelement" => 5, "L" => 6.0, "bc" => "zero", + "element_spacing_option" => "coarse_tails", "discretization" => "gausslegendre_pseudospectral"), 
"ion_source_1" => OptionsDict("active" => true, "z_profile" => "gaussian", diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 9164e1de0..a6832cbbf 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -638,6 +638,10 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") end + # Need to always synchronize here because `t_params.dt[]` might have been read by + # other processes in the block even though the region type was + # `begin_region_serial()`. + _block_synchronize() begin_serial_region() @serial_region begin t_params.dt[] = t_params.previous_dt[] @@ -1549,7 +1553,11 @@ println("recalculating precon") if t_params.previous_dt[] < initial_dt_scale_factor * t_params.dt[] # If dt has increased a lot, we can probably try a larger initial dt for the next # solve. 
- t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] + begin_serial_region() + @serial_region begin + t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] + end + _block_synchronize() end if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] @@ -1558,6 +1566,7 @@ println("recalculating precon") @serial_region begin t_params.dt[] = t_params.previous_dt[] end + _block_synchronize() end if !electron_pdf_converged success = "kinetic-electrons" diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index 06913aa2f..c79a934da 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -24,7 +24,7 @@ function electron_z_advection!(pdf_out, pdf_in, upar, vth, advect, z, vpa, spect # create a pointer to a scratch_dummy array to store the z-derivative of the electron pdf dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] d2pdf_dz2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] - begin_r_vperp_vpa_region() + begin_vperp_vpa_region() # get the updated speed along the z direction using the current pdf @views update_electron_speed_z!(advect[1], upar, vth, vpa, ir) # update adv_fac -- note that there is no factor of dt here because @@ -45,6 +45,7 @@ function electron_z_advection!(pdf_out, pdf_in, upar, vth, advect, z, vpa, spect # @views second_derivative!(d2pdf_dz2[ivpa,ivperp,:], pdf_in[ivpa,ivperp,:], z, spectral) #end # calculate the advection term + begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] #pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz] diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 7b60f7b6a..d2caf8f1c 100644 --- a/moment_kinetics/src/external_sources.jl +++ 
b/moment_kinetics/src/external_sources.jl @@ -953,7 +953,7 @@ Note that this function operates on a single point in `r`, given by `ir`, and `p function external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_source, index, vperp, vpa, dt, ir) - begin_r_z_vperp_region() + begin_z_vperp_region() me_over_mi = composition.me_over_mi diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 6a2a7905e..0b888b77e 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -410,7 +410,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end end - if t_input["implicit_vpa_advection"] + if electron !== nothing && t_input["implicit_vpa_advection"] error("implicit_vpa_advection does not work at the moment. Need to figure out " * "what to do with constraints, as explicit and implicit parts would not " * "preserve constaints separately.") From ea71a4a7256ffcb6a2699a608ef2f7835f621697 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 23 Sep 2024 18:00:58 +0100 Subject: [PATCH 091/107] Update debug checks initialisation for new input setup --- .../debug_test/runtest_template.jl | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/moment_kinetics/debug_test/runtest_template.jl b/moment_kinetics/debug_test/runtest_template.jl index 224c57ce7..1b97a1e68 100644 --- a/moment_kinetics/debug_test/runtest_template.jl +++ b/moment_kinetics/debug_test/runtest_template.jl @@ -3,6 +3,7 @@ using moment_kinetics.time_advance: time_advance! 
using moment_kinetics.communication using moment_kinetics.looping: all_dimensions, dimension_combinations, anyv_dimension_combinations +using moment_kinetics.type_definitions: OptionsDict using moment_kinetics.Glob using moment_kinetics.Primes @@ -60,8 +61,9 @@ function runtests(; restart=false) n_factors = length(factor(Vector, global_size[])) for input ∈ test_input_list, debug_loop_type ∈ dimension_combinations_to_test - if :sn ∈ debug_loop_type && "n_neutral_species" ∈ keys(input) && - input["n_neutral_species"] <= 0 + composition_section = get(input, "composition", OptionsDict()) + if :sn ∈ debug_loop_type && "n_neutral_species" ∈ keys(composition_section) && + composition_section["n_neutral_species"] <= 0 # Skip neutral dimension parallelisation options if the number of neutral # species is zero, as these would just be equivalent to running in serial continue @@ -73,24 +75,19 @@ function runtests(; restart=false) dims_to_test = debug_loop_type end for d ∈ all_dimensions - nelement_name = "$(d)_nelement" - if nelement_name ∈ keys(input) - nelement = input[nelement_name] - elseif d ∈ (:vperp, :vzeta, :vr) - nelement = 1 + dim_section = get(input, "$d", OptionsDict()) + if "nelement" ∈ keys(dim_section) + nelement = dim_section["nelement"] else # Dummy value, here it only matters if this is 1 or greater than 1 - nelement = 2 + nelement = 1 end - ngrid_name = "$(d)_ngrid" - if ngrid_name ∈ keys(input) - ngrid = input[ngrid_name] - elseif d ∈ (:vperp, :vzeta, :vr) - ngrid = 1 + if "ngrid" ∈ keys(dim_section) + ngrid = dim_section["ngrid"] else # Dummy value, here it only matters if this is 1 or greater than 1 - ngrid = 2 + ngrid = 1 end if nelement == 1 && ngrid == 1 From 0b43b3301c924829804dbf756c474b7fe3b74500 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 23 Sep 2024 18:33:38 +0100 Subject: [PATCH 092/107] Fix indexing ions/neutrals when calling electron_energy_equation_no_r!() --- moment_kinetics/src/electron_kinetic_equation.jl | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index a6832cbbf..2520b5483 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2935,10 +2935,10 @@ function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, if evolve_ppar @views electron_energy_equation_no_r!( ppar_out, ppar_in, moments.electron.dens[:,ir], - moments.electron.upar[:,ir], moments.ion.dens[:,ir], - moments.ion.upar[:,ir], moments.ion.ppar[:,ir], - moments.neutral.dens[:,ir], moments.neutral.uz[:,ir], - moments.neutral.pz[:,ir], moments.electron, collisions, dt, + moments.electron.upar[:,ir], moments.ion.dens[:,ir,:], + moments.ion.upar[:,ir,:], moments.ion.ppar[:,ir,:], + moments.neutral.dens[:,ir,:], moments.neutral.uz[:,ir,:], + moments.neutral.pz[:,ir,:], moments.electron, collisions, dt, composition, external_source_settings.electron, num_diss_params, z, ir) if ion_dt !== nothing From b918bdddecb0c72981de0af5b3910e0c974f566d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 24 Sep 2024 10:29:43 +0100 Subject: [PATCH 093/107] Fix timestep diagnostics plots --- .../makie_post_processing/src/makie_post_processing.jl | 8 ++++++++ moment_kinetics/src/load_data.jl | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 56ec2352e..2989eea20 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7637,6 +7637,14 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by p_electron", ax=ax_failures) 
+ if !electron && ri.composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + # Kinetic electron nonlinear solver failure + counter += 1 + plot_1d(time, @view failure_caused_by_per_output[counter,:]; + linestyle=:dash, label=prefix * "failures caused by kinetic electron solve", + ax=ax_failures) + end end if !electron && ri.n_neutral_species > 0 # Neutral pdf failure counter diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index ac34affe3..b5c68134a 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -4731,7 +4731,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t electron_steps_per_output = get_variable(run_info, "electron_steps_per_output"; kwargs...) electron_failures_per_output = get_variable(run_info, "electron_failures_per_output"; kwargs...) electron_successful_steps_per_output = electron_steps_per_output - electron_failures_per_output - electron_pseudotime = get_variable("electron_cumulative_pseudotime"; kwargs...) + electron_pseudotime = get_variable(run_info, "electron_cumulative_pseudotime"; kwargs...) delta_t = copy(electron_pseudotime) for i ∈ length(delta_t):-1:2 From 2cac181a6fd8e1f361e5c21f2c2f4e510522eeb4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 24 Sep 2024 11:22:52 +0100 Subject: [PATCH 094/107] Option to switch off error handling in makie_post_processing For debugging it is useful to see the errors, with a backtrace. This commit adds an optional flag to the makie_post_processing input that can switch off the error handling. 
--- .../src/makie_post_processing.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 2989eea20..befa46972 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -604,8 +604,8 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, # - Don't allow setting "itime_*" and "itime_*_dfns" per-variable because we # load time and time_dfns in run_info and these must use the same # "itime_*"/"itime_*_dfns" setting as each variable. - time_index_options = ("itime_min", "itime_max", "itime_skip", "itime_min_dfns", - "itime_max_dfns", "itime_skip_dfns") + only_global_options = ("itime_min", "itime_max", "itime_skip", "itime_min_dfns", + "itime_max_dfns", "itime_skip_dfns", "handle_errors") set_defaults_and_check_top_level!(this_input_dict; # Options that only apply at the global level (not per-variable) @@ -656,11 +656,14 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, animate_vs_z_r=false, show_element_boundaries=false, steady_state_residual=false, + # By default, errors are caught so that later plots can still be made. For + # debugging it can be useful to turn this off. + handle_errors=true, ) section_defaults = OrderedDict(k=>v for (k,v) ∈ this_input_dict if !isa(v, AbstractDict) && - !(k ∈ time_index_options)) + !(k ∈ only_global_options)) for variable_name ∈ tuple(all_moment_variables..., timestep_diagnostic_variables...) 
set_defaults_and_check_section!( this_input_dict, variable_name; @@ -818,7 +821,8 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, end function makie_post_processing_error_handler(e::Exception, message::String) - if isa(e, InterruptException) + handle_errors = get(input_dict, "handle_errors", true) + if isa(e, InterruptException) || !handle_errors rethrow(e) else println(message * "\nError was $e.") From ce015397f0c376d4e018c6cf7ba4b845687a8b3b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 26 Sep 2024 11:33:31 +0100 Subject: [PATCH 095/107] Fix loading of external_source_controller_integral when restarting Should only be loaded when present in the restart file. --- moment_kinetics/src/load_data.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index b5c68134a..b678e1a48 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -771,16 +771,17 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, end end - if "external_source_controller_integral" ∈ get_variable_keys(dynamic) && - length(moments.ion.external_source_controller_integral) == 1 - moments.ion.external_source_controller_integral .= - load_slice(dynamic, "external_source_controller_integral", time_index) - elseif length(moments.ion.external_source_controller_integral) > 1 - moments.ion.external_source_controller_integral .= - reload_moment("external_source_controller_integral", dynamic, - time_index, r, z, r_range, z_range, restart_r, - restart_r_spectral, restart_z, restart_z_spectral, - interpolation_needed) + if "external_source_controller_integral" ∈ get_variable_keys(dynamic) + if length(moments.ion.external_source_controller_integral) == 1 + moments.ion.external_source_controller_integral .= + load_slice(dynamic, "external_source_controller_integral", time_index) + else + 
moments.ion.external_source_controller_integral .= + reload_moment("external_source_controller_integral", dynamic, + time_index, r, z, r_range, z_range, restart_r, + restart_r_spectral, restart_z, restart_z_spectral, + interpolation_needed) + end end pdf.ion.norm .= reload_ion_pdf(dynamic, time_index, moments, r, z, vperp, vpa, r_range, From 96f1b247ebe937a1f2d58be292474d767fded13e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 26 Sep 2024 22:13:12 +0100 Subject: [PATCH 096/107] Use separate sections for different electron source terms Allows different settings to be used for different electron sources, as intended, and gets rid of some warnings when multiple ion sources are used, but electron sources use defaults. --- moment_kinetics/src/external_sources.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index a16979a43..ca990f9e6 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -317,13 +317,13 @@ function setup_external_sources!(input_dict, r, z, electron_physics) PI_controller_amplitude, controller_source_profile, PI_density_target_ir, PI_density_target_iz, PI_density_target_rank) end - function get_settings_electrons(ion_settings) + function get_settings_electrons(i, ion_settings) # Note most settings for the electron source are copied from the ion source, # because we require that the particle sources are the same for ions and # electrons. `source_T` can be set independently, and when using # `source_type="energy"`, the `source_strength` could also be set. 
input = set_defaults_and_check_section!( - input_dict, "electron_source"; + input_dict, "electron_source_$i"; source_strength=ion_settings.source_strength, source_T=ion_settings.source_T, ) @@ -339,8 +339,8 @@ function setup_external_sources!(input_dict, r, z, electron_physics) input["source_strength"] = ion_settings.source_strength end return electron_source_data(input["source_strength"], input["source_T"], - ion_settings.active, ion_settings.r_amplitude, - ion_settings.z_amplitude, ion_settings.source_type) + ion_settings.active, ion_settings.r_amplitude, + ion_settings.z_amplitude, ion_settings.source_type) end # put all ion sources into ion_source_data struct vector @@ -361,9 +361,9 @@ function setup_external_sources!(input_dict, r, z, electron_physics) electron_sources = electron_source_data[] if electron_physics ∈ (braginskii_fluid, kinetic_electrons, kinetic_electrons_with_temperature_equation) - electron_sources = [get_settings_electrons(this_source) for this_source ∈ ion_sources] + electron_sources = [get_settings_electrons(i, this_source) for (i,this_source) ∈ enumerate(ion_sources)] else - electron_sources = [get_settings_electrons(get_settings_ions(1, false))] + electron_sources = [get_settings_electrons(1, get_settings_ions(1, false))] end # put all neutral sources into neutral_source_data struct vector From c2e5a9bb56044a7ef17bd8efe95cf9859679a8f4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 24 Sep 2024 15:00:29 +0100 Subject: [PATCH 097/107] Test for kinetic electrons --- moment_kinetics/test/jacobian_matrix_tests.jl | 2 +- .../test/kinetic_electron_tests.jl | 291 ++++++++++++++++++ moment_kinetics/test/runtests.jl | 1 + 3 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 moment_kinetics/test/kinetic_electron_tests.jl diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 599ba9e9e..b739bc76a 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ 
b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -2607,7 +2607,7 @@ function runtests() test_input["output"]["base_directory"] = test_output_directory @testset "Jacobian matrix" verbose=use_verbose begin - println(" Jacobian matrix") + println("Jacobian matrix") test_electron_z_advection(test_input) test_electron_vpa_advection(test_input) diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl new file mode 100644 index 000000000..cad2967ed --- /dev/null +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -0,0 +1,291 @@ +module KineticElectronsTests + +# Regression test with kinetic electrons, using wall boundary conditions, with recycling +# fraction less than 1 and a plasma source. Runs a Boltzmann electron simulation, restarts +# as a kinetic electron simulation, and checks the final Ez profile. + +include("setup.jl") + +using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, + postproc_load_variable +using moment_kinetics.looping + +using moment_kinetics.Glob + +# Input for Boltzmann electron part of run +boltzmann_input = OptionsDict( + "output" => OptionsDict("run_name" => "kinetic_electron_test_boltzmann_initialisation", + ), + "evolve_moments" => OptionsDict("parallel_pressure" => true, + "density" => true, + "moments_conservation" => true, + "parallel_flow" => true, + ), + "r" => OptionsDict("ngrid" => 1, + "nelement" => 1, + ), + "z" => OptionsDict("ngrid" => 5, + "discretization" => "gausslegendre_pseudospectral", + "nelement" => 8, + "bc" => "wall", + ), + "vpa" => OptionsDict("ngrid" => 6, + "discretization" => "gausslegendre_pseudospectral", + "nelement" => 17, + "L" => 24.0, + "element_spacing_option" => "coarse_tails", + "bc" => "zero", + ), + "composition" => OptionsDict("T_e" => 0.2, + "n_ion_species" => 1, + "n_neutral_species" => 0, + ), + "ion_species_1" => OptionsDict("initial_temperature" => 0.2, + "initial_density" => 1.0, + ), + "z_IC_ion_species_1" => 
OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "temperature_amplitude" => 0.0, + "density_phase" => 0.0, + "upar_amplitude" => 1.0, + "temperature_phase" => 0.0, + "upar_phase" => 0.0, + ), + "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "temperature_amplitude" => 0.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "temperature_phase" => 0.0, + "upar_phase" => 0.0, + ), + "krook_collisions" => OptionsDict("use_krook" => true, + ), + "reactions" => OptionsDict("electron_ionization_frequency" => 0.0, + "ionization_frequency" => 0.5, + "charge_exchange_frequency" => 0.75, + ), + "ion_source_1" => OptionsDict("active" => true, + "z_profile" => "gaussian", + "z_width" => 0.25, + "source_strength" => 2.0, + "source_T" => 2.0, + ), + "ion_source_2" => OptionsDict("active" => true, + "z_profile" => "wall_exp_decay", + "z_width" => 0.25, + "source_strength" => 0.5, + "source_T" => 0.2, + ), + "timestepping" => OptionsDict("type" => "SSPRK4", + "nstep" => 20000, + "dt" => 1.0e-4, + "nwrite" => 2500, + "nwrite_dfns" => 2500, + "steady_state_residual" => true, + ), + "ion_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0, + ), + "electron_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0, + ), + ) + +# Test use distributed-memory when possible +boltzmann_input["z"]["nelement_local"] = boltzmann_input["z"]["nelement"] ÷ gcd(boltzmann_input["z"]["nelement"], global_size[]) + +kinetic_input = deepcopy(boltzmann_input) +kinetic_input["output"]["run_name"] = "kinetic_electron_test" +kinetic_input["composition"]["electron_physics"] = "kinetic_electrons" +kinetic_input["timestepping"] = OptionsDict("type" => "PareschiRusso2(2,2,2)", + "implicit_electron_advance" => false, + "implicit_electron_ppar" => true, + "implicit_ion_advance" => false, + "implicit_vpa_advection" => false, + "nstep" => 100, + "dt" => 1.0e-5, + "nwrite" => 100, + 
"nwrite_dfns" => 100, + ) + +kinetic_input["electron_timestepping"] = OptionsDict("nstep" => 5000000, + "dt" => 1.0e-5, + "nwrite" => 10000, + "nwrite_dfns" => 100000, + "decrease_dt_iteration_threshold" => 1000, + "increase_dt_iteration_threshold" => 0, + "cap_factor_ion_dt" => 10.0, + "initialization_residual_value" => 1.0e10, + "converged_residual_value" => 1.0e-1, + ) + +kinetic_input["nonlinear_solver"] = OptionsDict("nonlinear_max_iterations" => 1000, + "rtol" => 1.0e-8, + "atol" => 1.0e-14, + "linear_restart" => 5, + "preconditioner_update_interval" => 100, + ) + + +""" +Run a test for a single set of parameters +""" +function run_test() + test_output_directory = get_MPI_tempdir() + + this_boltzmann_input = deepcopy(boltzmann_input) + this_boltzmann_input["output"]["base_directory"] = test_output_directory + + this_kinetic_input = deepcopy(kinetic_input) + this_kinetic_input["output"]["base_directory"] = test_output_directory + + # Provide some progress info + println(" - testing kinetic electrons") + + # Suppress console output while running? Test is pretty long, so maybe better to leave + # intermediate output visible. Leaving `quietoutput()` commented out for now... 
+ quietoutput() do + run_moment_kinetics(this_boltzmann_input) + + restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"]) + restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5" + restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1] + + # run kinetic electron simulation + run_moment_kinetics(this_kinetic_input; restart=restart_from_file) + end + + if global_rank[] == 0 + # Load and analyse output + ######################### + + path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"]) + + # open the output file(s) + run_info = get_run_info_no_setup(path, dfns=true) + + # load fields data + Ez = postproc_load_variable(run_info, "Ez")[:,1,:] + vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:] + electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end] + + close_run_info(run_info) + + # Regression test + # Benchmark data generated in serial on Linux + expected_Ez = [-0.5990683230706185 -0.6042082363495851; + -0.4944296396481284 -0.49692371894536586; + -0.30889032954504736 -0.3090990586904173; + -0.2064830747303776 -0.20700297720010077; + -0.21232457328748663 -0.2132748045598696; + -0.18233875912042674 -0.18276920923500758; + -0.16711429522309232 -0.1674324272230308; + -0.16920776495088916 -0.16937992443371716; + -0.1629417555658927 -0.16309341722744303; + -0.16619150334079993 -0.16633546753735795; + -0.15918194883360942 -0.15931554370144113; + -0.14034706409006803 -0.140469880250037; + -0.12602184032280567 -0.12613381924054493; + -0.10928716440800472 -0.10938345602505639; + -0.07053969674257217 -0.0706024520856333; + -0.0249577746169536 -0.024980098134854842; + -2.8327303308330514e-15 -1.599033453711614e-10; + 0.024957774616960776 0.02498009782733815; + 0.07053969674257636 0.07060245115760132; + 
0.10928716440799909 0.10938345732933795; + 0.1260218403227975 0.1261338225947928; + 0.1403470640900294 0.14046988178255268; + 0.1591819488336015 0.15931556545456152; + 0.16619150334082114 0.1663353993955267; + 0.16294175556587748 0.16309307445724816; + 0.16920776495090983 0.1693805039915145; + 0.1671142952230893 0.1674318780154963; + 0.1823387591204167 0.18277420263305205; + 0.21232457328753865 0.21326329266495697; + 0.20648307473037922 0.20700517064938181; + 0.3088903295450278 0.3091144991453789; + 0.4944296396481271 0.49684270193048663; + 0.5990683230705801 0.6040141042995336] + expected_vthe = [27.08122333732766 27.083668406411196; + 27.087128061238488 27.08840157326006; + 27.090525010446868 27.090443986816897; + 27.091202856161452 27.0914901864659; + 27.09265674296987 27.093297466503625; + 27.093298138334738 27.09337068853881; + 27.094377689895747 27.094548022524926; + 27.09501542767647 27.095170446421935; + 27.095227831625575 27.095304545176944; + 27.095420218946682 27.09555512096241; + 27.095754478126825 27.095876494374046; + 27.096054218271775 27.096188914603825; + 27.096199500698383 27.096294431476554; + 27.09632238748948 27.096423453543142; + 27.096502792691805 27.096594041947167; + 27.096597492028636 27.096694147970585; + 27.096610989303674 27.096702959927107; + 27.096597492397745 27.096694148339555; + 27.096502794930903 27.096594044186332; + 27.096322390449956 27.09642345650393; + 27.096199499205674 27.096294429984052; + 27.09605421760595 27.096188913937898; + 27.095754438597055 27.095876454845936; + 27.09542019655419 27.095555098545283; + 27.095228009815475 27.095304723869976; + 27.095015217848847 27.09517023619458; + 27.094377437638478 27.09454777080713; + 27.093294828184774 27.093367377705533; + 27.092639150183448 27.09327987116632; + 27.0912092735745 27.091496606764487; + 27.09048496370012 27.090403937882265; + 27.08714601914595 27.08841951855733; + 27.08144246136634 27.08388753119234] + + if expected_Ez == nothing + # Error: no expected input 
provided + println("data tested would be: Ez=", Ez) + @test false + else + @test isapprox(Ez, expected_Ez, rtol=1.0e-7, atol=1.0e-9) + end + if expected_vthe == nothing + # Error: no expected input provided + println("data tested would be: vthe=", vthe) + @test false + else + @test isapprox(vthe, expected_vthe, rtol=2.0e-9, atol=0.0) + end + + # Iteration counts are fairly inconsistent, but it's good to check that they at + # least don't unexpectedly increase by an order of magnitude. + # Expected iteration count is from a serial run on Linux. + expected_electron_advance_linear_iterations = 10695 + @test electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations + if !(electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations) + println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") + end + end + + if global_rank[] == 0 + # Delete output directory to avoid using too much disk space + rm(realpath(test_output_directory); recursive=true) + end +end + +function runtests() + if Sys.isapple() + @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin + end + return nothing + end + @testset "kinetic electrons" begin + println("Kinetic electron tests") + run_test() + end + return nothing +end + +end # KineticElectronsTests + + +using .KineticElectronsTests + +KineticElectronsTests.runtests() diff --git a/moment_kinetics/test/runtests.jl b/moment_kinetics/test/runtests.jl index 97731c66a..26a1a863e 100644 --- a/moment_kinetics/test/runtests.jl +++ b/moment_kinetics/test/runtests.jl @@ -21,6 +21,7 @@ function runtests() include(joinpath(@__DIR__, "fokker_planck_time_evolution_tests.jl")) include(joinpath(@__DIR__, "gyroaverage_tests.jl")) include(joinpath(@__DIR__, "jacobian_matrix_tests.jl")) + include(joinpath(@__DIR__, "kinetic_electron_tests.jl")) 
end end From b767d2284ee62d8ccb2609b700480c727d204e72 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 26 Sep 2024 22:39:43 +0100 Subject: [PATCH 098/107] Fix docstring in electron_kinetic_equation --- moment_kinetics/src/electron_kinetic_equation.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 2520b5483..e6e63e0ac 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -53,6 +53,9 @@ using ..utils: get_minimum_CFL_z, get_minimum_CFL_vpa using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_derivatives!, calculate_electron_moment_derivatives_no_r! +# Only needed so we can reference it in a docstring +import ..runge_kutta + """ update_electron_pdf is a function that uses the electron kinetic equation to solve for the updated electron pdf From 4210dcc465c5774865b1fa124e8f5789b4805e0b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 27 Sep 2024 19:57:13 +0100 Subject: [PATCH 099/107] Use Givens rotations instead of MINPACK.jl in linear_solve!() Using Givens rotations (following 'Algorithm 2' in Zou (2023)) avoids the need for least-squares minimisations at each iteration of the GMRES linear solver. 
--- moment_kinetics/Project.toml | 1 - moment_kinetics/src/initial_conditions.jl | 3 + moment_kinetics/src/nonlinear_solvers.jl | 122 ++++++++++++++-------- 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml index a3d2fa2bb..fc61043c9 100644 --- a/moment_kinetics/Project.toml +++ b/moment_kinetics/Project.toml @@ -17,7 +17,6 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882" LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" -MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 321a79cd5..1629a2a6b 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -748,6 +748,9 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.linear_restart, nl_solver_params.electron_advance.linear_max_restarts, nl_solver_params.electron_advance.H, + nl_solver_params.electron_advance.c, + nl_solver_params.electron_advance.s, + nl_solver_params.electron_advance.g, nl_solver_params.electron_advance.V, nl_solver_params.electron_advance.linear_initial_guess, nl_solver_params.electron_advance.n_solves, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 8b6dad4a6..185c4c51d 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -22,6 +22,7 @@ Useful references: [3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method [4] https://www.rikvoorhaar.com/blog/gmres [5] E. Carson , J. Liesen, Z. 
Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003. +[6] Q. Zou, "GMRES algorithms over 35 years", Applied Mathematics and Computation 445, 127869 (2023), https://doi.org/10.1016/j.amc.2023.127869 """ module nonlinear_solvers @@ -36,12 +37,11 @@ using ..looping using ..type_definitions: mk_float, mk_int using LinearAlgebra -using MINPACK using MPI using SparseArrays using StatsBase: mean -struct nl_solver_info{TH,TV,Tlig,Tprecon} +struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon} rtol::mk_float atol::mk_float nonlinear_max_iterations::mk_int @@ -50,6 +50,9 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} linear_restart::mk_int linear_max_restarts::mk_int H::TH + c::Tcsg + s::Tcsg + g::Tcsg V::TV linear_initial_guess::Tlig n_solves::Ref{mk_int} @@ -108,17 +111,29 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa if serial_solve H = allocate_float(linear_restart + 1, linear_restart) + c = allocate_float(linear_restart + 1) + s = allocate_float(linear_restart + 1) + g = allocate_float(linear_restart + 1) V = allocate_float(reverse(coord_sizes)..., linear_restart+1) H .= 0.0 + c .= 0.0 + s .= 0.0 + g .= 0.0 V .= 0.0 elseif electron_ppar_pdf_solve H = allocate_shared_float(linear_restart + 1, linear_restart) + c = allocate_shared_float(linear_restart + 1) + s = allocate_shared_float(linear_restart + 1) + g = allocate_shared_float(linear_restart + 1) V_ppar = allocate_shared_float(coords.z.n, linear_restart+1) V_pdf = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) begin_serial_region() @serial_region begin H .= 0.0 + c .= 0.0 + s .= 0.0 + g .= 0.0 V_ppar .= 0.0 V_pdf .= 0.0 end @@ -126,11 +141,17 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa V = (V_ppar, V_pdf) else H = allocate_shared_float(linear_restart + 1, linear_restart) + c = allocate_shared_float(linear_restart + 1) + s = 
allocate_shared_float(linear_restart + 1) + g = allocate_shared_float(linear_restart + 1) V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) begin_serial_region() @serial_region begin H .= 0.0 + c .= 0.0 + s .= 0.0 + g .= 0.0 V .= 0.0 end end @@ -167,8 +188,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, nl_solver_input.nonlinear_max_iterations, nl_solver_input.linear_rtol, nl_solver_input.linear_atol, - linear_restart, nl_solver_input.linear_max_restarts, H, V, - linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), + linear_restart, nl_solver_input.linear_max_restarts, H, c, s, g, + V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(nl_solver_input.preconditioner_update_interval), Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) @@ -324,8 +345,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, max_restarts=nl_solver_params.linear_max_restarts, left_preconditioner=left_preconditioner, right_preconditioner=right_preconditioner, - H=nl_solver_params.H, V=nl_solver_params.V, - rhs_delta=rhs_delta, + H=nl_solver_params.H, c=nl_solver_params.c, + s=nl_solver_params.s, g=nl_solver_params.g, + V=nl_solver_params.V, rhs_delta=rhs_delta, initial_guess=nl_solver_params.linear_initial_guess, distributed_norm=distributed_norm, distributed_dot=distributed_dot, @@ -1063,11 +1085,16 @@ end """ Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed at each step of the outer Newton iteration (in `newton_solve!()`). + +Uses Givens rotations to reduce the upper Hessenberg matrix to an upper triangular form, +which allows conveniently finding the residual at each step, and computing the final +solution, without calculating a least-squares minimisation at each step. 
See 'algorithm 2 +MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869]. """ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, restart, max_restarts, left_preconditioner, right_preconditioner, - H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot, - parallel_map, parallel_delta_x_calc, serial_solve) + H, c, s, g, V, rhs_delta, initial_guess, distributed_norm, + distributed_dot, parallel_map, parallel_delta_x_calc, serial_solve) # Solve (approximately?): # J δx = residual0 @@ -1105,6 +1132,14 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) beta = distributed_norm(w) parallel_map((w) -> w/beta, select_from_V(V, 1), w) + if serial_solve + g[1] = beta + else + begin_serial_region() + @serial_region begin + g[1] = beta + end + end # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is # so small that it is smaller than atol, in which case use atol instead. 
@@ -1115,7 +1150,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol counter = 0 restart_counter = 1 while true - for i ∈ 1:restart + i = 0 + while i < restart + i += 1 counter += 1 #println("Linear ", counter) @@ -1148,52 +1185,51 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol end parallel_map((w) -> w / H[i+1,i], select_from_V(V, i+1), w) - function temporary_residual!(result, guess) - #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess)) - result .= @view(H[1:i+1,1:i]) * guess - result[1] -= beta - end - - # Second argument to fsolve needs to be a Vector{Float64} if serial_solve - resize!(initial_guess, i) - initial_guess[1] = beta - initial_guess[2:i] .= 0.0 - lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) - residual = norm(lsq_result.f) + for j ∈ 1:i-1 + gamma = c[j] * H[j,i] + s[j] * H[j+1,i] + H[j+1,i] = -s[j] * H[j,i] + c[j] * H[j+1,i] + H[j,i] = gamma + end + delta = sqrt(H[i,i]^2 + H[i+1,i]^2) + s[i] = H[i+1,i] / delta + c[i] = H[i,i] / delta + H[i,i] = c[i] * H[i,i] + s[i] * H[i+1,i] + H[i+1,i] = 0 + g[i+1] = -s[i] * g[i] + g[i] = c[i] * g[i] else begin_serial_region() - if global_rank[] == 0 - resize!(initial_guess, i) - initial_guess[1] = beta - initial_guess[2:i] .= 0.0 - lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) - residual = norm(lsq_result.f) - else - residual = nothing + @serial_region begin + for j ∈ 1:i-1 + gamma = c[j] * H[j,i] + s[j] * H[j+1,i] + H[j+1,i] = -s[j] * H[j,i] + c[j] * H[j+1,i] + H[j,i] = gamma + end + delta = sqrt(H[i,i]^2 + H[i+1,i]^2) + s[i] = H[i+1,i] / delta + c[i] = H[i,i] / delta + H[i,i] = c[i] * H[i,i] + s[i] * H[i+1,i] + H[i+1,i] = 0 + g[i+1] = -s[i] * g[i] + g[i] = c[i] * g[i] end - residual = MPI.bcast(residual, comm_world; root=0) + _block_synchronize() end + residual = abs(g[i+1]) + if residual < tol break end end - # Update initial guess fo restart - 
if serial_solve - y = lsq_result.x - else - if global_rank[] == 0 - y = lsq_result.x - else - y = nothing - end - y = MPI.bcast(y, comm_world; root=0) - end + # Update initial guess to restart + ################################# + + @views y = H[1:i,1:i] \ g[1:i] - # The following is the `parallel_map()` version of + # The following calculates # delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y)) - # slightly abusing splatting to get the sum into a lambda-function. parallel_delta_x_calc(delta_x, V, y) right_preconditioner(delta_x) From fae7d0d055416c8a0b8d6f511c91413583c2b0ae Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 28 Sep 2024 12:41:27 +0100 Subject: [PATCH 100/107] Avoid using MPI.bcast(), and other fixes to prevent type instability MPI.bcast() can communicate (almost?) any type of object, but that means that the type of its result is not necessarily known before communication happens, leading to type instability. Therefore prefer to use other MPI.jl functions that are type-stable. Use in-place MPI operations in a few more places to avoid possibility of allocating extra buffers. Fix function wrapping in nonlinear_solvers to avoid type instability. Putting the function in a variable inside an if..elseif..else before wrapping it confuses the compiler, so instead need to do the 'wrapping' separately for each case. 
Fix way inner loop counter is used to avoid type instability Remove wrapper functions in nonlinear_solvers to avoid type instability --- moment_kinetics/src/initial_conditions.jl | 26 +- moment_kinetics/src/nonlinear_solvers.jl | 511 +++++++++++----------- moment_kinetics/src/runge_kutta.jl | 28 +- 3 files changed, 287 insertions(+), 278 deletions(-) diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 1629a2a6b..2ba8ccd4f 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -364,23 +364,21 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z # # q at the boundaries tells us dTe/dz for Braginskii electrons nu_ei = collisions.electron_fluid.nu_ei + dTe_dz_lower = Ref{mk_float}(0.0) if z.irank == 0 - dTe_dz_lower = @. -moments.electron.qpar[1,:] * 2.0 / 3.16 / - moments.electron.ppar[1,:] * - composition.me_over_mi * nu_ei - else - dTe_dz_lower = nothing + dTe_dz_lower[] = @. -moments.electron.qpar[1,:] * 2.0 / 3.16 / + moments.electron.ppar[1,:] * + composition.me_over_mi * nu_ei end - dTe_dz_lower = MPI.bcast(dTe_dz_lower, z.comm; root=0) + MPI.Bcast!(dTe_dz_lower, z.comm; root=0) + dTe_dz_upper = Ref{mk_float}(0.0) if z.irank == z.nrank - 1 - dTe_dz_upper = @. -moments.electron.qpar[end,:] * 2.0 / 3.16 / - moments.electron.ppar[end,:] * - composition.me_over_mi * nu_ei - else - dTe_dz_upper = nothing + dTe_dz_upper[] = @. 
-moments.electron.qpar[end,:] * 2.0 / 3.16 / + moments.electron.ppar[end,:] * + composition.me_over_mi * nu_ei end - dTe_dz_upper = MPI.bcast(dTe_dz_upper, z.comm; root=(z.nrank - 1)) + MPI.Bcast!(dTe_dz_upper, z.comm; root=(z.nrank - 1)) # The temperature should already be equal to the 'Boltzmann electron' # Te, so we just need to add a cubic that vanishes at ±Lz/2 @@ -401,9 +399,9 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z # 2*B - 3*2*B = -4*B = dTe/dz_upper + dTe/dz_lower Lz = z.L zg = z.grid - C = @. (dTe_dz_upper - dTe_dz_lower) / 2.0 / Lz + C = @. (dTe_dz_upper[] - dTe_dz_lower[]) / 2.0 / Lz A = @. -C * Lz^2 / 4 - B = @. -(dTe_dz_lower + dTe_dz_upper) / 4.0 + B = @. -(dTe_dz_lower[] + dTe_dz_upper[]) / 4.0 D = @. -4.0 * B / Lz^2 @loop_r ir begin @. moments.electron.temp[:,ir] += A[ir] + B[ir]*zg + C[ir]*zg^2 + diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 185c4c51d..392ec3fe4 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -235,9 +235,9 @@ function gather_nonlinear_solver_counters!(nl_solver_params) end if nl_solver_params.vpa_advection !== nothing # Solves are run in serial on separate processes, so need a global Allreduce - nl_solver_params.vpa_advection.global_n_solves[] = MPI.Allreduce(nl_solver_params.vpa_advection.n_solves[], +, comm_world) - nl_solver_params.vpa_advection.global_nonlinear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) - nl_solver_params.vpa_advection.global_linear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) + MPI.Allreduce!(nl_solver_params.vpa_advection.n_solves[], +, comm_world) + MPI.Allreduce!(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) + MPI.Allreduce!(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) end end @@ -304,15 +304,20 @@ is 
not necessary to have a very tight `linear_rtol` for the GMRES solve. function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; left_preconditioner=nothing, right_preconditioner=nothing, coords) + # This wrapper function constructs the `solver_type` from coords, so that the body of + # the inner `newton_solve!()` can be fully type-stable + solver_type = Val(Symbol((c for c ∈ keys(coords))...)) + return newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params, solver_type; left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, coords=coords) +end +function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params, solver_type::Val; left_preconditioner=nothing, + right_preconditioner=nothing, coords) rtol = nl_solver_params.rtol atol = nl_solver_params.atol - distributed_norm = get_distributed_norm(coords, rtol, atol, x) - distributed_dot = get_distributed_dot(coords, rtol, atol, x) - parallel_map = get_parallel_map(coords) - parallel_delta_x_calc = get_parallel_delta_x_calc(coords) - if left_preconditioner === nothing left_preconditioner = identity end @@ -320,12 +325,14 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, right_preconditioner = identity end + norm_params = (coords, nl_solver_params.rtol, nl_solver_params.atol, x) + residual_func!(residual, x) - residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) 
counter = 0 linear_counter = 0 - parallel_map(()->0.0, delta_x) + parallel_map(solver_type, ()->0.0, delta_x) close_counter = -1 close_linear_counter = -1 @@ -337,9 +344,10 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # Solve (approximately?): # J δx = -RHS(x) - parallel_map(()->0.0, delta_x) - linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w; - coords=coords, rtol=nl_solver_params.linear_rtol, + parallel_map(solver_type, ()->0.0, delta_x) + linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w, + solver_type, norm_params; coords=coords, + rtol=nl_solver_params.linear_rtol, atol=nl_solver_params.linear_atol, restart=nl_solver_params.linear_restart, max_restarts=nl_solver_params.linear_max_restarts, @@ -349,10 +357,6 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, s=nl_solver_params.s, g=nl_solver_params.g, V=nl_solver_params.V, rhs_delta=rhs_delta, initial_guess=nl_solver_params.linear_initial_guess, - distributed_norm=distributed_norm, - distributed_dot=distributed_dot, - parallel_map=parallel_map, - parallel_delta_x_calc=parallel_delta_x_calc, serial_solve=nl_solver_params.serial_solve) linear_counter += linear_its @@ -363,14 +367,14 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # during the line search, which might make it fail to converge). So calculate the # updated value in the buffer `w` until the line search is completed, and only # then copy it into `x`. - parallel_map((x) -> x, w, x) - parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x) + parallel_map(solver_type, (x) -> x, w, x) + parallel_map(solver_type, (x,delta_x) -> x + delta_x, w, x, delta_x) residual_func!(residual, w) # For the Newton iteration, we want the norm divided by the (sqrt of the) number # of grid points, so we can use a tolerance that is independent of the size of the # grid. This is unlike the norms needed in `linear_solve!()`. 
- residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) if isnan(residual_norm) error("NaN in Newton iteration at iteration $counter") end @@ -379,9 +383,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # decrease residual_norm s = 0.5 while s > 1.0e-2 - parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) residual_func!(residual, x) - residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) if residual_norm ≤ previous_residual_norm break end @@ -392,26 +396,26 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # # Failed to find a point that decreases the residual, so try a negative # # step # s = -1.0e-5 - # parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) # residual_func!(residual, x) - # residual_norm = distributed_norm(residual) + # residual_norm = distributed_norm(solver_type, residual, norm_params...) # if residual_norm > previous_residual_norm # # That didn't work either, so just take the full step and hope for # # convergence later - # parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) # residual_func!(residual, x) - # residual_norm = distributed_norm(residual) + # residual_norm = distributed_norm(solver_type, residual, norm_params...) 
# end #end if residual_norm > previous_residual_norm # Line search didn't work, so just take the full step and hope for # convergence later - parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) residual_func!(residual, x) - residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) end end - parallel_map((w) -> w, x, w) + parallel_map(solver_type, (w) -> w, x, w) previous_residual_norm = residual_norm #println("Newton residual ", residual_norm, " ", linear_its, " $rtol $atol") @@ -447,36 +451,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, return success end -""" - get_distributed_norm(coords, rtol, atol, x) - -Get a 'distributed_norm' function that acts on arrays with dimensions given by the -entries in `coords`. -""" -function get_distributed_norm(coords, rtol, atol, x) - dims = keys(coords) - if dims == (:z,) - this_norm = distributed_norm_z - elseif dims == (:vpa,) - this_norm = distributed_norm_vpa - elseif dims == (:z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - this_norm = distributed_norm_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - this_norm = distributed_norm_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. Need to write another " - * "`distributed_norm_*()` function in nonlinear_solvers.jl") - end - - wrapped_norm = (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x=x, - coords=coords, kwargs...) 
- - return wrapped_norm -end - -function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) +function distributed_norm(::Val{:z}, residual::AbstractArray{mk_float, 1}, coords, rtol, + atol, x) z = coords.z begin_z_region() @@ -497,20 +473,21 @@ function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, end _block_synchronize() - block_norm = MPI.Reduce(local_norm, +, comm_block[]) + global_norm = Ref(local_norm) + MPI.Reduce!(global_norm, +, comm_block[]) # global_norm is the norm_square for the block if block_rank[] == 0 - global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) - global_norm = sqrt(global_norm / z.n_global) - else - global_norm = nothing + MPI.Allreduce!(global_norm, +, comm_inter_block[]) # global_norm is the norm_square for the whole grid + global_norm[] = sqrt(global_norm[] / z.n_global) end - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + _block_synchronize() + MPI.Bcast!(global_norm, comm_block[]; root=0) - return global_norm + return global_norm[] end -function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) +function distributed_norm(::Val{:vpa}, residual::AbstractArray{mk_float, 1}, coords, rtol, + atol, x) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. 
residual_norm = 0.0 @@ -523,8 +500,9 @@ function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol return residual_norm end -function distributed_norm_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}; - coords, rtol, atol, x) +function distributed_norm(::Val{:zvperpvpa}, + residual::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + coords, rtol, atol, x) ppar_residual, pdf_residual = residual x_ppar, x_pdf = x z = coords.z @@ -548,13 +526,12 @@ function distributed_norm_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, 1} end _block_synchronize() - ppar_block_norm_square = MPI.Reduce(ppar_local_norm_square, +, comm_block[]) + global_norm_ppar = Ref(ppar_local_norm_square) # global_norm_ppar is the norm_square for ppar in the block + MPI.Reduce!(global_norm_ppar, +, comm_block[]) if block_rank[] == 0 - ppar_global_norm_square = MPI.Allreduce(ppar_block_norm_square, +, comm_inter_block[]) - ppar_global_norm_square = ppar_global_norm_square / z.n_global - else - ppar_global_norm_square = nothing + MPI.Allreduce!(global_norm_ppar, +, comm_inter_block[]) # global_norm_ppar is the norm_square for ppar in the whole grid + global_norm_ppar[] = global_norm_ppar[] / z.n_global end begin_z_vperp_vpa_region() @@ -570,24 +547,24 @@ function distributed_norm_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, 1} end _block_synchronize() - pdf_block_norm_square = MPI.Reduce(pdf_local_norm_square, +, comm_block[]) + global_norm = Ref(pdf_local_norm_square) + MPI.Reduce!(global_norm, +, comm_block[]) # global_norm is the norm_square for the block if block_rank[] == 0 - pdf_global_norm_square = MPI.Allreduce(pdf_block_norm_square, +, comm_inter_block[]) - pdf_global_norm_square = pdf_global_norm_square / (z.n_global * vperp.n_global * vpa.n_global) + MPI.Allreduce!(global_norm, +, comm_inter_block[]) # global_norm is the norm_square for the whole grid + global_norm[] = global_norm[] / (z.n_global * 
vperp.n_global * vpa.n_global) - global_norm = sqrt(mean((ppar_global_norm_square, pdf_global_norm_square))) - else - global_norm = nothing + global_norm[] = sqrt(mean((global_norm_ppar[], global_norm[]))) end + _block_synchronize() - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + MPI.Bcast!(global_norm, comm_block[]; root=0) - return global_norm + return global_norm[] end -function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}; - coords, rtol, atol, x) +function distributed_norm(::Val{:srzvperpvpa}, residual::AbstractArray{mk_float, 5}, + coords, rtol, atol, x) n_ion_species = coords.s r = coords.r z = coords.z @@ -617,49 +594,21 @@ function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}; end _block_synchronize() - block_norm = MPI.Reduce(local_norm, +, comm_block[]) + global_norm = Ref(local_norm) + MPI.Reduce!(global_norm, +, comm_block[]) # global_norm is the norm_square for the block if block_rank[] == 0 - global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) - global_norm = sqrt(global_norm / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global)) - else - global_norm = nothing + MPI.Allreduce!(global_norm, +, comm_inter_block[]) # global_norm is the norm_square for the whole grid + global_norm[] = sqrt(global_norm[] / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global)) end - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) - - return global_norm -end - -""" - get_distributed_dot(coords, rtol, atol, x) - -Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries -in `coords`. 
-""" -function get_distributed_dot(coords, rtol, atol, x) - dims = keys(coords) - if dims == (:z,) - this_dot = distributed_dot_z - elseif dims == (:vpa,) - this_dot = distributed_dot_vpa - elseif dims == (:z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - this_dot = distributed_dot_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - this_dot = distributed_dot_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. Need to write another " - * "`distributed_dot_*()` function in nonlinear_solvers.jl") - end - - wrapped_dot = (args...; kwargs...) -> this_dot(args...; rtol=rtol, atol=atol, x=x, - coords=coords, kwargs...) + _block_synchronize() + MPI.Bcast!(global_norm, comm_block[]; root=0) + return global_norm[] end -function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; - coords, atol, rtol, x) +function distributed_dot(::Val{:z}, v::AbstractArray{mk_float, 1}, + w::AbstractArray{mk_float, 1}, coords, rtol, atol, x) z = coords.z @@ -683,20 +632,19 @@ function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_fl end _block_synchronize() - block_dot = MPI.Reduce(local_dot, +, comm_block[]) + global_dot = Ref(local_dot) + MPI.Reduce!(global_dot, +, comm_block[]) # global_dot is the dot for the block if block_rank[] == 0 - global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) - global_dot = global_dot / z.n_global - else - global_dot = nothing + MPI.Allreduce!(global_dot, +, comm_inter_block[]) # global_dot is the dot for the whole grid + global_dot[] = global_dot[] / z.n_global end - return global_dot + return global_dot[] end -function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; - coords, atol, rtol, x) +function distributed_dot(::Val{:vpa}, v::AbstractArray{mk_float, 1}, + w::AbstractArray{mk_float, 1}, coords, rtol, atol, x) # No parallelism needed when the implicit 
solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. local_dot = 0.0 @@ -707,9 +655,10 @@ function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_ return local_dot end -function distributed_dot_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, - w::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}; - coords, atol, rtol, x) +function distributed_dot(::Val{:zvperpvpa}, + v::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + w::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + coords, rtol, atol, x) v_ppar, v_pdf = v w_ppar, w_pdf = w x_ppar, x_pdf = x @@ -735,13 +684,12 @@ function distributed_dot_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 1},Abstrac end _block_synchronize() - ppar_block_dot = MPI.Reduce(ppar_local_dot, +, comm_block[]) + ppar_global_dot = Ref(ppar_local_dot) + MPI.Reduce!(ppar_global_dot, +, comm_block[]) # ppar_global_dot is the ppar_dot for the block if block_rank[] == 0 - ppar_global_dot = MPI.Allreduce(ppar_block_dot, +, comm_inter_block[]) - ppar_global_dot = ppar_global_dot / z.n_global - else - ppar_global_dot = nothing + MPI.Allreduce!(ppar_global_dot, +, comm_inter_block[]) # ppar_global_dot is the ppar_dot for the whole grid + ppar_global_dot[] = ppar_global_dot[] / z.n_global end begin_z_vperp_vpa_region() @@ -755,23 +703,21 @@ function distributed_dot_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 1},Abstrac end _block_synchronize() - pdf_block_dot = MPI.Reduce(pdf_local_dot, +, comm_block[]) + global_dot = Ref(pdf_local_dot) + MPI.Reduce!(global_dot, +, comm_block[]) # global_dot is the dot for the block if block_rank[] == 0 - pdf_global_dot = MPI.Allreduce(pdf_block_dot, +, comm_inter_block[]) - pdf_global_dot = pdf_global_dot / (z.n_global * vperp.n_global * vpa.n_global) + MPI.Allreduce!(global_dot, +, comm_inter_block[]) # global_dot is the dot for the whole grid + global_dot[] = global_dot[] / 
(z.n_global * vperp.n_global * vpa.n_global) - global_dot = mean((ppar_global_dot, pdf_global_dot)) - else - global_dot = nothing + global_dot[] = mean((ppar_global_dot[], global_dot[])) end - return global_dot + return global_dot[] end -function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, - w::AbstractArray{mk_float, 5}; - coords, atol, rtol, x) +function distributed_dot(::Val{:srzvperpvpa}, v::AbstractArray{mk_float, 5}, + w::AbstractArray{mk_float, 5}, coords, rtol, atol, x) n_ion_species = coords.s r = coords.r z = coords.z @@ -800,46 +746,21 @@ function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, end _block_synchronize() - block_dot = MPI.Reduce(local_dot, +, comm_block[]) + global_dot = Ref(local_dot) + MPI.Reduce!(global_dot, +, comm_block[]) # global_dot is the dot for the block if block_rank[] == 0 - global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) - global_dot = global_dot / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global) - else - global_dot = nothing + MPI.Allreduce!(global_dot, +, comm_inter_block[]) # global_dot is the dot for the whole grid + global_dot[] = global_dot[] / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global) end - return global_dot -end - -""" - get_parallel_map(coords) - -Get a 'parallel_map' function that acts on arrays with dimensions given by the entries in -`coords`. -""" -function get_parallel_map(coords) - dims = keys(coords) - if dims == (:z,) - return parallel_map_z - elseif dims == (:vpa,) - return parallel_map_vpa - elseif dims == (:z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - return parallel_map_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - return parallel_map_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. 
Need to write another " - * "`parallel_map_*()` function in nonlinear_solvers.jl") - end + return global_dot[] end # Separate versions for different numbers of arguments as generator expressions result in # slow code -function parallel_map_z(func, result::AbstractArray{mk_float, 1}) +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}) begin_z_region() @@ -849,7 +770,7 @@ function parallel_map_z(func, result::AbstractArray{mk_float, 1}) return nothing end -function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1) +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}, x1) begin_z_region() @@ -859,18 +780,40 @@ function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1) return nothing end -function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1, x2) +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}, x1, x2) begin_z_region() - @loop_z iz begin - result[iz] = func(x1[iz], x2[iz]) + if isa(x2, AbstractArray) + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz]) + end + else + @loop_z iz begin + result[iz] = func(x1[iz], x2) + end + end + + return nothing +end +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}, x1, x2, x3) + + begin_z_region() + + if isa(x3, AbstractArray) + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz], x3[iz]) + end + else + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz], x3) + end end return nothing end -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}) +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. 
for i ∈ eachindex(result) @@ -878,7 +821,7 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}) end return nothing end -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1) +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}, x1) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. for i ∈ eachindex(result) @@ -886,16 +829,36 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1) end return nothing end -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1, x2) +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}, x1, x2) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. - for i ∈ eachindex(result) - result[i] = func(x1[i], x2[i]) + if isa(x2, AbstractArray) + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i]) + end + else + for i ∈ eachindex(result) + result[i] = func(x1[i], x2) + end + end + return nothing +end +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}, x1, x2, x3) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. 
+ if isa(x3, AbstractArray) + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i], x3[i]) + end + else + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i], x3) + end end return nothing end -function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}) +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}) result_ppar, result_pdf = result @@ -913,7 +876,7 @@ function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1} return nothing end -function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1) +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1) result_ppar, result_pdf = result x1_ppar, x1_pdf = x1 @@ -932,28 +895,77 @@ function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1} return nothing end -function parallel_map_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1, x2) +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1, x2) result_ppar, result_pdf = result x1_ppar, x1_pdf = x1 - x2_ppar, x2_pdf = x2 - begin_z_region() + if isa(x2, Tuple) + x2_ppar, x2_pdf = x2 + begin_z_region() - @loop_z iz begin - result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz]) + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz]) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz]) + end + else + begin_z_region() + + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2) + end end - begin_z_vperp_vpa_region() + return nothing 
+end +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1, x2, x3) - @loop_z_vperp_vpa iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz]) + result_ppar, result_pdf = result + x1_ppar, x1_pdf = x1 + x2_ppar, x2_pdf = x2 + + if isa(x3, Tuple) + x3_ppar, x3_pdf = x3 + begin_z_region() + + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz], x3_ppar[iz]) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz], x3_pdf[ivpa,ivperp,iz]) + end + else + begin_z_region() + + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz], x3) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz], x3) + end end return nothing end -function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}) +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}) begin_s_r_z_vperp_vpa_region() @@ -963,7 +975,7 @@ function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}) return nothing end -function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1) +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}, x1) begin_s_r_z_vperp_vpa_region() @@ -973,42 +985,40 @@ function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, return nothing end -function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1, x2) +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}, x1, x2) begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is]) + if isa(x2, 
AbstractArray) + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is]) + end + else + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2) + end end return nothing end +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}, x1, x2, x3) -""" - get_parallel_delta_x_calc(coords) + begin_s_r_z_vperp_vpa_region() -Get a parallelised function that calculates the update `delta_x` from the `V` matrix and -the minimum residual coefficients `y`. -""" -function get_parallel_delta_x_calc(coords) - dims = keys(coords) - if dims == (:z,) - return parallel_delta_x_calc_z - elseif dims == (:vpa,) - return parallel_delta_x_calc_vpa - elseif dims == (:z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - return parallel_delta_x_calc_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - return parallel_delta_x_calc_s_r_z_vperp_vpa + if isa(x3, AbstractArray) + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is], x3[ivpa,ivperp,iz,ir,is]) + end else - error("dims=$dims is not supported yet. 
Need to write another " - * "`parallel_delta_x_calc_*()` function in nonlinear_solvers.jl") + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is], x3) + end end + + return nothing end -function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y) +function parallel_delta_x_calc(::Val{:z}, delta_x::AbstractArray{mk_float, 1}, V, y) begin_z_region() @@ -1022,7 +1032,7 @@ function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y) return nothing end -function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) +function parallel_delta_x_calc(::Val{:vpa}, delta_x::AbstractArray{mk_float, 1}, V, y) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. ny = length(y) @@ -1034,7 +1044,7 @@ function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) return nothing end -function parallel_delta_x_calc_z_vperp_vpa(delta_x::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, V, y) +function parallel_delta_x_calc(::Val{:zvperpvpa}, delta_x::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, V, y) delta_x_ppar, delta_x_pdf = delta_x V_ppar, V_pdf = V @@ -1060,7 +1070,7 @@ function parallel_delta_x_calc_z_vperp_vpa(delta_x::Tuple{AbstractArray{mk_float return nothing end -function parallel_delta_x_calc_s_r_z_vperp_vpa(delta_x::AbstractArray{mk_float, 5}, V, y) +function parallel_delta_x_calc(::Val{:srzvperpvpa}, delta_x::AbstractArray{mk_float, 5}, V, y) begin_s_r_z_vperp_vpa_region() @@ -1091,10 +1101,10 @@ which allows conveniently finding the residual at each step, and computing the f solution, without calculating a least-squares minimisation at each step. See 'algorithm 2 MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869]. 
""" -function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, - restart, max_restarts, left_preconditioner, right_preconditioner, - H, c, s, g, V, rhs_delta, initial_guess, distributed_norm, - distributed_dot, parallel_map, parallel_delta_x_calc, serial_solve) +function linear_solve!(x, residual_func!, residual0, delta_x, v, w, solver_type::Val, + norm_params; coords, rtol, atol, restart, max_restarts, + left_preconditioner, right_preconditioner, H, c, s, g, V, + rhs_delta, initial_guess, serial_solve) # Solve (approximately?): # J δx = residual0 @@ -1114,9 +1124,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol function approximate_Jacobian_vector_product!(v) right_preconditioner(v) - parallel_map((x,v) -> x + Jv_scale_factor * v, v, x, v) + parallel_map(solver_type, (x,v) -> x + Jv_scale_factor * v, v, x, v) residual_func!(rhs_delta, v) - parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, + parallel_map(solver_type, (rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, v, rhs_delta, residual0) left_preconditioner(v) return v @@ -1124,14 +1134,14 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # To start with we use 'w' as a buffer to make a copy of residual0 to which we can apply # the left-preconditioner. - parallel_map((delta_x) -> delta_x, v, delta_x) + parallel_map(solver_type, (delta_x) -> delta_x, v, delta_x) left_preconditioner(residual0) # This function transforms the data stored in 'v' from δx to ≈J.δx approximate_Jacobian_vector_product!(v) # Now we actually set 'w' as the first Krylov vector, and normalise it. - parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) - beta = distributed_norm(w) - parallel_map((w) -> w/beta, select_from_V(V, 1), w) + parallel_map(solver_type, (residual0, v) -> -residual0 - v, w, residual0, v) + beta = distributed_norm(solver_type, w, norm_params...) 
+ parallel_map(solver_type, (w,beta) -> w/beta, select_from_V(V, 1), w, beta) if serial_solve g[1] = beta else @@ -1150,20 +1160,20 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol counter = 0 restart_counter = 1 while true - i = 0 - while i < restart - i += 1 + inner_counter = 0 + for i ∈ 1:restart + inner_counter = i counter += 1 #println("Linear ", counter) # Compute next Krylov vector - parallel_map((V) -> V, w, select_from_V(V, i)) + parallel_map(solver_type, (V) -> V, w, select_from_V(V, i)) approximate_Jacobian_vector_product!(w) # Gram-Schmidt orthogonalization for j ∈ 1:i - parallel_map((V) -> V, v, select_from_V(V, j)) - w_dot_Vj = distributed_dot(w, v) + parallel_map(solver_type, (V) -> V, v, select_from_V(V, j)) + w_dot_Vj = distributed_dot(solver_type, w, v, norm_params...) if serial_solve H[j,i] = w_dot_Vj else @@ -1172,9 +1182,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol H[j,i] = w_dot_Vj end end - parallel_map((w, V) -> w - H[j,i] * V, w, w, select_from_V(V, j)) + parallel_map(solver_type, (w, V) -> w - H[j,i] * V, w, w, select_from_V(V, j)) end - norm_w = distributed_norm(w) + norm_w = distributed_norm(solver_type, w, norm_params...) 
if serial_solve H[i+1,i] = norm_w else @@ -1183,7 +1193,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol H[i+1,i] = norm_w end end - parallel_map((w) -> w / H[i+1,i], select_from_V(V, i+1), w) + parallel_map(solver_type, (w) -> w / H[i+1,i], select_from_V(V, i+1), w) if serial_solve for j ∈ 1:i-1 @@ -1222,6 +1232,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol break end end + i = inner_counter # Update initial guess to restart ################################# @@ -1230,7 +1241,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # The following calculates # delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y)) - parallel_delta_x_calc(delta_x, V, y) + parallel_delta_x_calc(solver_type, delta_x, V, y) right_preconditioner(delta_x) if residual < tol || restart_counter > max_restarts @@ -1241,16 +1252,16 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Store J.delta_x in the variable delta_x, to use it to calculate the new first # Krylov vector v/beta. - parallel_map((delta_x) -> delta_x, v, delta_x) + parallel_map(solver_type, (delta_x) -> delta_x, v, delta_x) approximate_Jacobian_vector_product!(v) # Note residual0 has already had the left_preconditioner!() applied to it. - parallel_map((residual0, v) -> -residual0 - v, v, residual0, v) - beta = distributed_norm(v) + parallel_map(solver_type, (residual0, v) -> -residual0 - v, v, residual0, v) + beta = distributed_norm(solver_type, v, norm_params...) 
for i ∈ 2:length(y) - parallel_map(() -> 0.0, select_from_V(V, i)) + parallel_map(solver_type, () -> 0.0, select_from_V(V, i)) end - parallel_map((v) -> v/beta, select_from_V(V, 1), v) + parallel_map(solver_type, (v,beta) -> v/beta, select_from_V(V, 1), v, beta) end return counter diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index ff61661ba..a83e49e21 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -1082,26 +1082,26 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, if error_norm_method == "Linf" # Get overall maximum error on the shared-memory block - error_norms = MPI.Reduce(error_norms, max, comm_block[]; root=0) + MPI.Reduce!(error_norms, max, comm_block[]; root=0) - error_norm = nothing + error_norm = Ref{mk_float}(0.0) max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks - error_norms = MPI.Allreduce(error_norms, max, comm_inter_block[]) + MPI.Allreduce!(error_norms, max, comm_inter_block[]) max_error_variable_index = argmax(error_norms) - error_norm = error_norms[max_error_variable_index] + error_norm[] = error_norms[max_error_variable_index] end - error_norm = MPI.bcast(error_norm, 0, comm_block[]) + MPI.Bcast!(error_norm, 0, comm_block[]) elseif error_norm_method == "L2" # Get overall maximum error on the shared-memory block - error_norms = MPI.Reduce(error_norms, +, comm_block[]; root=0) + MPI.Reduce!(error_norms, +, comm_block[]; root=0) - error_norm = nothing + error_norm = Ref{mk_float}(0.0) max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks - error_norms = MPI.Allreduce(error_norms, +, comm_inter_block[]) + MPI.Allreduce!(error_norms, +, comm_inter_block[]) # So far `error_norms` is the sum of squares of the errors. Now that summation # is finished, need to divide by total number of points and take square-root. 
@@ -1110,13 +1110,13 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, # Weight the error from each variable equally by taking the mean, so the # larger number of points in the distribution functions does not mean that # error on the moments is ignored. - error_norm = mean(error_norms) + error_norm[] = mean(error_norms) # Record which variable had the maximum error max_error_variable_index = argmax(error_norms) end - error_norm = MPI.bcast(error_norm, 0, comm_block[]) + MPI.Bcast!(error_norm, 0, comm_block[]) else error("Unrecognized error_norm_method '$method'") end @@ -1170,7 +1170,7 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, t_params.step_to_moments_output[] = false t_params.step_to_dfns_output[] = false end - elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13) + elseif (error_norm[] > 1.0 || isnan(error_norm[])) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13) # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when # t+dt=next_output_time. # Use current_dt instead of t_params.dt[] here because we are about to write to @@ -1191,7 +1191,7 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, # Get new timestep estimate using same formula as for a successful step, but # limit decrease to factor 1/2 - this factor should probably be settable! 
t_params.dt[] = max(t_params.dt[] / 2.0, - t_params.dt[] * t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order)) + t_params.dt[] * t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order)) t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) # Don't update the simulation time, as this step failed @@ -1206,7 +1206,7 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, t_params.step_to_moments_output[] = false t_params.step_to_dfns_output[] = false - #println("t=$t, timestep failed, error_norm=$error_norm, error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) + #println("t=$t, timestep failed, error_norm=$(error_norm[]), error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) end else @serial_region begin @@ -1237,7 +1237,7 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, # `step_update_prefactor` is a constant numerical factor to make the estimate # of a good value for the next timestep slightly conservative. It defaults to # 0.9. - t_params.dt[] *= t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order) + t_params.dt[] *= t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order) if t_params.dt[] > CFL_limit t_params.dt[] = CFL_limit From ae287a77cd96b84634513562e901c0d3b2110819 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 28 Sep 2024 15:43:03 +0100 Subject: [PATCH 101/107] Replace `Ref` with `Base.RefValue` in struct definitions `Ref` is not a concrete type, so a struct defined with `Ref` members is not type-stable. The concrete type (relevant to our usage) is `Base.RefValue`. 
--- moment_kinetics/src/communication.jl | 2 +- moment_kinetics/src/input_structs.jl | 12 +++++------ .../src/moment_kinetics_structs.jl | 10 +++++----- moment_kinetics/src/nonlinear_solvers.jl | 20 +++++++++---------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/moment_kinetics/src/communication.jl b/moment_kinetics/src/communication.jl index 394446a4a..0c9080a36 100644 --- a/moment_kinetics/src/communication.jl +++ b/moment_kinetics/src/communication.jl @@ -397,7 +397,7 @@ end """ struct DebugMPISharedArray{T, N, TArray <: AbstractArray{T,N}, TIntArray <: AbstractArray{mk_int,N}, TBoolArray <: AbstractArray{Bool,N}} <: AbstractArray{T, N} data::TArray - accessed::Ref{Bool} + accessed::Base.RefValue{Bool} is_initialized::TIntArray is_read::TBoolArray is_written::TBoolArray diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index ac6ecf192..bd295c257 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -45,12 +45,12 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero step_to_dfns_output::MPISharedArray{Bool,1} write_moments_output::MPISharedArray{Bool,1} write_dfns_output::MPISharedArray{Bool,1} - step_counter::Ref{mk_int} - max_step_count_this_ion_step::Ref{mk_int} - max_t_increment_this_ion_step::Ref{mk_float} - moments_output_counter::Ref{mk_int} - dfns_output_counter::Ref{mk_int} - failure_counter::Ref{mk_int} + step_counter::Base.RefValue{mk_int} + max_step_count_this_ion_step::Base.RefValue{mk_int} + max_t_increment_this_ion_step::Base.RefValue{mk_float} + moments_output_counter::Base.RefValue{mk_int} + dfns_output_counter::Base.RefValue{mk_int} + failure_counter::Base.RefValue{mk_int} failure_caused_by::Vector{mk_int} limit_caused_by::Vector{mk_int} nwrite_moments::mk_int diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl index a1ef580e2..18b5b0189 100644 --- 
a/moment_kinetics/src/moment_kinetics_structs.jl +++ b/moment_kinetics/src/moment_kinetics_structs.jl @@ -157,23 +157,23 @@ struct moments_electron_substruct # this is the particle density dens::MPISharedArray{mk_float,2} # flag that keeps track of if the density needs updating before use - dens_updated::Ref{Bool} + dens_updated::Base.RefValue{Bool} # this is the parallel flow upar::MPISharedArray{mk_float,2} # flag that keeps track of whether or not upar needs updating before use - upar_updated::Ref{Bool} + upar_updated::Base.RefValue{Bool} # this is the parallel pressure ppar::MPISharedArray{mk_float,2} # flag that keeps track of whether or not ppar needs updating before use - ppar_updated::Ref{Bool} + ppar_updated::Base.RefValue{Bool} # this is the temperature temp::MPISharedArray{mk_float,2} # flag that keeps track of whether or not temp needs updating before use - temp_updated::Ref{Bool} + temp_updated::Base.RefValue{Bool} # this is the parallel heat flux qpar::MPISharedArray{mk_float,2} # flag that keeps track of whether or not qpar needs updating before use - qpar_updated::Ref{Bool} + qpar_updated::Base.RefValue{Bool} # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m) vth::MPISharedArray{mk_float,2} # this is the parallel friction force between ions and electrons diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 392ec3fe4..89fb4ddf4 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -55,17 +55,17 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon} g::Tcsg V::TV linear_initial_guess::Tlig - n_solves::Ref{mk_int} - nonlinear_iterations::Ref{mk_int} - linear_iterations::Ref{mk_int} - global_n_solves::Ref{mk_int} - global_nonlinear_iterations::Ref{mk_int} - global_linear_iterations::Ref{mk_int} - solves_since_precon_update::Ref{mk_int} - precon_dt::Ref{mk_float} + n_solves::Base.RefValue{mk_int} + 
nonlinear_iterations::Base.RefValue{mk_int} + linear_iterations::Base.RefValue{mk_int} + global_n_solves::Base.RefValue{mk_int} + global_nonlinear_iterations::Base.RefValue{mk_int} + global_linear_iterations::Base.RefValue{mk_int} + solves_since_precon_update::Base.RefValue{mk_int} + precon_dt::Base.RefValue{mk_float} serial_solve::Bool - max_nonlinear_iterations_this_step::Ref{mk_int} - max_linear_iterations_this_step::Ref{mk_int} + max_nonlinear_iterations_this_step::Base.RefValue{mk_int} + max_linear_iterations_this_step::Base.RefValue{mk_int} preconditioner_type::String preconditioner_update_interval::mk_int preconditioners::Tprecon From 98a43ba7ed169b307e6272d249e0321e520bae9d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 28 Sep 2024 17:34:14 +0100 Subject: [PATCH 102/107] Fix reloading of `t_params.electron.previous_dt[]` when restarting --- moment_kinetics/src/load_data.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index b678e1a48..f9c400dbd 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -1021,7 +1021,10 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, time_index) end if "electron_dt" ∈ keys(dynamic) - electron_dt = load_slice(dynamic, "electron_dt", time_index) + # The algorithm for electron pseudo-timestepping actually starts each + # solve using t_params.electron.previous_dt[], so "electron_previous_dt" + # is the thing to load. + electron_dt = load_slice(dynamic, "electron_previous_dt", time_index) end if "electron_dt_before_last_fail" ∈ keys(dynamic) electron_dt_before_last_fail = From aa9c281d58a3eb9989576af12f194e8887a407f4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 29 Sep 2024 16:43:42 +0100 Subject: [PATCH 103/107] Make dt, etc. non-shared memory Reduces need for synchronizations and reduces possibilities for bugs. 
--- .../src/electron_kinetic_equation.jl | 147 +++---- moment_kinetics/src/input_structs.jl | 19 +- moment_kinetics/src/load_data.jl | 36 +- moment_kinetics/src/runge_kutta.jl | 386 +++++++++--------- moment_kinetics/src/time_advance.jl | 74 ++-- 5 files changed, 309 insertions(+), 353 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index e6e63e0ac..07d8229df 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -464,10 +464,7 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll end # update the time following the pdf update - @serial_region begin - t_params.t[] += t_params.previous_dt[] - end - _block_synchronize() + t_params.t[] += t_params.previous_dt[] residual = -1.0 if t_params.previous_dt[] > 0.0 @@ -641,14 +638,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") end - # Need to always synchronize here because `t_params.dt[]` might have been read by - # other processes in the block even though the region type was - # `begin_region_serial()`. - _block_synchronize() - begin_serial_region() - @serial_region begin - t_params.dt[] = t_params.previous_dt[] - end + t_params.dt[] = t_params.previous_dt[] begin_r_z_region() @loop_r_z ir iz begin @@ -717,10 +707,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end if initial_time !== nothing - @serial_region begin - t_params.t[] = initial_time - end - _block_synchronize() + t_params.t[] = initial_time # Make sure that output times are set relative to this initial_time (the values in # t_params are set relative to 0.0). 
moments_output_times = t_params.moments_output_times .+ initial_time @@ -1331,71 +1318,63 @@ println("recalculating precon") coords=(z=z, vperp=vperp, vpa=vpa)) if newton_success #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) - begin_serial_region() - @serial_region begin - # update the time following the pdf update - t_params.t[] += t_params.dt[] - - if first_step && !reduced_by_ion_dt - # Adjust t_params.previous_dt[] which gives the initial timestep for - # the electron pseudotimestepping loop. - # If ion_dt ", t_params.previous_dt[]) - #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) - elseif nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold - # Step succeeded, but took a lot of iterations so decrease initial - # step size. - print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) - t_params.previous_dt[] /= t_params.max_increase_factor - println(" -> ", t_params.previous_dt[]) - #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) - elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.previous_dt[] < t_params.cap_factor_ion_dt * ion_dt) - # Only took a few iterations, so increase initial step size. - print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) - if ion_dt === nothing - t_params.previous_dt[] *= t_params.max_increase_factor - else - t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) - end - println(" -> ", t_params.previous_dt[]) - end - end - - # Adjust the timestep depending on the iteration count. 
- # Note nl_solver_params.max_linear_iterations_this_step[] gives the total - # number of iterations, so is a better measure of the total work done by - # the solver than the nonlinear iteration count, or the linear iterations - # per nonlinear iteration - #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) - if nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold && t_params.dt[] > t_params.previous_dt[] - # Step succeeded, but took a lot of iterations so decrease step size. - t_params.dt[] /= t_params.max_increase_factor - elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.dt[] < t_params.cap_factor_ion_dt * ion_dt) - # Only took a few iterations, so increase step size. + # update the time following the pdf update + t_params.t[] += t_params.dt[] + + if first_step && !reduced_by_ion_dt + # Adjust t_params.previous_dt[] which gives the initial timestep for + # the electron pseudotimestepping loop. + # If ion_dt ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) + elseif nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold + # Step succeeded, but took a lot of iterations so decrease initial + # step size. + print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) + t_params.previous_dt[] /= t_params.max_increase_factor + println(" -> ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) + elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.previous_dt[] < t_params.cap_factor_ion_dt * ion_dt) + # Only took a few iterations, so increase initial step size. 
+ print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) if ion_dt === nothing - t_params.dt[] *= t_params.max_increase_factor + t_params.previous_dt[] *= t_params.max_increase_factor else - t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) + t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) end + println(" -> ", t_params.previous_dt[]) + end + end + + # Adjust the timestep depending on the iteration count. + # Note nl_solver_params.max_linear_iterations_this_step[] gives the total + # number of iterations, so is a better measure of the total work done by + # the solver than the nonlinear iteration count, or the linear iterations + # per nonlinear iteration + #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) + if nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold && t_params.dt[] > t_params.previous_dt[] + # Step succeeded, but took a lot of iterations so decrease step size. + t_params.dt[] /= t_params.max_increase_factor + elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.dt[] < t_params.cap_factor_ion_dt * ion_dt) + # Only took a few iterations, so increase step size. + if ion_dt === nothing + t_params.dt[] *= t_params.max_increase_factor + else + t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) end end - _block_synchronize() first_step = false else - begin_serial_region() - @serial_region begin - t_params.dt[] *= 0.5 - end - _block_synchronize() + t_params.dt[] *= 0.5 # Force the preconditioner to be recalculated, because we have just # changed `dt` by a fairly large amount. 
@@ -1556,20 +1535,12 @@ println("recalculating precon") if t_params.previous_dt[] < initial_dt_scale_factor * t_params.dt[] # If dt has increased a lot, we can probably try a larger initial dt for the next # solve. - begin_serial_region() - @serial_region begin - t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] - end - _block_synchronize() + t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] end if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] # Reset dt in case it was reduced to be less than 0.5*ion_dt - begin_serial_region() - @serial_region begin - t_params.dt[] = t_params.previous_dt[] - end - _block_synchronize() + t_params.dt[] = t_params.previous_dt[] end if !electron_pdf_converged success = "kinetic-electrons" @@ -2571,11 +2542,6 @@ function electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, error_norms = error_norm_type[] total_points = mk_int[] - # Read the current dt here, so we only need one _block_synchronize() call for this and - # the begin_s_r_z_vperp_vpa_region() - current_dt = t_params.dt[] - _block_synchronize() - # Test CFL conditions for advection in electron kinetic equation to give stability # limit for timestep # @@ -2655,9 +2621,8 @@ function electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, end adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, - current_dt, error_norm_method, "", 0.0, - composition; electron=true, - local_max_dt=local_max_dt) + error_norm_method, "", 0.0, composition; + electron=true, local_max_dt=local_max_dt) if t_params.previous_dt[] == 0.0 # Timestep failed, so reset scratch[t_params.n_rk_stages+1] equal to # scratch[1] to start the timestep over. 
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index bd295c257..f427c3a83 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -34,17 +34,16 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero n_variables::mk_int nstep::mk_int end_time::mk_float - t::MPISharedArray{mk_float,1} - dt::MPISharedArray{mk_float,1} - previous_dt::MPISharedArray{mk_float,1} - next_output_time::MPISharedArray{mk_float,1} - dt_before_output::MPISharedArray{mk_float,1} - dt_before_last_fail::MPISharedArray{mk_float,1} + t::Base.RefValue{mk_float} + dt::Base.RefValue{mk_float} + previous_dt::Base.RefValue{mk_float} + dt_before_output::Base.RefValue{mk_float} + dt_before_last_fail::Base.RefValue{mk_float} CFL_prefactor::mk_float - step_to_moments_output::MPISharedArray{Bool,1} - step_to_dfns_output::MPISharedArray{Bool,1} - write_moments_output::MPISharedArray{Bool,1} - write_dfns_output::MPISharedArray{Bool,1} + step_to_moments_output::Base.RefValue{Bool} + step_to_dfns_output::Base.RefValue{Bool} + write_moments_output::Base.RefValue{Bool} + write_dfns_output::Base.RefValue{Bool} step_counter::Base.RefValue{mk_int} max_step_count_this_ion_step::Base.RefValue{mk_int} max_t_increment_this_ion_step::Base.RefValue{mk_float} diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index f9c400dbd..8d32077fd 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -632,10 +632,10 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_prefix_iblock, time_index, composition, geometry, r, z, vpa, vperp, vzeta, vr, vz) code_time = 0.0 - dt = nothing - dt_before_last_fail = nothing - electron_dt = nothing - electron_dt_before_last_fail = nothing + dt = Ref(-Inf) + dt_before_last_fail = Ref(Inf) + electron_dt = Ref(-Inf) + electron_dt_before_last_fail = Ref(Inf) previous_runs_info = nothing 
restart_electron_physics = nothing begin_serial_region() @@ -1010,24 +1010,24 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, # If "dt" is not present, the file being restarted from is an older # one that did not have an adaptive timestep, so just leave the value # of "dt" from the input file. - dt = load_slice(dynamic, "dt", time_index) + dt[] = load_slice(dynamic, "dt", time_index) end if "dt_before_last_fail" ∈ keys(dynamic) # If "dt_before_last_fail" is not present, the file being # restarted from is an older one that did not have an adaptive # timestep, so just leave the value of "dt_before_last_fail" from # the input file. - dt_before_last_fail = load_slice(dynamic, "dt_before_last_fail", + dt_before_last_fail[] = load_slice(dynamic, "dt_before_last_fail", time_index) end if "electron_dt" ∈ keys(dynamic) # The algorithm for electron pseudo-timestepping actually starts each # solve using t_params.electron.previous_dt[], so "electron_previous_dt" # is the thing to load. 
- electron_dt = load_slice(dynamic, "electron_previous_dt", time_index) + electron_dt[] = load_slice(dynamic, "electron_previous_dt", time_index) end if "electron_dt_before_last_fail" ∈ keys(dynamic) - electron_dt_before_last_fail = + electron_dt_before_last_fail[] = load_slice(dynamic, "electron_dt_before_last_fail", time_index) end finally @@ -1048,9 +1048,25 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, moments.neutral.qz_updated .= true restart_electron_physics = MPI.bcast(restart_electron_physics, 0, comm_block[]) + MPI.Bcast!(dt, comm_block[]) + MPI.Bcast!(dt_before_last_fail, comm_block[]) + MPI.Bcast!(electron_dt, comm_block[]) + MPI.Bcast!(electron_dt_before_last_fail, comm_block[]) - return code_time, dt, dt_before_last_fail, electron_dt, electron_dt_before_last_fail, - previous_runs_info, time_index, restart_electron_physics + if dt[] == -Inf + dt = nothing + else + dt = dt[] + end + if electron_dt[] == -Inf + electron_dt = nothing + else + electron_dt = electron_dt[] + end + + return code_time, dt, dt_before_last_fail[], electron_dt, + electron_dt_before_last_fail[], previous_runs_info, time_index, + restart_electron_physics end """ diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index a83e49e21..0c87369b7 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -1057,28 +1057,30 @@ end """ adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, - total_points, current_dt, error_norm_method, - success, nl_max_its_fraction) + total_points, error_norm_method, success, + nl_max_its_fraction, composition; + electron=false, local_max_dt::mk_float=Inf) Use the calculated `CFL_limits` and `error_norms` to update the timestep in `t_params`. 
""" function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, - total_points, current_dt, error_norm_method, - success, nl_max_its_fraction, composition; + total_points, error_norm_method, success, + nl_max_its_fraction, composition; electron=false, local_max_dt::mk_float=Inf) # Get global minimum of CFL limits - CFL_limit = nothing + CFL_limit = Ref(0.0) this_limit_caused_by = nothing @serial_region begin # Get maximum error over all blocks CFL_limits = MPI.Allreduce(CFL_limits, min, comm_inter_block[]) CFL_limit_caused_by = argmin(CFL_limits) - CFL_limit = CFL_limits[CFL_limit_caused_by] + CFL_limit[] = CFL_limits[CFL_limit_caused_by] # Reserve first four entries of t_params.limit_caused_by for max_increase_factor, # max_increase_factor_near_fail, minimum_dt, maximum_dt limits and # high_nl_iterations, then the next `n_variables` for RK accuracy limits. this_limit_caused_by = CFL_limit_caused_by + 5 + t_params.n_variables end + MPI.Bcast!(CFL_limit, comm_block[]) if error_norm_method == "Linf" # Get overall maximum error on the shared-memory block @@ -1124,240 +1126,240 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, if success != "" # Iteration failed in implicit part of timestep try decreasing timestep - @serial_region begin - t_params.failure_counter[] += 1 - - if t_params.previous_dt[] > 0.0 - # If previous_dt=0, the previous step was also a failure so only update - # dt_before_last_fail when previous_dt>0 - t_params.dt_before_last_fail[] = t_params.previous_dt[] - end + t_params.failure_counter[] += 1 - # Decrease timestep by 1/2 - this factor should probably be settable! - # Note when nonlinear solve iteration fails, we do not enforce - # minimum_dt, as the timesolver must error if we do not decrease dt. - if t_params.dt[] > t_params.minimum_dt - # ...but try decreasing just to minimum_dt first, if the dt is still - # bigger than this. 
- t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt) - else - t_params.dt[] = t_params.dt[] / 2.0 - end + if t_params.previous_dt[] > 0.0 + # If previous_dt=0, the previous step was also a failure so only update + # dt_before_last_fail when previous_dt>0 + t_params.dt_before_last_fail[] = t_params.previous_dt[] + end - # Don't update the simulation time, as this step failed - t_params.previous_dt[] = 0.0 - - # Call the 'cause' of the timestep failure the variable that has the biggest - # error norm here. - # Could do with a better way to sort the different possible types of - # convergence failure... - if t_params.rk_coefs_implicit !== nothing && - composition.electron_physics ∈ (kinetic_electrons, - kinetic_electrons_with_temperature_equation) - if success == "nonlinear-solver" - t_params.failure_caused_by[end-1] += 1 - elseif success == "kinetic-electrons" - t_params.failure_caused_by[end] += 1 - else - error("Unrecognised cause of convergence failure: \"$success\"") - end - else + # Decrease timestep by 1/2 - this factor should probably be settable! + # Note when nonlinear solve iteration fails, we do not enforce + # minimum_dt, as the timesolver must error if we do not decrease dt. + if t_params.dt[] > t_params.minimum_dt + # ...but try decreasing just to minimum_dt first, if the dt is still + # bigger than this. + t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt) + else + t_params.dt[] = t_params.dt[] / 2.0 + end + + # Don't update the simulation time, as this step failed + t_params.previous_dt[] = 0.0 + + # Call the 'cause' of the timestep failure the variable that has the biggest + # error norm here. + # Could do with a better way to sort the different possible types of + # convergence failure... 
+ if t_params.rk_coefs_implicit !== nothing && + composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + if success == "nonlinear-solver" + t_params.failure_caused_by[end-1] += 1 + elseif success == "kinetic-electrons" t_params.failure_caused_by[end] += 1 + else + error("Unrecognised cause of convergence failure: \"$success\"") end - - # If we were trying to take a step to the output timestep, dt will be smaller on - # the re-try, so will not reach the output time. - t_params.step_to_moments_output[] = false - t_params.step_to_dfns_output[] = false + else + t_params.failure_caused_by[end] += 1 end - elseif (error_norm[] > 1.0 || isnan(error_norm[])) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13) + + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_moments_output[] = false + t_params.step_to_dfns_output[] = false + elseif (error_norm[] > 1.0 || isnan(error_norm[])) && t_params.dt[] > t_params.minimum_dt * (1.0 + 1.0e-13) # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when # t+dt=next_output_time. - # Use current_dt instead of t_params.dt[] here because we are about to write to - # the shared-memory variable t_params.dt[] below, and we do not want to add an - # extra _block_synchronize() call after reading it here. 
# # Timestep failed, reduce timestep and re-try - @serial_region begin - t_params.failure_counter[] += 1 + t_params.failure_counter[] += 1 - if t_params.previous_dt[] > 0.0 - # If previous_dt=0, the previous step was also a failure so only update - # dt_before_last_fail when previous_dt>0 - t_params.dt_before_last_fail[] = t_params.previous_dt[] - end + if t_params.previous_dt[] > 0.0 + # If previous_dt=0, the previous step was also a failure so only update + # dt_before_last_fail when previous_dt>0 + t_params.dt_before_last_fail[] = t_params.previous_dt[] + end - # Get new timestep estimate using same formula as for a successful step, but - # limit decrease to factor 1/2 - this factor should probably be settable! - t_params.dt[] = max(t_params.dt[] / 2.0, - t_params.dt[] * t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order)) - t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) + # Get new timestep estimate using same formula as for a successful step, but + # limit decrease to factor 1/2 - this factor should probably be settable! + t_params.dt[] = max(t_params.dt[] / 2.0, + t_params.dt[] * t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order)) + t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) - # Don't update the simulation time, as this step failed - t_params.previous_dt[] = 0.0 + # Don't update the simulation time, as this step failed + t_params.previous_dt[] = 0.0 - # Call the 'cause' of the timestep failure the variable that has the biggest - # error norm here + # Call the 'cause' of the timestep failure the variable that has the biggest + # error norm here + @serial_region begin t_params.failure_caused_by[max_error_variable_index] += 1 + end - # If we were trying to take a step to the output timestep, dt will be smaller on - # the re-try, so will not reach the output time. 
- t_params.step_to_moments_output[] = false - t_params.step_to_dfns_output[] = false + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_moments_output[] = false + t_params.step_to_dfns_output[] = false - #println("t=$t, timestep failed, error_norm=$(error_norm[]), error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) - end + #println("t=$t, timestep failed, error_norm=$(error_norm[]), error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) else - @serial_region begin - # Save the timestep used to complete this step, this is used to update the - # simulation time. - t_params.previous_dt[] = t_params.dt[] - - if t_params.step_to_moments_output[] || t_params.step_to_dfns_output[] - # Completed an output step, reset dt to what it was before it was reduced to reach - # the output time - t_params.dt[] = t_params.dt_before_output[] - - if t_params.step_to_moments_output[] - t_params.step_to_moments_output[] = false - t_params.write_moments_output[] = true - end - if t_params.step_to_dfns_output[] - t_params.step_to_dfns_output[] = false - t_params.write_dfns_output[] = true - end + # Save the timestep used to complete this step, this is used to update the + # simulation time. 
+ t_params.previous_dt[] = t_params.dt[] + + if t_params.step_to_moments_output[] || t_params.step_to_dfns_output[] + # Completed an output step, reset dt to what it was before it was reduced to reach + # the output time + t_params.dt[] = t_params.dt_before_output[] + + if t_params.step_to_moments_output[] + t_params.step_to_moments_output[] = false + t_params.write_moments_output[] = true + end + if t_params.step_to_dfns_output[] + t_params.step_to_dfns_output[] = false + t_params.write_dfns_output[] = true + end - if t_params.dt[] > CFL_limit - t_params.dt[] = CFL_limit - end + if t_params.dt[] > CFL_limit[] + t_params.dt[] = CFL_limit[] + end + else + # Adjust timestep according to Fehlberg's suggestion + # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). + # `step_update_prefactor` is a constant numerical factor to make the estimate + # of a good value for the next timestep slightly conservative. It defaults to + # 0.9. + t_params.dt[] *= t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order) + + if t_params.dt[] > CFL_limit[] + t_params.dt[] = CFL_limit[] else - # Adjust timestep according to Fehlberg's suggestion - # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). - # `step_update_prefactor` is a constant numerical factor to make the estimate - # of a good value for the next timestep slightly conservative. It defaults to - # 0.9. - t_params.dt[] *= t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order) - - if t_params.dt[] > CFL_limit - t_params.dt[] = CFL_limit - else - # Reserve first four entries of t_params.limit_caused_by for - # max_increase_factor, max_increase_factor_near_fail, minimum_dt and - # maximum_dt limits, high_nl_iterations. + # Reserve first four entries of t_params.limit_caused_by for + # max_increase_factor, max_increase_factor_near_fail, minimum_dt and + # maximum_dt limits, high_nl_iterations. 
+ @serial_region begin this_limit_caused_by = 5 + max_error_variable_index end + end - # Limit so timestep cannot increase by a large factor, which might lead to - # numerical instability in some cases. - max_cap_limit_caused_by = 1 - if isinf(t_params.max_increase_factor_near_last_fail) - # Not using special timestep limiting near last failed dt value - max_cap = t_params.max_increase_factor * t_params.previous_dt[] - else - max_cap = t_params.max_increase_factor * t_params.previous_dt[] - slow_increase_threshold = t_params.dt_before_last_fail[] / t_params.last_fail_proximity_factor - if t_params.previous_dt[] > t_params.dt_before_last_fail[] * t_params.last_fail_proximity_factor - # dt has successfully exceeded the last failed value, so allow it - # to increase more quickly again - t_params.dt_before_last_fail[] = Inf - elseif max_cap > slow_increase_threshold - # dt is getting close to last failed value, so increase more - # slowly - max_cap = max(slow_increase_threshold, - t_params.max_increase_factor_near_last_fail * - t_params.previous_dt[]) - max_cap_limit_caused_by = 2 - end + # Limit so timestep cannot increase by a large factor, which might lead to + # numerical instability in some cases. 
+ max_cap_limit_caused_by = 1 + if isinf(t_params.max_increase_factor_near_last_fail) + # Not using special timestep limiting near last failed dt value + max_cap = t_params.max_increase_factor * t_params.previous_dt[] + else + max_cap = t_params.max_increase_factor * t_params.previous_dt[] + slow_increase_threshold = t_params.dt_before_last_fail[] / t_params.last_fail_proximity_factor + if t_params.previous_dt[] > t_params.dt_before_last_fail[] * t_params.last_fail_proximity_factor + # dt has successfully exceeded the last failed value, so allow it + # to increase more quickly again + t_params.dt_before_last_fail[] = Inf + elseif max_cap > slow_increase_threshold + # dt is getting close to last failed value, so increase more + # slowly + max_cap = max(slow_increase_threshold, + t_params.max_increase_factor_near_last_fail * + t_params.previous_dt[]) + max_cap_limit_caused_by = 2 end - if t_params.dt[] > max_cap - t_params.dt[] = max_cap + end + if t_params.dt[] > max_cap + t_params.dt[] = max_cap + @serial_region begin this_limit_caused_by = max_cap_limit_caused_by end + end - # Prevent timestep from going below minimum_dt - if t_params.dt[] < t_params.minimum_dt - t_params.dt[] = t_params.minimum_dt + # Prevent timestep from going below minimum_dt + if t_params.dt[] < t_params.minimum_dt + t_params.dt[] = t_params.minimum_dt + @serial_region begin this_limit_caused_by = 3 end + end - # Prevent timestep from going above maximum_dt - max_dt = min(t_params.maximum_dt, local_max_dt) - if t_params.dt[] > max_dt - t_params.dt[] = max_dt + # Prevent timestep from going above maximum_dt + max_dt = min(t_params.maximum_dt, local_max_dt) + if t_params.dt[] > max_dt + t_params.dt[] = max_dt + @serial_region begin this_limit_caused_by = 4 end + end - if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 - # The last step took many nonlinear iterations, so do not allow the - # timestep to increase. 
- # If t_params.previous_dt[]==0.0, then the previous step failed so - # timestep will not be increasing, so do not need this check. - if t_params.dt[] > t_params.previous_dt[] - t_params.dt[] = t_params.previous_dt[] + if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 + # The last step took many nonlinear iterations, so do not allow the + # timestep to increase. + # If t_params.previous_dt[]==0.0, then the previous step failed so + # timestep will not be increasing, so do not need this check. + if t_params.dt[] > t_params.previous_dt[] + t_params.dt[] = t_params.previous_dt[] + @serial_region begin this_limit_caused_by = 5 end end + end + @serial_region begin t_params.limit_caused_by[this_limit_caused_by] += 1 + end - if (t_params.step_counter[] % 1000 == 0) && global_rank[] == 0 - prefix = electron ? "electron" : "ion" - println("$prefix step ", t_params.step_counter[], ": t=", - round(t_params.t[], sigdigits=6), ", nfail=", - t_params.failure_counter[], ", dt=", t_params.dt[]) - end + if (t_params.step_counter[] % 1000 == 0) && global_rank[] == 0 + prefix = electron ? "electron" : "ion" + println("$prefix step ", t_params.step_counter[], ": t=", + round(t_params.t[], sigdigits=6), ", nfail=", + t_params.failure_counter[], ", dt=", t_params.dt[]) end end end - @serial_region begin - minimum_dt = 1.e-14 - if t_params.dt[] < minimum_dt - println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " - * "$minimum_dt at t=$(t_params.t[]). Ending run.") - # Set dt negative to signal an error - t_params.dt[] = -1.0 - end - - current_time = t_params.t[] + t_params.previous_dt[] - # Store here to ensure dt_before_output is set correctly when both moments and - # dfns are written at the same time. 
- current_dt = t_params.dt[] - if (!t_params.write_after_fixed_step_count - && !t_params.write_moments_output[] - && length(t_params.moments_output_times) > 0 - && (t_params.moments_output_counter[] ≤ length(t_params.moments_output_times)) - && (current_time + t_params.dt[] >= t_params.moments_output_times[t_params.moments_output_counter[]])) - - t_params.dt_before_output[] = current_dt - t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time - t_params.step_to_moments_output[] = true - - if t_params.dt[] < 0.0 - error("When trying to step to next output time, made negative timestep " - * "dt=$(t_params.dt[])") - end - end - if (!t_params.write_after_fixed_step_count - && !t_params.write_dfns_output[] - && length(t_params.dfns_output_times) > 0 - && (t_params.dfns_output_counter[] ≤ length(t_params.dfns_output_times)) - && (current_time + t_params.dt[] >= t_params.dfns_output_times[t_params.dfns_output_counter[]])) + minimum_dt = 1.e-14 + if t_params.dt[] < minimum_dt + println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " + * "$minimum_dt at t=$(t_params.t[]). Ending run.") + # Set dt negative to signal an error + t_params.dt[] = -1.0 + end - t_params.dt_before_output[] = current_dt - t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time - t_params.step_to_dfns_output[] = true + current_time = t_params.t[] + t_params.previous_dt[] + # Store here to ensure dt_before_output is set correctly when both moments and + # dfns are written at the same time. 
+ current_dt = t_params.dt[] + if (!t_params.write_after_fixed_step_count + && !t_params.write_moments_output[] + && length(t_params.moments_output_times) > 0 + && (t_params.moments_output_counter[] ≤ length(t_params.moments_output_times)) + && (current_time + t_params.dt[] >= t_params.moments_output_times[t_params.moments_output_counter[]])) - if t_params.dt[] < 0.0 - error("When trying to step to next output time, made negative timestep " - * "dt=$(t_params.dt[])") - end + t_params.dt_before_output[] = current_dt + t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time + t_params.step_to_moments_output[] = true + + if t_params.dt[] < 0.0 + error("When trying to step to next output time, made negative timestep " + * "dt=$(t_params.dt[])") end end + if (!t_params.write_after_fixed_step_count + && !t_params.write_dfns_output[] + && length(t_params.dfns_output_times) > 0 + && (t_params.dfns_output_counter[] ≤ length(t_params.dfns_output_times)) + && (current_time + t_params.dt[] >= t_params.dfns_output_times[t_params.dfns_output_counter[]])) + + t_params.dt_before_output[] = current_dt + t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time + t_params.step_to_dfns_output[] = true - # Shared-memory variables have been updated, so synchronize - _block_synchronize() + if t_params.dt[] < 0.0 + error("When trying to step to next output time, made negative timestep " + * "dt=$(t_params.dt[])") + end + end return nothing end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 0b888b77e..06b5219c5 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -341,29 +341,15 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, * "`write_after_fixed_step_count=true`.") end - t_shared = allocate_shared_float(1) - dt_shared = allocate_shared_float(1) - previous_dt_shared = allocate_shared_float(1) - 
next_output_time = allocate_shared_float(1) - dt_before_output = allocate_shared_float(1) - dt_before_last_fail = allocate_shared_float(1) - step_to_moments_output = allocate_shared_bool(1) - step_to_dfns_output = allocate_shared_bool(1) - write_moments_output = allocate_shared_bool(1) - write_dfns_output = allocate_shared_bool(1) - if block_rank[] == 0 - t_shared[] = code_time - dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload - previous_dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload - next_output_time[] = 0.0 - dt_before_output[] = dt_reload === nothing ? t_input["dt"] : dt_reload - dt_before_last_fail[] = dt_before_last_fail_reload === nothing ? Inf : dt_before_last_fail_reload - step_to_moments_output[] = false - step_to_dfns_output[] = false - write_moments_output[] = false - write_dfns_output[] = false - end - _block_synchronize() + t = Ref(code_time) + dt = Ref(dt_reload === nothing ? t_input["dt"] : dt_reload) + previous_dt = Ref(dt[]) + dt_before_output = Ref(dt[]) + dt_before_last_fail = Ref(dt_before_last_fail_reload === nothing ? 
Inf : dt_before_last_fail_reload) + step_to_moments_output = Ref(false) + step_to_dfns_output = Ref(false) + write_moments_output = Ref(false) + write_dfns_output = Ref(false) end_time = code_time + t_input["dt"] * t_input["nstep"] epsilon = 1.e-11 @@ -451,17 +437,16 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, cap_factor_ion_dt = Inf electron_t_params = electron end - return time_info(n_variables, t_input["nstep"], end_time, t_shared, dt_shared, - previous_dt_shared, next_output_time, dt_before_output, - dt_before_last_fail, CFL_prefactor, step_to_moments_output, - step_to_dfns_output, write_moments_output, write_dfns_output, Ref(0), - Ref(0), Ref{mk_float}(0.0), Ref(0), Ref(0), Ref(0), mk_int[], - mk_int[], t_input["nwrite"], t_input["nwrite_dfns"], - moments_output_times, dfns_output_times, t_input["type"], rk_coefs, - rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, - rk_order, adaptive, low_storage, t_input["rtol"], t_input["atol"], - t_input["atol_upar"], t_input["step_update_prefactor"], - t_input["max_increase_factor"], + return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt, + dt_before_output, dt_before_last_fail, CFL_prefactor, + step_to_moments_output, step_to_dfns_output, write_moments_output, + write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0), + Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"], + t_input["nwrite_dfns"], moments_output_times, dfns_output_times, + t_input["type"], rk_coefs, rk_coefs_implicit, + implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, + low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"], + t_input["step_update_prefactor"], t_input["max_increase_factor"], t_input["max_increase_factor_near_last_fail"], t_input["last_fail_proximity_factor"], t_input["minimum_dt"], t_input["maximum_dt"], @@ -1809,10 +1794,7 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param diagnostic_checks, 
t_params.step_counter[]) end # update the time - @serial_region begin - t_params.t[] += t_params.previous_dt[] - end - _block_synchronize() + t_params.t[] += t_params.previous_dt[] if t_params.t[] ≥ t_params.end_time - epsilon || (t_params.write_after_fixed_step_count && @@ -1838,11 +1820,8 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param write_moments = t_params.write_moments_output[] || finish_now write_dfns = t_params.write_dfns_output[] || finish_now - _block_synchronize() - @serial_region begin - t_params.write_moments_output[] = false - t_params.write_dfns_output[] = false - end + t_params.write_moments_output[] = false + t_params.write_dfns_output[] = false else write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0 || t_params.step_counter[] >= t_params.nstep @@ -2536,11 +2515,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, error_norms = error_norm_type[] total_points = mk_int[] - # Read the current dt here, so we only need one _block_synchronize() call for this and - # the begin_s_r_z_vperp_vpa_region() - current_dt = t_params.dt[] - _block_synchronize() - # Test CFL conditions for advection in kinetic equation to give stability limit for # timestep # @@ -2827,8 +2801,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, end adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, - current_dt, error_norm_method, success, - nl_max_its_fraction, composition) + error_norm_method, success, nl_max_its_fraction, + composition) if composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) From 954a6469fc57d701915c8c74be24b7d4f9e5db63 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 29 Sep 2024 21:44:56 +0100 Subject: [PATCH 104/107] Re-enable tests on macOS Nonlinear solver (JFNK) seems to work on macOS now. 
--- .github/workflows/examples.yml | 6 +--- .../test/braginskii_electrons_imex_tests.jl | 29 ++++++++----------- .../test/kinetic_electron_tests.jl | 5 ---- .../test/nonlinear_solver_tests.jl | 16 +++------- 4 files changed, 17 insertions(+), 39 deletions(-) diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index b4332f3f4..e38a824e9 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -23,8 +23,4 @@ jobs: touch Project.toml julia -O3 --project -e 'import Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.add("NCDatasets"); Pkg.precompile()' # Reduce nstep for each example to 10 to avoid the CI job taking too long - # Note we skip the example `if (occursin("ARK", get(t_input, "type", "") && Sys.isapple())` - # because the way we use MINPACK.jl (needed for nonlinear solvers - # used for implicit parts of timestep) doesn't currently work on - # macOS. - julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); if ((occursin("ARK", get(t_input, "type", "")) || occursin("PareschiRusso", get(t_input, "type", "")) || occursin("kinetic_electrons", get(get(input, "composition", Dict{String,Any}()), "electron_physics", "boltzmann_electron_response"))) && Sys.isapple()) continue end; t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, 
"nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' + julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' diff --git a/moment_kinetics/test/braginskii_electrons_imex_tests.jl b/moment_kinetics/test/braginskii_electrons_imex_tests.jl index 5a2ba1daa..1104271f3 100644 --- a/moment_kinetics/test/braginskii_electrons_imex_tests.jl +++ b/moment_kinetics/test/braginskii_electrons_imex_tests.jl @@ -277,24 +277,19 @@ function runtests() @testset "Braginskii electron IMEX timestepping" verbose=use_verbose begin println("Braginskii electron IMEX timestepping tests") - if Sys.isapple() - @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin - end - else - @testset "Split 3" begin - test_input["output"]["base_directory"] = test_output_directory - run_test(test_input, expected_p, expected_q, expected_vt) - end - @long @testset "Check other timestep - 
$type" for - type ∈ ("KennedyCarpenterARK437",) + @testset "Split 3" begin + test_input["output"]["base_directory"] = test_output_directory + run_test(test_input, expected_p, expected_q, expected_vt) + end + @long @testset "Check other timestep - $type" for + type ∈ ("KennedyCarpenterARK437",) - timestep_check_input = deepcopy(test_input) - timestep_check_input["output"]["base_directory"] = test_output_directory - timestep_check_input["output"]["run_name"] = type - timestep_check_input["timestepping"]["type"] = type - run_test(timestep_check_input, expected_p, expected_q, expected_vt, - rtol=2.e-4, atol=1.e-10) - end + timestep_check_input = deepcopy(test_input) + timestep_check_input["output"]["base_directory"] = test_output_directory + timestep_check_input["output"]["run_name"] = type + timestep_check_input["timestepping"]["type"] = type + run_test(timestep_check_input, expected_p, expected_q, expected_vt, + rtol=2.e-4, atol=1.e-10) end end diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index cad2967ed..6e58393e2 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -271,11 +271,6 @@ function run_test() end function runtests() - if Sys.isapple() - @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin - end - return nothing - end @testset "kinetic electrons" begin println("Kinetic electron tests") run_test() diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index 2d609398f..ab68389f4 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -272,18 +272,10 @@ function nonlinear_test() end function runtests() - if Sys.isapple() - @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin - 
println("non-linear solver tests") - linear_test() - nonlinear_test() - end - else - @testset "non-linear solvers" begin - println("non-linear solver tests") - linear_test() - nonlinear_test() - end + @testset "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() end end From a1696824c14f2c2a8ab13957890c3226673fcc77 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 29 Sep 2024 21:47:52 +0100 Subject: [PATCH 105/107] Tidy up formatting of test prints --- moment_kinetics/test/fokker_planck_tests.jl | 24 +++++++++---------- moment_kinetics/test/jacobian_matrix_tests.jl | 20 ++++++++-------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl index 8f2dafbd7..cd29e94ca 100644 --- a/moment_kinetics/test/fokker_planck_tests.jl +++ b/moment_kinetics/test/fokker_planck_tests.jl @@ -66,8 +66,8 @@ function runtests() @testset "Fokker Planck tests" verbose=use_verbose begin println("Fokker Planck tests") - @testset " - test Lagrange-polynomial 2D interpolation" begin - println(" - test Lagrange-polynomial 2D interpolation") + @testset "Lagrange-polynomial 2D interpolation" begin + println(" - test Lagrange-polynomial 2D interpolation") ngrid = 9 nelement_vpa = 16 nelement_vperp = 8 @@ -140,9 +140,9 @@ function runtests() end - @testset " - test weak-form 2D differentiation" begin + @testset "weak-form 2D differentiation" begin # tests the correct definition of mass and stiffness matrices in 2D - println(" - test weak-form 2D differentiation") + println(" - test weak-form 2D differentiation") ngrid = 9 nelement_vpa = 8 @@ -205,8 +205,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form Rosenbluth potential calculation: elliptic solve" begin - println(" - test weak-form Rosenbluth potential calculation: elliptic solve") + @testset "weak-form Rosenbluth potential calculation: elliptic solve" begin + 
println(" - test weak-form Rosenbluth potential calculation: elliptic solve") ngrid = 9 nelement_vpa = 8 nelement_vperp = 4 @@ -353,8 +353,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form collision operator calculation" begin - println(" - test weak-form collision operator calculation") + @testset "weak-form collision operator calculation" begin + println(" - test weak-form collision operator calculation") ngrid = 9 nelement_vpa = 8 nelement_vperp = 4 @@ -510,8 +510,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form (slowing-down) collision operator calculation" begin - println(" - test weak-form (slowing-down) collision operator calculation") + @testset "weak-form (slowing-down) collision operator calculation" begin + println(" - test weak-form (slowing-down) collision operator calculation") ngrid = 9 nelement_vpa = 16 nelement_vperp = 8 @@ -607,8 +607,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form Rosenbluth potential calculation: direct integration" begin - println(" - test weak-form Rosenbluth potential calculation: direct integration") + @testset "weak-form Rosenbluth potential calculation: direct integration" begin + println(" - test weak-form Rosenbluth potential calculation: direct integration") ngrid = 5 # chosen for a quick test -- direct integration is slow! 
nelement_vpa = 8 nelement_vperp = 4 diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index b739bc76a..7e48cc80f 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -225,7 +225,7 @@ end function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_z_advection" - println(" electron_z_advection") + println(" - electron_z_advection") @testset "electron_z_advection" begin # Suppress console output while running @@ -453,7 +453,7 @@ end function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_vpa_advection" - println(" electron_vpa_advection") + println(" - electron_vpa_advection") @testset "electron_vpa_advection" begin # Suppress console output while running @@ -721,7 +721,7 @@ end function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_contribution_from_electron_pdf_term" - println(" contribution_from_electron_pdf_term") + println(" - contribution_from_electron_pdf_term") @testset "contribution_from_electron_pdf_term" begin # Suppress console output while running @@ -975,7 +975,7 @@ end function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_dissipation_term" - println(" electron_dissipation_term") + println(" - electron_dissipation_term") @testset "electron_dissipation_term" begin # Suppress console output while running @@ -1207,7 +1207,7 @@ end function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_krook_collisions" - println(" electron_krook_collisions") + println(" 
- electron_krook_collisions") @testset "electron_krook_collisions" begin # Suppress console output while running @@ -1445,7 +1445,7 @@ end function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_external_electron_source" - println(" external_electron_source") + println(" - external_electron_source") @testset "external_electron_source" begin # Suppress console output while running @@ -1718,7 +1718,7 @@ end function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsilon)) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_implicit_constraint_forcing" - println(" electron_implicit_constraint_forcing") + println(" - electron_implicit_constraint_forcing") @testset "electron_implicit_constraint_forcing" begin # Suppress console output while running @@ -1953,7 +1953,7 @@ end function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_energy_equation" - println(" electron_energy_equation") + println(" - electron_energy_equation") @testset "electron_energy_equation" begin # Suppress console output while running @@ -2162,7 +2162,7 @@ end function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_ion_dt_forcing_of_electron_ppar" - println(" ion_dt_forcing_of_electron_ppar") + println(" - ion_dt_forcing_of_electron_ppar") @testset "ion_dt_forcing_of_electron_ppar" begin # Suppress console output while running @@ -2357,7 +2357,7 @@ end function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) test_input = deepcopy(test_input) test_input["output"]["run_name"] *= "_electron_kinetic_equation" - println(" electron_kinetic_equation") + println(" - electron_kinetic_equation") @testset "electron_kinetic_equation" begin # Suppress console 
output while running From e75d6acd32dad0c22bee76d7ffc4a490a027ed03 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 29 Sep 2024 19:47:16 +0100 Subject: [PATCH 106/107] Tweak parameters of kinetic electron test and update expected results Better electron timestep size for kinetic electron test --- .../test/kinetic_electron_tests.jl | 138 +++++++++--------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index 6e58393e2..723da1be2 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -108,10 +108,10 @@ kinetic_input["timestepping"] = OptionsDict("type" => "PareschiRusso2(2,2,2)", ) kinetic_input["electron_timestepping"] = OptionsDict("nstep" => 5000000, - "dt" => 1.0e-5, + "dt" => 5.0e-6, "nwrite" => 10000, "nwrite_dfns" => 100000, - "decrease_dt_iteration_threshold" => 1000, + "decrease_dt_iteration_threshold" => 5000, "increase_dt_iteration_threshold" => 0, "cap_factor_ion_dt" => 10.0, "initialization_residual_value" => 1.0e10, @@ -172,72 +172,72 @@ function run_test() # Regression test # Benchmark data generated in serial on Linux - expected_Ez = [-0.5990683230706185 -0.6042082363495851; - -0.4944296396481284 -0.49692371894536586; - -0.30889032954504736 -0.3090990586904173; - -0.2064830747303776 -0.20700297720010077; - -0.21232457328748663 -0.2132748045598696; - -0.18233875912042674 -0.18276920923500758; - -0.16711429522309232 -0.1674324272230308; - -0.16920776495088916 -0.16937992443371716; - -0.1629417555658927 -0.16309341722744303; - -0.16619150334079993 -0.16633546753735795; - -0.15918194883360942 -0.15931554370144113; - -0.14034706409006803 -0.140469880250037; - -0.12602184032280567 -0.12613381924054493; - -0.10928716440800472 -0.10938345602505639; - -0.07053969674257217 -0.0706024520856333; - -0.0249577746169536 -0.024980098134854842; - -2.8327303308330514e-15 
-1.599033453711614e-10; - 0.024957774616960776 0.02498009782733815; - 0.07053969674257636 0.07060245115760132; - 0.10928716440799909 0.10938345732933795; - 0.1260218403227975 0.1261338225947928; - 0.1403470640900294 0.14046988178255268; - 0.1591819488336015 0.15931556545456152; - 0.16619150334082114 0.1663353993955267; - 0.16294175556587748 0.16309307445724816; - 0.16920776495090983 0.1693805039915145; - 0.1671142952230893 0.1674318780154963; - 0.1823387591204167 0.18277420263305205; - 0.21232457328753865 0.21326329266495697; - 0.20648307473037922 0.20700517064938181; - 0.3088903295450278 0.3091144991453789; - 0.4944296396481271 0.49684270193048663; - 0.5990683230705801 0.6040141042995336] - expected_vthe = [27.08122333732766 27.083668406411196; - 27.087128061238488 27.08840157326006; - 27.090525010446868 27.090443986816897; - 27.091202856161452 27.0914901864659; - 27.09265674296987 27.093297466503625; - 27.093298138334738 27.09337068853881; - 27.094377689895747 27.094548022524926; - 27.09501542767647 27.095170446421935; - 27.095227831625575 27.095304545176944; - 27.095420218946682 27.09555512096241; - 27.095754478126825 27.095876494374046; - 27.096054218271775 27.096188914603825; - 27.096199500698383 27.096294431476554; - 27.09632238748948 27.096423453543142; - 27.096502792691805 27.096594041947167; - 27.096597492028636 27.096694147970585; - 27.096610989303674 27.096702959927107; - 27.096597492397745 27.096694148339555; - 27.096502794930903 27.096594044186332; - 27.096322390449956 27.09642345650393; - 27.096199499205674 27.096294429984052; - 27.09605421760595 27.096188913937898; - 27.095754438597055 27.095876454845936; - 27.09542019655419 27.095555098545283; - 27.095228009815475 27.095304723869976; - 27.095015217848847 27.09517023619458; - 27.094377437638478 27.09454777080713; - 27.093294828184774 27.093367377705533; - 27.092639150183448 27.09327987116632; - 27.0912092735745 27.091496606764487; - 27.09048496370012 27.090403937882265; - 27.08714601914595 
27.08841951855733; - 27.08144246136634 27.08388753119234] + expected_Ez = [-0.5990683230706185 -0.604849806235434; + -0.4944296396481284 -0.49739671491727844; + -0.30889032954504736 -0.30924318765687464; + -0.2064830747303776 -0.20682475071884582; + -0.21232457328748663 -0.21299072376949116; + -0.18233875912042674 -0.18256905463006085; + -0.16711429522309232 -0.1673112962636778; + -0.16920776495088916 -0.1693227707158167; + -0.1629417555658927 -0.16304933113558318; + -0.16619150334079993 -0.16629539618289285; + -0.15918194883360942 -0.1592799009526323; + -0.14034706409006803 -0.140437217833422; + -0.12602184032280567 -0.12610387949683538; + -0.10928716440800472 -0.10935785133612701; + -0.07053969674257217 -0.07058573063123225; + -0.0249577746169536 -0.024974174596810936; + -2.8327303308330514e-15 -1.441401377024236e-10; + 0.024957774616960776 0.02497417427570905; + 0.07053969674257636 0.07058572965952663; + 0.10928716440799909 0.10935785264749627; + 0.1260218403227975 0.12610388283669527; + 0.1403470640900294 0.1404372197714126; + 0.1591819488336015 0.15927992284761766; + 0.16619150334082114 0.1662953275454769; + 0.16294175556587748 0.1630489871826757; + 0.16920776495090983 0.1693233489685909; + 0.1671142952230893 0.16731075590341918; + 0.1823387591204167 0.1825740389953209; + 0.21232457328753865 0.21297925141919793; + 0.20648307473037922 0.20682690396901446; + 0.3088903295450278 0.30925854110074175; + 0.4944296396481271 0.49731601862961966; + 0.5990683230705801 0.6046564647413697] + expected_vthe = [27.08102229345079 27.08346736523219; + 27.087730258479823 27.089003820908527; + 27.091898844901323 27.09181784480061; + 27.092455021687254 27.092742387764524; + 27.09350739287911 27.094148133125078; + 27.093817059011126 27.093889601910092; + 27.09443981315218 27.094610141036807; + 27.09484177005478 27.094996783801374; + 27.094985914811055 27.0950626278904; + 27.095122128675094 27.09525702879687; + 27.09536357532887 27.09548558966323; + 27.095582117080163 
27.095716810823177; + 27.09568783962135 27.09578276803757; + 27.0957775472326 27.095878610625554; + 27.095909169276535 27.09600041573683; + 27.095978269355648 27.096074922150624; + 27.095988166679223 27.096080134292468; + 27.095978269713978 27.096074922508883; + 27.095909171602027 27.096000418062378; + 27.09577755035281 27.095878613746088; + 27.095687838236376 27.095782766652857; + 27.09558211622511 27.095716809968053; + 27.09536353456768 27.09548554890375; + 27.095122105596843 27.095257005693973; + 27.094986093051983 27.09506280663278; + 27.094841563692096 27.094996577040796; + 27.094439553087433 27.094609881510113; + 27.093813728418613 27.09388627063591; + 27.093489818175936 27.094130555874184; + 27.09246140309467 27.092748772044477; + 27.09185903467811 27.09177803239964; + 27.08774827015981 27.089021820036553; + 27.081240668889404 27.0836857414255] if expected_Ez == nothing # Error: no expected input provided @@ -257,7 +257,7 @@ function run_test() # Iteration counts are fairly inconsistent, but it's good to check that they at # least don't unexpectedly increase by an order of magnitude. # Expected iteration count is from a serial run on Linux. 
- expected_electron_advance_linear_iterations = 10695 + expected_electron_advance_linear_iterations = 11394 @test electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations if !(electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations) println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") From 65a216428fb788c826e6dfbad29b3200894e9fcb Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 30 Sep 2024 16:46:33 +0100 Subject: [PATCH 107/107] Allow plots/animations of unnormalised f_electron --- .../src/makie_post_processing.jl | 219 +++++++++++------- 1 file changed, 134 insertions(+), 85 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index befa46972..9acef44c4 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -1129,6 +1129,7 @@ function plots_for_dfn_variable(run_info, variable_name; plot_prefix, has_rdim=t input = Dict_to_NamedTuple(input_dict_dfns[variable_name]) is_neutral = variable_name ∈ neutral_dfn_variables + is_electron = variable_name ∈ electron_dfn_variables if is_neutral animate_dims = setdiff(neutral_dimensions, (:sn,)) @@ -1245,23 +1246,30 @@ function plots_for_dfn_variable(run_info, variable_name; plot_prefix, has_rdim=t else if input[Symbol(:plot, log, :_unnorm_vs_vpa)] outfile = var_prefix * "unnorm_vs_vpa.pdf" - plot_f_unnorm_vs_vpa(run_info; input=input, is=is, outfile=outfile, - yscale=yscale, transform=transform) + plot_f_unnorm_vs_vpa(run_info; input=input, electron=is_electron, + is=is, outfile=outfile, yscale=yscale, + transform=transform) end if has_zdim && input[Symbol(:plot, log, :_unnorm_vs_vpa_z)] outfile = var_prefix * 
"unnorm_vs_vpa_z.pdf" - plot_f_unnorm_vs_vpa_z(run_info; input=input, is=is, outfile=outfile, - colorscale=yscale, transform=transform) + plot_f_unnorm_vs_vpa_z(run_info; input=input, + electron=is_electron, is=is, + outfile=outfile, colorscale=yscale, + transform=transform) end if input[Symbol(:animate, log, :_unnorm_vs_vpa)] outfile = var_prefix * "unnorm_vs_vpa." * input.animation_ext - animate_f_unnorm_vs_vpa(run_info; input=input, is=is, outfile=outfile, - yscale=yscale, transform=transform) + animate_f_unnorm_vs_vpa(run_info; input=input, + electron=is_electron, is=is, + outfile=outfile, yscale=yscale, + transform=transform) end if has_zdim && input[Symbol(:animate, log, :_unnorm_vs_vpa_z)] outfile = var_prefix * "unnorm_vs_vpa_z." * input.animation_ext - animate_f_unnorm_vs_vpa_z(run_info; input=input, is=is, outfile=outfile, - colorscale=yscale, transform=transform) + animate_f_unnorm_vs_vpa_z(run_info; input=input, + electron=is_electron, is=is, + outfile=outfile, colorscale=yscale, + transform=transform) end end check_moment_constraints(run_info, is_neutral; input=input, plot_prefix) @@ -3801,9 +3809,9 @@ function calculate_steady_state_residual(run_info, variable_name; is=1, data=not end """ - plot_f_unnorm_vs_vpa(run_info; input=nothing, neutral=false, it=nothing, is=1, - iz=nothing, fig=nothing, ax=nothing, outfile=nothing, - yscale=identity, transform=identity, + plot_f_unnorm_vs_vpa(run_info; input=nothing, electron=false, neutral=false, + it=nothing, is=1, iz=nothing, fig=nothing, ax=nothing, + outfile=nothing, yscale=identity, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) Plot an unnormalized distribution function against \$v_\\parallel\$ at a fixed z. @@ -3815,8 +3823,9 @@ The information for the runs to plot is passed in `run_info` (as returned by [`get_run_info`](@ref)). If `run_info` is a Tuple, comparison plots are made where plots from the different runs are overlayed on the same axis. 
-By default plots the ion distribution function. If `neutrals=true` is passed, plots the -neutral distribution function instead. +By default plots the ion distribution function. If `electron=true` is passed, plots the +electron distribution function instead. If `neutral=true` is passed, plots the neutral +distribution function instead. `is` selects which species to analyse. @@ -3848,8 +3857,9 @@ Any extra `kwargs` are passed to [`plot_1d`](@ref). """ function plot_f_unnorm_vs_vpa end -function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, - outfile=nothing, axis_args=Dict{Symbol,Any}(), kwargs...) +function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, electron=false, + neutral=false, outfile=nothing, + axis_args=Dict{Symbol,Any}(), kwargs...) try n_runs = length(run_info) @@ -3859,8 +3869,8 @@ function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, fig, ax = get_1d_ax(; xlabel=L"v_\parallel", ylabel=ylabel, axis_args...) for ri ∈ run_info - plot_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, neutral=neutral, ax=ax, - kwargs...) + plot_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, electron=electron, + neutral=neutral, ax=ax, kwargs...) end if n_runs > 1 @@ -3879,10 +3889,16 @@ function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, end end -function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutral=false, - it=nothing, is=1, iz=nothing, fig=nothing, ax=nothing, - outfile=nothing, transform=identity, +function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, electron=false, + neutral=false, it=nothing, is=1, iz=nothing, fig=nothing, + ax=nothing, outfile=nothing, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) 
+ + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -3901,7 +3917,7 @@ function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutra end if ax === nothing - species_label = neutral ? "n" : "i" + species_label = neutral ? "n" : electron ? "e" : "i" divide_by = f_over_vpa2 ? L"/v_\parallel^2" : "" ylabel = L"f_{%$species_label,\mathrm{unnormalized}}%$divide_by" fig, ax = get_1d_ax(; xlabel=L"v_\parallel", ylabel=ylabel, axis_args...) @@ -3917,11 +3933,13 @@ function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutra iz=iz) vcoord = run_info.vz else - f = get_variable(run_info, "f"; it=it, is=is, ir=input.ir0, iz=iz, + suffix = electron ? "_electron" : "" + prefix = electron ? "electron_" : "" + f = get_variable(run_info, "f$suffix"; it=it, is=is, ir=input.ir0, iz=iz, ivperp=input.ivperp0) - density = get_variable(run_info, "density"; it=it, is=is, ir=input.ir0, iz=iz) - upar = get_variable(run_info, "parallel_flow"; it=it, is=is, ir=input.ir0, iz=iz) - vth = get_variable(run_info, "thermal_speed"; it=it, is=is, ir=input.ir0, iz=iz) + density = get_variable(run_info, "$(prefix)density"; it=it, is=is, ir=input.ir0, iz=iz) + upar = get_variable(run_info, "$(prefix)parallel_flow"; it=it, is=is, ir=input.ir0, iz=iz) + vth = get_variable(run_info, "$(prefix)thermal_speed"; it=it, is=is, ir=input.ir0, iz=iz) vcoord = run_info.vpa end @@ -3967,10 +3985,10 @@ function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutra end """ - plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothing, is=1, - fig=nothing, ax=nothing, outfile=nothing, yscale=identity, - transform=identity, rasterize=true, subtitles=nothing, - axis_args=Dict{Symbol,Any}(), kwargs...) 
+ plot_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + it=nothing, is=1, fig=nothing, ax=nothing, outfile=nothing, + yscale=identity, transform=identity, rasterize=true, + subtitles=nothing, axis_args=Dict{Symbol,Any}(), kwargs...) Plot unnormalized distribution function against \$v_\\parallel\$ and z. @@ -3981,8 +3999,9 @@ The information for the runs to plot is passed in `run_info` (as returned by [`get_run_info`](@ref)). If `run_info` is a Tuple, comparison plots are made where plots from the different runs are displayed in a horizontal row. -By default plots the ion distribution function. If `neutrals=true` is passed, plots the -neutral distribution function instead. +By default plots the ion distribution function. If `electron=true` is passed, plots the +electron distribution function instead. If `neutral=true` is passed, plots the neutral +distribution function instead. `is` selects which species to analyse. @@ -4021,24 +4040,24 @@ Any extra `kwargs` are passed to [`plot_2d`](@ref). """ function plot_f_unnorm_vs_vpa_z end -function plot_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothing, - axis_args=Dict{Symbol,Any}(), title=nothing, - subtitles=nothing, kwargs...) +function plot_f_unnorm_vs_vpa_z(run_info::Tuple; electron=false, neutral=false, + outfile=nothing, axis_args=Dict{Symbol,Any}(), + title=nothing, subtitles=nothing, kwargs...) try n_runs = length(run_info) if subtitles === nothing subtitles = Tuple(nothing for _ ∈ 1:n_runs) end if title !== nothing - title = neutral ? L"f_{n,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" + title = neutral ? L"f_{n,\mathrm{unnormalized}}" : electron ? L"f_{e,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" end fig, axes, colorbar_places = get_2d_ax(n_runs; title=title, xlabel=L"v_\parallel", ylabel=L"z", axis_args...) 
for (ri, ax, colorbar_place, st) ∈ zip(run_info, axes, colorbar_places, subtitles) - plot_f_unnorm_vs_vpa_z(ri; neutral=neutral, ax=ax, colorbar_place=colorbar_place, - title=st, kwargs...) + plot_f_unnorm_vs_vpa_z(ri; electron=electron, neutral=neutral, ax=ax, + colorbar_place=colorbar_place, title=st, kwargs...) end if outfile !== nothing @@ -4053,10 +4072,17 @@ function plot_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothing, end end -function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothing, is=1, - fig=nothing, ax=nothing, colorbar_place=nothing, title=nothing, - outfile=nothing, transform=identity, rasterize=true, +function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + it=nothing, is=1, fig=nothing, ax=nothing, + colorbar_place=nothing, title=nothing, outfile=nothing, + transform=identity, rasterize=true, axis_args=Dict{Symbol,Any}(), kwargs...) + + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4073,7 +4099,7 @@ function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothi if ax === nothing if title === nothing - title = neutral ? L"f_{n,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" + title = neutral ? L"f_{n,\mathrm{unnormalized}}" : electron ? L"f_{e,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" end fig, ax, colorbar_place = get_2d_ax(; title=title, xlabel=L"v_\parallel", ylabel=L"z", axis_args...) 
@@ -4093,10 +4119,12 @@ function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothi vth = get_variable(run_info, "thermal_speed_neutral"; it=it, is=is, ir=input.ir0) vpa_grid = run_info.vz.grid else - f = get_variable(run_info, "f"; it=it, is=is, ir=input.ir0, ivperp=input.ivperp0) - density = get_variable(run_info, "density"; it=it, is=is, ir=input.ir0) - upar = get_variable(run_info, "parallel_flow"; it=it, is=is, ir=input.ir0) - vth = get_variable(run_info, "thermal_speed"; it=it, is=is, ir=input.ir0) + suffix = electron ? "_electron" : "" + prefix = electron ? "electron_" : "" + f = get_variable(run_info, "f$suffix"; it=it, is=is, ir=input.ir0, ivperp=input.ivperp0) + density = get_variable(run_info, "$(prefix)density"; it=it, is=is, ir=input.ir0) + upar = get_variable(run_info, "$(prefix)parallel_flow"; it=it, is=is, ir=input.ir0) + vth = get_variable(run_info, "$(prefix)thermal_speed"; it=it, is=is, ir=input.ir0) vpa_grid = run_info.vpa.grid end @@ -4128,8 +4156,8 @@ function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothi end """ - animate_f_unnorm_vs_vpa(run_info; input=nothing, neutral=false, is=1, iz=nothing, - fig=nothing, ax=nothing, frame_index=nothing, + animate_f_unnorm_vs_vpa(run_info; input=nothing, electron=false, neutral=false, is=1, + iz=nothing, fig=nothing, ax=nothing, frame_index=nothing, outfile=nothing, yscale=identity, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) @@ -4142,8 +4170,9 @@ The information for the runs to animate is passed in `run_info` (as returned by [`get_run_info`](@ref)). If `run_info` is a Tuple, comparison plots are made where plots from the different runs are overlayed on the same axis. -By default animates the ion distribution function. If `neutrals=true` is passed, animates -the neutral distribution function instead. +By default animates the ion distribution function. If `electron=true` is passed, animates +the electron distribution function instead. 
If `neutral=true` is passed, animates the +neutral distribution function instead. `is` selects which species to analyse. @@ -4178,14 +4207,15 @@ to handle time-varying coordinates so cannot use [`animate_1d`](@ref)). """ function animate_f_unnorm_vs_vpa end -function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, - outfile=nothing, axis_args=Dict{Symbol,Any}(), kwargs...) +function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, electron=false, + neutral=false, outfile=nothing, + axis_args=Dict{Symbol,Any}(), kwargs...) try n_runs = length(run_info) frame_index = Observable(1) - species_label = neutral ? "n" : "i" + species_label = neutral ? "n" : electron ? "e" : "i" divide_by = f_over_vpa2 ? L"/v_\parallel^2" : "" ylabel = L"f_{%$species_label,\mathrm{unnormalized}}%$divide_by" if length(run_info) == 1 || all(all(isapprox.(ri.time, run_info[1].time)) for ri ∈ run_info[2:end]) @@ -4200,8 +4230,9 @@ function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=fal axis_args...) for ri ∈ run_info - animate_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, neutral=neutral, ax=ax, - frame_index=frame_index, kwargs...) + animate_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, electron=electron, + neutral=neutral, ax=ax, frame_index=frame_index, + kwargs...) end if n_runs > 1 @@ -4222,10 +4253,16 @@ function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=fal end function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, - neutral=false, is=1, iz=nothing, fig=nothing, ax=nothing, - frame_index=nothing, outfile=nothing, yscale=nothing, - transform=identity, axis_args=Dict{Symbol,Any}(), - kwargs...) + electron=false, neutral=false, is=1, iz=nothing, + fig=nothing, ax=nothing, frame_index=nothing, + outfile=nothing, yscale=nothing, transform=identity, + axis_args=Dict{Symbol,Any}(), kwargs...) 
+ + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4262,12 +4299,14 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, vth = get_variable(run_info, "thermal_speed_neutral"; is=is, ir=input.ir0, iz=iz) vcoord = run_info.vz else - f = VariableCache(run_info, "f", chunk_size_2d; it=nothing, is=is, ir=input.ir0, iz=iz, - ivperp=input.ivperp0, ivpa=nothing, ivzeta=nothing, ivr=nothing, - ivz=nothing) - density = get_variable(run_info, "density"; is=is, ir=input.ir0, iz=iz) - upar = get_variable(run_info, "parallel_flow"; is=is, ir=input.ir0, iz=iz) - vth = get_variable(run_info, "thermal_speed"; is=is, ir=input.ir0, iz=iz) + suffix = electron ? "_electron" : "" + prefix = electron ? "electron_" : "" + f = VariableCache(run_info, "f$suffix", chunk_size_2d; it=nothing, is=is, + ir=input.ir0, iz=iz, ivperp=input.ivperp0, ivpa=nothing, + ivzeta=nothing, ivr=nothing, ivz=nothing) + density = get_variable(run_info, "$(prefix)density"; is=is, ir=input.ir0, iz=iz) + upar = get_variable(run_info, "$(prefix)parallel_flow"; is=is, ir=input.ir0, iz=iz) + vth = get_variable(run_info, "$(prefix)thermal_speed"; is=is, ir=input.ir0, iz=iz) vcoord = run_info.vpa end @@ -4351,8 +4390,8 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, end """ - animate_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, is=1, - fig=nothing, ax=nothing, frame_index=nothing, + animate_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + is=1, fig=nothing, ax=nothing, frame_index=nothing, outfile=nothing, yscale=identity, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) @@ -4365,8 +4404,9 @@ The information for the runs to plot is passed in `run_info` (as returned by [`get_run_info`](@ref)). 
If `run_info` is a Tuple, comparison plots are made where plots from the different runs are displayed in a horizontal row. -By default animates the ion distribution function. If `neutrals=true` is passed, animates -the neutral distribution function instead. +By default animates the ion distribution function. If `electron=true` is passed, animates +the electron distribution function instead. If `neutral=true` is passed, animates the +neutral distribution function instead. `is` selects which species to analyse. @@ -4398,14 +4438,15 @@ we have to handle time-varying coordinates so cannot use [`animate_2d`](@ref)). """ function animate_f_unnorm_vs_vpa_z end -function animate_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothing, - axis_args=Dict{Symbol,Any}(), kwargs...) +function animate_f_unnorm_vs_vpa_z(run_info::Tuple; electron=false, neutral=false, + outfile=nothing, axis_args=Dict{Symbol,Any}(), + kwargs...) try n_runs = length(run_info) frame_index = Observable(1) - var_name = neutral ? L"f_{n,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" + var_name = neutral ? L"f_{n,\mathrm{unnormalized}}" : electron ? L"f_{e,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" if length(run_info) > 1 title = var_name subtitles = (lift(i->LaTeXString(string(ri.run_name, "\nt = ", ri.time[i])), @@ -4422,7 +4463,7 @@ function animate_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothi axis_args...) for (ri, ax, colorbar_place) ∈ zip(run_info, axes, colorbar_places) - animate_f_unnorm_vs_vpa_z(ri; neutral=neutral, ax=ax, + animate_f_unnorm_vs_vpa_z(ri; electron=electron, neutral=neutral, ax=ax, colorbar_place=colorbar_place, frame_index=frame_index, kwargs...) 
end @@ -4440,11 +4481,17 @@ function animate_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothi end end -function animate_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, is=1, - fig=nothing, ax=nothing, colorbar_place=nothing, +function animate_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + is=1, fig=nothing, ax=nothing, colorbar_place=nothing, frame_index=nothing, outfile=nothing, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) + + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4482,17 +4529,19 @@ function animate_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, is=1, ivzeta=nothing, ivr=nothing, ivz=nothing) vpa_grid = run_info.vz.grid else - f = VariableCache(run_info, "f", chunk_size_2d; it=nothing, is=is, ir=input.ir0, - iz=nothing, ivperp=input.ivperp0, ivpa=nothing, ivzeta=nothing, - ivr=nothing, ivz=nothing) - density = VariableCache(run_info, "density", chunk_size_1d; it=nothing, is=is, - ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, - ivzeta=nothing, ivr=nothing, ivz=nothing) - upar = VariableCache(run_info, "parallel_flow", chunk_size_1d; it=nothing, is=is, - ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, - ivzeta=nothing, ivr=nothing, ivz=nothing) - vth = VariableCache(run_info, "thermal_speed", chunk_size_1d; it=nothing, is=is, - ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, + suffix = electron ? "_electron" : "" + prefix = electron ? 
"electron_" : "" + f = VariableCache(run_info, "f$suffix", chunk_size_2d; it=nothing, is=is, + ir=input.ir0, iz=nothing, ivperp=input.ivperp0, ivpa=nothing, + ivzeta=nothing, ivr=nothing, ivz=nothing) + density = VariableCache(run_info, "$(prefix)density", chunk_size_1d; it=nothing, + is=is, ir=input.ir0, iz=nothing, ivperp=nothing, + ivpa=nothing, ivzeta=nothing, ivr=nothing, ivz=nothing) + upar = VariableCache(run_info, "$(prefix)parallel_flow", chunk_size_1d; + it=nothing, is=is, ir=input.ir0, iz=nothing, ivperp=nothing, + ivpa=nothing, ivzeta=nothing, ivr=nothing, ivz=nothing) + vth = VariableCache(run_info, "$(prefix)thermal_speed", chunk_size_1d; it=nothing, + is=is, ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, ivzeta=nothing, ivr=nothing, ivz=nothing) vpa_grid = run_info.vpa.grid end