diff --git a/.github/workflows/longtest.yml b/.github/workflows/longtest.yml index 8d4990bb6..5c0890c04 100644 --- a/.github/workflows/longtest.yml +++ b/.github/workflows/longtest.yml @@ -15,7 +15,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest] fail-fast: false - timeout-minutes: 90 + timeout-minutes: 120 steps: - uses: actions/checkout@v4 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-time-evolving-ge-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-time-evolving-ge-wall-Jac.toml new file mode 100644 index 000000000..659d2ed28 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-time-evolving-ge-wall-Jac.toml @@ -0,0 +1,165 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "sqrt" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_time_evolving=true +implicit_electron_ppar = false +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 10000 +nwrite_dfns = 10000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +maximum_dt = 2.0e-5 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 50 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 
+include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 25 +total_its_soft_limit = 30 +adi_precon_iterations = 3 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/machines/generic-batch-template/compile_dependencies.sh b/machines/generic-batch-template/compile_dependencies.sh index 966ef12d9..2f333f12b 100755 --- a/machines/generic-batch-template/compile_dependencies.sh +++ b/machines/generic-batch-template/compile_dependencies.sh @@ -77,10 +77,10 @@ if [[ $BUILDHDF5 == "y" && -d hdf5-build ]]; then fi if [[ $BUILDHDF5 == "y" ]]; then - HDF5=hdf5-1.14.5 + HDF5=hdf5-1.14.3 # Download and extract the source - wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz - tar xjf ${HDF5}.tar.gz + wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2 + tar xjf ${HDF5}.tar.bz2 cd $HDF5 diff --git a/machines/generic-pc/compile_dependencies.sh b/machines/generic-pc/compile_dependencies.sh index 476c0ed0d..ae70bd6b9 100755 --- a/machines/generic-pc/compile_dependencies.sh +++ b/machines/generic-pc/compile_dependencies.sh @@ -77,10 +77,10 @@ else fi if [[ $BUILDHDF5 == "y" ]]; then - HDF5=hdf5-1.14.5 + HDF5=hdf5-1.14.3 # Download and extract the source - wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz - tar xzf ${HDF5}.tar.gz + wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2 + tar xjf ${HDF5}.tar.bz2 cd $HDF5 diff --git a/machines/marconi/compile_dependencies.sh b/machines/marconi/compile_dependencies.sh index 70aae5b49..e18a41e25 100755 --- a/machines/marconi/compile_dependencies.sh +++ b/machines/marconi/compile_dependencies.sh @@ -30,10 +30,10 @@ if [ -d hdf5-build ]; then fi if [ $BUILDHDF5 -eq 0 ]; then - HDF5=hdf5-1.14.5 + HDF5=hdf5-1.14.3 # Download and extract the source - wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz - tar xjf ${HDF5}.tar.gz + wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2 + tar xjf ${HDF5}.tar.bz2 cd $HDF5 diff --git a/machines/shared/add_dependencies_to_project.jl b/machines/shared/add_dependencies_to_project.jl index 87b62eb50..f7e844628 100644 --- a/machines/shared/add_dependencies_to_project.jl +++ b/machines/shared/add_dependencies_to_project.jl @@ -186,8 +186,7 @@ if mk_preferences["use_system_mpi"] == "y" local_hdf5_install_dir = realpath(local_hdf5_install_dir) # We have downloaded and compiled HDF5, so link that hdf5_dir = local_hdf5_install_dir - hdf5_lib = joinpath(local_hdf5_install_dir, "libhdf5.so") - hdf5_lib_hl = joinpath(local_hdf5_install_dir, "libhdf5_hl.so") + hdf5_lib, hdf5_lib_hl = 
get_hdf5_lib_names(local_hdf5_install_dir) elseif !prompt_for_lib_paths hdf5_dir = mk_preferences["hdf5_dir"] if hdf5_dir != "default" @@ -207,9 +206,10 @@ if mk_preferences["use_system_mpi"] == "y" global hdf5_dir, hdf5_lib, hdf5_lib_hl hdf5_dir = get_input_with_path_completion( "\nAn HDF5 installation compiled with your system MPI is required to use\n" - * "parallel I/O. Enter the directory where the libhdf5.so and libhdf5_hl.so are\n" - * "located (enter 'default' to use the Julia-provided HDF5, which does not\n" - * "support parallel I/O): [$default_hdf5_dir]") + * "parallel I/O. Enter the directory where the libhdf5.so and\n" + * "libhdf5_hl.so (or libhdf5.dylib and libhdf5_hl.dylib on macOS)\n" + * "are located (enter 'default' to use the Julia-provided HDF5, which\n" + * "is not compatible with using the system MPI): [$default_hdf5_dir]") if hdf5_dir == "" hdf5_dir = default_hdf5_dir @@ -222,8 +222,7 @@ if mk_preferences["use_system_mpi"] == "y" if isdir(hdf5_dir) hdf5_dir = realpath(hdf5_dir) end - hdf5_lib = joinpath(hdf5_dir, "libhdf5.so") - hdf5_lib_hl = joinpath(hdf5_dir, "libhdf5_hl.so") + hdf5_lib, hdf5_lib_hl = get_hdf5_lib_names(hdf5_dir) if isfile(hdf5_lib) && isfile(hdf5_lib_hl) break else diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index a5f3aeee0..aa7a94e9d 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -731,6 +731,7 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, advection_velocity=false, colormap=this_input_dict["colormap"], animation_ext=this_input_dict["animation_ext"], + n_points_near_wall=4, ) set_defaults_and_check_section!( @@ -740,6 +741,7 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, advection_velocity=false, colormap=this_input_dict["colormap"], animation_ext=this_input_dict["animation_ext"], + n_points_near_wall=4, ) set_defaults_and_check_section!( @@ -749,6 +751,7 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, advection_velocity=false, colormap=this_input_dict["colormap"], animation_ext=this_input_dict["animation_ext"], + n_points_near_wall=4, ) set_defaults_and_check_section!( @@ -877,11 +880,11 @@ end """ get_run_info(run_dir...; itime_min=1, itime_max=0, - itime_skip=1, dfns=false, initial_electron=false, do_setup=true, - setup_input_file=nothing) + itime_skip=1, dfns=false, initial_electron=false, electron_debug=false, + do_setup=true, setup_input_file=nothing) get_run_info((run_dir, restart_index)...; itime_min=1, itime_max=0, - itime_skip=1, dfns=false, initial_electron=false, do_setup=true, - setup_input_file=nothing) + itime_skip=1, dfns=false, initial_electron=false, electron_debug=false, + do_setup=true, setup_input_file=nothing) Get file handles and other info for a single run @@ -904,7 +907,7 @@ mix Strings and Tuples in a call). By default load data from moments files, pass `dfns=true` to load from distribution functions files, or `initial_electron=true` and `dfns=true` to load from initial electron -state files. +state files, or `electron_debug=true` and `dfns=true` to load from electron debug files. The `itime_min`, `itime_max` and `itime_skip` options can be used to select only a slice of time points when loading data. 
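For example, with the new `electron_debug` option documented above, distribution-function and electron debug output could be loaded for a hypothetical run directory as:

    # `runs/my_run` is an illustrative path; `dfns` and `electron_debug` are the
    # keyword arguments described in this docstring.
    run_info = get_run_info("runs/my_run"; dfns=true, electron_debug=true)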
In `makie_post_process` these options are read from the @@ -1558,7 +1561,11 @@ for dim ∈ one_dimension_combinations end if n_runs > 1 - put_legend_above(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) end if outfile !== nothing @@ -1975,11 +1982,11 @@ for dim ∈ one_dimension_combinations_no_t all(isapprox.(ri.time, run_info[1].time)) for ri ∈ run_info[2:end]) # All times are the same - time = select_slice(run_info[1].time, :t; input=input, it=it) + time = select_time_slice(run_info[1].time, it) title = lift(i->string("t = ", time[i]), frame_index) else title = lift(i->join((string("t", irun, " = ", - select_slice(ri.time, :t; input=input, it=it)[i]) + select_time_slice(ri.time, it)[i]) for (irun,ri) ∈ enumerate(run_info)), "; "), frame_index) end @@ -2005,7 +2012,11 @@ for dim ∈ one_dimension_combinations_no_t end if n_runs > 1 - put_legend_above(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) end if it === nothing @@ -2068,7 +2079,7 @@ for dim ∈ one_dimension_combinations_no_t ind = frame_index end if ax === nothing - time = select_slice(run_info.time, :t; input=input, it=it) + time = select_time_slice(run_info.time, it) title = lift(i->string("t = ", time[i]), ind) fig, ax = get_1d_ax(; xlabel="$($dim_str)", ylabel=get_variable_symbol(var_name), @@ -2218,13 +2229,13 @@ for (dim1, dim2) ∈ two_dimension_combinations_no_t if length(run_info) > 1 title = get_variable_symbol(var_name) subtitles = (lift(i->string(ri.run_name, "\nt = ", - select_slice(ri.time, :t; input=input, it=it)[i]), + select_time_slice(ri.time, it)[i]), frame_index) for ri ∈ run_info) else - time = select_slice(run_info[1].time, :t; input=input, it=it) + time = select_time_slice(run_info[1].time, it) title = lift(i->string(get_variable_symbol(var_name), "\nt = ", - run_info[1].time[i]), + time[i]), frame_index) subtitles = nothing end @@ -2306,9 +2317,9 @@ for (dim1, dim2) ∈ two_dimension_combinations_no_t colormap = input.colormap end if title === nothing && ax == nothing - time = select_slice(run_info.time, :t; input=input, it=it) + time = select_time_slice(run_info.time, it) title = lift(i->string(get_variable_symbol(var_name), "\nt = ", - run_info.time[i]), + time[i]), ind) end @@ -3544,6 +3555,20 @@ function select_slice(variable::AbstractArray{T,7}, dims::Symbol...; input=nothi return slice end +""" + select_time_slice(time::AbstractVector, range) + +Variant of `select_slice()` to be used on 'time' arrays, which are always 1D. 
+""" +function select_time_slice(time::AbstractVector, range) + if range === nothing + return time + else + return @view time[range] + end +end + + """ get_dimension_slice_indices(keep_dims...; input, it=nothing, is=nothing, ir=nothing, iz=nothing, ivperp=nothing, ivpa=nothing, @@ -3919,7 +3944,11 @@ function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, electron=false end if n_runs > 1 - put_legend_above(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) end if outfile !== nothing @@ -4281,7 +4310,11 @@ function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, electron=fa end if n_runs > 1 - put_legend_above(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) end if outfile !== nothing @@ -4696,8 +4729,8 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) nt = minimum(ri.nt for ri ∈ run_info) - for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"), - (z_upper, z_upper-4:z_upper, "wall+")) + for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+input.n_points_near_wall, "wall-"), + (z_upper, z_upper-input.n_points_near_wall:z_upper, "wall+")) f_input = copy(input_dict_dfns["f"]) f_input["iz0"] = z @@ -4714,7 +4747,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) label="$(run_label)iz=$iz", ax=ax) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf$(electron_suffix)_$(label)_vs_vpa.pdf" save(outfile, fig) @@ -4731,7 +4768,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf$(electron_suffix)_$(label)_vs_vpa.pdf" save(outfile, fig) @@ -4748,7 +4789,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) label="$(run_label)iz=$iz", ax=ax) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf" save(outfile, fig) @@ -4765,7 +4810,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa.pdf" save(outfile, fig) end @@ -4806,7 +4855,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) frame_index=frame_index) end end - put_legend_right(fig, ax) + 
put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf$(electron_suffix)_$(label)_vs_vpa." * input.animation_ext save_animation(fig, frame_index, nt, outfile) @@ -4825,7 +4878,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf$(electron_suffix)_$(label)_vs_vpa." * input.animation_ext save_animation(fig, frame_index, nt, outfile) @@ -4844,7 +4901,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) frame_index=frame_index) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext save_animation(fig, frame_index, nt, outfile) @@ -4863,7 +4924,11 @@ function plot_charged_pdf_2D_at_wall(run_info; plot_prefix, electron=false) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa." 
* input.animation_ext save_animation(fig, frame_index, nt, outfile) end @@ -4951,8 +5016,8 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) for ri ∈ run_info) nt = minimum(ri.nt for ri ∈ run_info) - for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"), - (z_upper, z_upper-4:z_upper, "wall+")) + for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+input.n_points_near_wall, "wall-"), + (z_upper, z_upper-input.n_points_near_wall:z_upper, "wall+")) f_neutral_input = copy(input_dict_dfns["f_neutral"]) f_neutral_input["iz0"] = z @@ -4969,7 +5034,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) label="$(run_label)iz=$iz", ax=ax) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf" save(outfile, fig) @@ -4986,7 +5055,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vpa.pdf" save(outfile, fig) @@ -5004,7 +5077,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) ax=ax) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf" save(outfile, fig) @@ -5022,7 +5099,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vpa.pdf" save(outfile, fig) end @@ -5081,7 +5162,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) frame_index=frame_index) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." * input.animation_ext save_animation(fig, frame_index, nt, outfile) @@ -5100,7 +5185,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vz." 
* input.animation_ext save_animation(fig, frame_index, nt, outfile) @@ -5120,7 +5209,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) frame_index=frame_index) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext save_animation(fig, frame_index, nt, outfile) @@ -5139,7 +5232,11 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) transform=(x)->positive_or_nan(x; epsilon=1.e-20)) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext save_animation(fig, frame_index, nt, outfile) end @@ -5264,12 +5361,16 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) input=input) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) save(plot_prefix * "ion_constraints.pdf", fig) end # Neutrals - if any(ri.n_neutral_species > 1 + if any(ri.n_neutral_species > 0 && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar) for ri ∈ run_info) @@ -5308,42 +5409,50 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) input=input) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) save(plot_prefix * "neutral_constraints.pdf", fig) end # Electrons - #if any(ri.composition.electron_physics ∈ (kinetic_electrons, - # kinetic_electrons_with_temperature_equation) - # for ri ∈ run_info) - - # fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") - # for ri ∈ run_info - # if length(run_info) > 1 - # prefix = ri.run_name * ", " - # else - # prefix = "" - # end - - # varname = "electron_constraints_A_coefficient" - # label = prefix * "(A-1)" - # data = get_variable(ri, varname; it=it0, ir=ir0) - # data .-= 1.0 - # plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input) - - # varname = "electron_constraints_B_coefficient" - # label = prefix * "B" - # plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, - # input=input) - - # varname = "electron_constraints_C_coefficient" - # label = prefix * "C" - # plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, - # input=input) - # end - # put_legend_right(fig, ax) - # save(plot_prefix * "electron_constraints.pdf", fig) - #end + if any(ri.composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + for ri ∈ run_info) + + fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") + for ri ∈ run_info + if length(run_info) > 1 + prefix = ri.run_name * ", " + else + prefix = "" + end + + varname = "electron_constraints_A_coefficient" + label = prefix * "(A-1)" + data = get_variable(ri, varname; it=it0, ir=ir0) + data .-= 1.0 + plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input) + + varname = 
"electron_constraints_B_coefficient" + label = prefix * "B" + plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, + input=input) + + varname = "electron_constraints_C_coefficient" + label = prefix * "C" + plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, + input=input) + end + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) + save(plot_prefix * "electron_constraints.pdf", fig) + end end if input.animate @@ -5406,14 +5515,18 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) input=input) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) ylims!(ax, ymin, ymax) save_animation(fig, frame_index, nt, plot_prefix * "ion_constraints." * input.animation_ext) end # Neutrals - if any(ri.n_neutral_species > 1 + if any(ri.n_neutral_species > 0 && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar) for ri ∈ run_info) @@ -5470,62 +5583,70 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) input=input) end end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) ylims!(ax, ymin, ymax) save_animation(fig, frame_index, nt, plot_prefix * "neutral_constraints." * input.animation_ext) end # Electrons - #if any(ri.composition.electron_physics ∈ (kinetic_electrons, - # kinetic_electrons_with_temperature_equation) - # for ri ∈ run_info) - - # frame_index = Observable(1) - # fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") - - # # Calculate plot limits manually so we can exclude the first time point, which - # # often has a large value for (A-1) due to the way initialisation is done, - # # which can make the subsequent values hard to see. - # ymin = Inf - # ymax = -Inf - # for ri ∈ run_info - # if length(run_info) > 1 - # prefix = ri.run_name * ", " - # else - # prefix = "" - # end - - # varname = "electron_constraints_A_coefficient" - # label = prefix * "(A-1)" - # data = get_variable(ri, varname; ir=ir0) - # data .-= 1.0 - # ymin = min(ymin, minimum(data[:,2:end])) - # ymax = max(ymax, maximum(data[:,2:end])) - # animate_vs_z(ri, varname; label=label, data=data, - # frame_index=frame_index, ax=ax, input=input) - - # varname = "electron_constraints_B_coefficient" - # label = prefix * "B" - # data = get_variable(ri, varname; ir=ir0) - # ymin = min(ymin, minimum(data[:,2:end])) - # ymax = max(ymax, maximum(data[:,2:end])) - # animate_vs_z(ri, varname; label=label, data=data, - # frame_index=frame_index, ax=ax, ir=ir0, input=input) - - # varname = "electron_constraints_C_coefficient" - # label = prefix * "C" - # data = get_variable(ri, varname; ir=ir0) - # ymin = min(ymin, minimum(data[:,2:end])) - # ymax = max(ymax, maximum(data[:,2:end])) - # animate_vs_z(ri, varname; label=label, data=data, - # frame_index=frame_index, ax=ax, ir=ir0, input=input) - # end - # put_legend_right(fig, ax) - # ylims!(ax, ymin, ymax) - # save_animation(fig, frame_index, nt, - # plot_prefix * "electron_constraints." 
* input.animation_ext) - #end + if any(ri.composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + for ri ∈ run_info) + + frame_index = Observable(1) + fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") + + # Calculate plot limits manually so we can exclude the first time point, which + # often has a large value for (A-1) due to the way initialisation is done, + # which can make the subsequent values hard to see. + ymin = Inf + ymax = -Inf + for ri ∈ run_info + if length(run_info) > 1 + prefix = ri.run_name * ", " + else + prefix = "" + end + + varname = "electron_constraints_A_coefficient" + label = prefix * "(A-1)" + data = get_variable(ri, varname; ir=ir0) + data .-= 1.0 + ymin = min(ymin, minimum(data[:,2:end])) + ymax = max(ymax, maximum(data[:,2:end])) + animate_vs_z(ri, varname; label=label, data=data, + frame_index=frame_index, ax=ax, input=input) + + varname = "electron_constraints_B_coefficient" + label = prefix * "B" + data = get_variable(ri, varname; ir=ir0) + ymin = min(ymin, minimum(data[:,2:end])) + ymax = max(ymax, maximum(data[:,2:end])) + animate_vs_z(ri, varname; label=label, data=data, + frame_index=frame_index, ax=ax, ir=ir0, input=input) + + varname = "electron_constraints_C_coefficient" + label = prefix * "C" + data = get_variable(ri, varname; ir=ir0) + ymin = min(ymin, minimum(data[:,2:end])) + ymax = max(ymax, maximum(data[:,2:end])) + animate_vs_z(ri, varname; label=label, data=data, + frame_index=frame_index, ax=ax, ir=ir0, input=input) + end + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) + ylims!(ax, ymin, ymax) + save_animation(fig, frame_index, nt, + plot_prefix * "electron_constraints." 
* input.animation_ext) + end end catch e return makie_post_processing_error_handler( @@ -5711,12 +5832,20 @@ function Chodura_condition_plots(run_info::Tuple; plot_prefix) fig = figs[1] ax = axes[1][1] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "Chodura_ratio_lower_vs_t.pdf") save(outfile, fig) fig = figs[2] ax = axes[1][2] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "Chodura_ratio_upper_vs_t.pdf") save(outfile, fig) end @@ -5724,12 +5853,20 @@ function Chodura_condition_plots(run_info::Tuple; plot_prefix) fig = figs[3] ax = axes[1][3] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "Chodura_ratio_lower_vs_r.pdf") save(outfile, fig) fig = figs[4] ax = axes[1][4] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "Chodura_ratio_upper_vs_r.pdf") save(outfile, fig) end @@ -5747,12 +5884,20 @@ function Chodura_condition_plots(run_info::Tuple; plot_prefix) println("check axes ", axes) ax = axes[1][7] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "pdf_unnorm_over_vpa2_wall-_vs_vpa.pdf") save(outfile, fig) fig = figs[8] ax = axes[1][8] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "pdf_unnorm_over_vpa2_wall+_vs_vpa.pdf") save(outfile, fig) end @@ -5763,6 +5908,10 @@ function Chodura_condition_plots(run_info::Tuple; plot_prefix) ax = axes[1][9][1] frame_index = axes[1][9][2] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "pdf_unnorm_over_vpa2_wall-_vs_vpa." * input.animation_ext) save_animation(fig, frame_index, nt, outfile) @@ -5770,6 +5919,10 @@ function Chodura_condition_plots(run_info::Tuple; plot_prefix) ax = axes[1][10][1] frame_index = axes[1][10][2] put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) outfile = string(plot_prefix, "pdf_unnorm_over_vpa2_wall+_vs_vpa." 
* input.animation_ext) save_animation(fig, frame_index, nt, outfile) end @@ -6054,7 +6207,11 @@ function sound_wave_plots(run_info::Tuple; plot_prefix) end if input.plot - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) save(outfile, fig) @@ -6122,7 +6279,11 @@ function sound_wave_plots(run_info; outfile=nothing, ax=nothing, phi=nothing) error("Cannot save figure from this function when `ax` was passed. Please " * "save the figure that contains `ax`") end - put_legend_right(fig, ax) + put_legend_below(fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(fig) save(outfile, fig) end end @@ -7804,7 +7965,11 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n end end - put_legend_right(steps_fig, ax_failures) + put_legend_below(steps_fig, ax_failures) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(steps_fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(steps_fig) if plot_prefix !== nothing outfile = plot_prefix * electron_prefix * "timestep_diagnostics.pdf" @@ -7902,7 +8067,11 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n end end #ylims!(ax, 0.0, 10.0 * maxval) - put_legend_right(CFL_fig, ax) + put_legend_below(CFL_fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(CFL_fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(CFL_fig) if plot_prefix !== nothing outfile = plot_prefix * electron_prefix * "CFL_factors.pdf" @@ -8069,7 +8238,11 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n end end - put_legend_right(limits_fig, ax) + put_legend_below(limits_fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(limits_fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(limits_fig) if plot_prefix !== nothing outfile = plot_prefix * electron_prefix * "timestep_limits.pdf" @@ -8118,7 +8291,11 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n end if has_nl_solver - put_legend_right(nl_solvers_fig, ax) + put_legend_below(nl_solvers_fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(nl_solvers_fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(nl_solvers_fig) if plot_prefix !== nothing outfile = plot_prefix * "nonlinear_solver_iterations.pdf" @@ -8159,7 +8336,11 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n end if has_electron_solve - put_legend_right(electron_solver_fig, ax) + put_legend_below(electron_solver_fig, ax) + # Ensure the first row width is 3/4 of the column width so that + # the plot does not get squashed by the legend + rowsize!(electron_solver_fig.layout, 1, Aspect(1, 3/4)) + resize_to_layout!(electron_solver_fig) if has_electron_solve outfile = plot_prefix * "electron_steps.pdf" diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index bd2f1cdbb..c77d8c1ac 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ 
b/moment_kinetics/src/electron_fluid_equations.jl @@ -127,6 +127,12 @@ end function calculate_electron_moments!(scratch, pdf, moments, composition, collisions, r, z, vpa) + if length(scratch.pdf_electron) > 0 + pdf_electron = scratch.pdf_electron + else + pdf_electron = pdf.electron + end + electron_ppar = scratch.electron_ppar calculate_electron_density!(scratch.electron_density, moments.electron.dens_updated, scratch.density) calculate_electron_upar_from_charge_conservation!( @@ -136,20 +142,20 @@ function calculate_electron_moments!(scratch, pdf, moments, composition, collisi kinetic_electrons_with_temperature_equation) begin_r_z_region() @loop_r_z ir iz begin - scratch.electron_ppar[iz,ir] = 0.5 * composition.me_over_mi * - scratch.electron_density[iz,ir] * - moments.electron.vth[iz,ir]^2 + electron_ppar[iz,ir] = 0.5 * composition.me_over_mi * + scratch.electron_density[iz,ir] * + moments.electron.vth[iz,ir]^2 end moments.electron.ppar_updated[] = true end - update_electron_vth_temperature!(moments, scratch.electron_ppar, - scratch.electron_density, composition) - calculate_electron_qpar!(moments.electron, pdf.electron, scratch.electron_ppar, + update_electron_vth_temperature!(moments, electron_ppar, scratch.electron_density, + composition) + calculate_electron_qpar!(moments.electron, pdf_electron, electron_ppar, scratch.electron_upar, scratch.upar, collisions.electron_fluid.nu_ei, composition.me_over_mi, composition.electron_physics, vpa) if composition.electron_physics == braginskii_fluid - electron_fluid_qpar_boundary_condition!(scratch.electron_ppar, + electron_fluid_qpar_boundary_condition!(electron_ppar, scratch.electron_upar, scratch.electron_density, moments.electron, z) @@ -864,7 +870,7 @@ function calculate_electron_qpar_from_pdf!(qpar, ppar, vth, pdf, vpa) begin_r_z_region() ivperp = 1 @loop_r_z ir iz begin - @views qpar[iz, ir] = 2*ppar[iz,ir]*vth[iz,ir]*integrate_over_vspace(pdf[:, ivperp, iz, ir], vpa.grid.^3, vpa.wgts) + @views qpar[iz, ir] = 2*ppar[iz,ir]*vth[iz,ir]*integrate_over_vspace(pdf[:, ivperp, iz, ir], vpa.grid, 3, vpa.wgts) end end diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 995c51630..d023dc29a 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -124,12 +124,13 @@ OUTPUT: max_electron_sim_time; io_electron=io_electron, initial_time=initial_time, residual_tolerance=residual_tolerance, evolve_ppar=evolve_ppar, ion_dt=ion_dt) elseif solution_method == "backward_euler" - return electron_backward_euler!(scratch, pdf, moments, phi, collisions, - composition, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, - z_advect, vpa_advect, scratch_dummy, t_params, external_source_settings, - num_diss_params, nl_solver_params, max_electron_pdf_iterations, - max_electron_sim_time; io_electron=io_electron, initial_time=initial_time, - residual_tolerance=residual_tolerance, evolve_ppar=evolve_ppar, ion_dt=ion_dt) + return electron_backward_euler_pseudotimestepping!(scratch, pdf, moments, phi, + collisions, composition, r, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, t_params, + external_source_settings, num_diss_params, nl_solver_params, + max_electron_pdf_iterations, max_electron_sim_time; io_electron=io_electron, + initial_time=initial_time, residual_tolerance=residual_tolerance, + evolve_ppar=evolve_ppar, ion_dt=ion_dt) elseif solution_method == "shooting_method" dens = 
moments.electron.dens vthe = moments.electron.vth @@ -199,10 +200,6 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll initial_time=nothing, residual_tolerance=nothing, evolve_ppar=false, ion_dt=nothing) - if max_electron_pdf_iterations !== nothing && max_electron_sim_time !== nothing - error("Cannot use both max_electron_pdf_iterations=$max_electron_pdf_iterations " - * "and max_electron_sim_time=$max_electron_sim_time at the same time") - end if max_electron_pdf_iterations === nothing && max_electron_sim_time === nothing error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") end @@ -333,12 +330,17 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll end end + # Counter for pseudotime just in this pseudotimestepping loop + local_pseudotime = 0.0 + residual_norm = -1.0 + begin_serial_region() t_params.moments_output_counter[] += 1 @serial_region begin if io_electron !== nothing write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, vpa) + t_params.moments_output_counter[], local_pseudotime, 0.0, + r, z, vperp, vpa) end end # evolve (artificially) in time until the residual is less than the tolerance @@ -483,6 +485,7 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll # update the time following the pdf update t_params.t[] += t_params.previous_dt[] + local_pseudotime += t_params.previous_dt[] residual = -1.0 if t_params.previous_dt[] > 0.0 @@ -572,7 +575,8 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll if io_electron !== nothing t_params.write_moments_output[] = false write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, + t_params.moments_output_counter[], + local_pseudotime, residual_norm, r, z, vperp, vpa) end end @@ -622,7 +626,8 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll if io_electron !== nothing && io_electron !== true t_params.moments_output_counter[] += 1 write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, vpa) + t_params.moments_output_counter[], local_pseudotime, + residual_norm, r, z, vperp, vpa) finish_electron_io(io_electron) end end @@ -642,15 +647,16 @@ advance of the electron kinetic equation until a steady-state solution is reache Note that this function does not use the [`runge_kutta`](@ref) timestep functionality. `t_params.previous_dt[]` is used to store the (adaptively updated) initial timestep of the pseudotimestepping loop (initial value of `t_params.dt[]` within -`electron_backward_euler!()`). `t_params.dt[]` is adapted according to the iteration -counts of the Newton solver. +`electron_backward_euler_pseudotimestepping!()`). `t_params.dt[]` is adapted according to +the iteration counts of the Newton solver. 
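A minimal sketch of the iteration-count-based timestep adaptation described above, assuming the threshold names from the `[electron_timestepping]` example input and illustrative scaling factors (not the values used by the solver):

    # Sketch only: shrink dt when the Newton solver needs many iterations, grow it
    # when the solver converges easily. The 0.5 and 2.0 factors are illustrative.
    function adapt_pseudotimestep(dt, newton_iterations;
                                  decrease_dt_iteration_threshold=50,
                                  increase_dt_iteration_threshold=20,
                                  minimum_dt=1.0e-9, maximum_dt=2.0e-5)
        if newton_iterations > decrease_dt_iteration_threshold
            dt = max(0.5 * dt, minimum_dt)
        elseif newton_iterations < increase_dt_iteration_threshold
            dt = min(2.0 * dt, maximum_dt)
        end
        return dt
    end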
""" -function electron_backward_euler!(scratch, pdf, moments, phi, collisions, composition, r, - z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - scratch_dummy, t_params, external_source_settings, num_diss_params, - nl_solver_params, max_electron_pdf_iterations, max_electron_sim_time; - io_electron=nothing, initial_time=nothing, residual_tolerance=nothing, - evolve_ppar=false, ion_dt=nothing) +function electron_backward_euler_pseudotimestepping!(scratch, pdf, moments, phi, + collisions, composition, r, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, t_params, + external_source_settings, num_diss_params, nl_solver_params, + max_electron_pdf_iterations, max_electron_sim_time; io_electron=nothing, + initial_time=nothing, residual_tolerance=nothing, evolve_ppar=false, + ion_dt=nothing) if max_electron_pdf_iterations === nothing && max_electron_sim_time === nothing error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") @@ -750,12 +756,17 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos initial_step_counter = t_params.step_counter[] t_params.step_counter[] += 1 + # Pseudotime just in this pseudotimestepping loop + local_pseudotime = 0.0 + residual_norm = -1.0 + begin_serial_region() t_params.moments_output_counter[] += 1 @serial_region begin if io_electron !== nothing write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, vpa) + t_params.moments_output_counter[], local_pseudotime, 0.0, + r, z, vperp, vpa) end end electron_pdf_converged = Ref(false) @@ -799,754 +810,39 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end end - # Calculate heat flux and derivatives using updated f_electron - @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], - electron_ppar_new, - moments.electron.vth[:,ir], - f_electron_new, vpa, ir) - @views calculate_electron_moment_derivatives_no_r!( - moments, - (electron_density=moments.electron.dens[:,ir], - electron_upar=moments.electron.upar[:,ir], - electron_ppar=electron_ppar_new), - scratch_dummy, z, z_spectral, - num_diss_params.electron.moment_dissipation_coefficient, ir) - - if nl_solver_params.preconditioner_type === Val(:electron_split_lu) - if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval - nl_solver_params.solves_since_precon_update[] = 0 - - vth = @view moments.electron.vth[:,ir] - me = composition.me_over_mi - dens = @view moments.electron.dens[:,ir] - upar = @view moments.electron.upar[:,ir] - ppar = electron_ppar_new - ddens_dz = @view moments.electron.ddens_dz[:,ir] - dupar_dz = @view moments.electron.dupar_dz[:,ir] - dppar_dz = @view moments.electron.dppar_dz[:,ir] - dvth_dz = @view moments.electron.dvth_dz[:,ir] - dqpar_dz = @view moments.electron.dqpar_dz[:,ir] - source_amplitude = moments.electron.external_source_amplitude - source_density_amplitude = moments.electron.external_source_density_amplitude - source_momentum_amplitude = moments.electron.external_source_momentum_amplitude - source_pressure_amplitude = moments.electron.external_source_pressure_amplitude - - # Note the region(s) used here must be the same as the region(s) used - # when the matrices are used in `split_precon!()`, so that the - # parallelisation is the same and each matrix is used on the same - # process that created it. 
- - # z-advection preconditioner - begin_vperp_vpa_region() - update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) - @loop_vperp_vpa ivperp ivpa begin - z_matrix, ppar_matrix = get_electron_split_Jacobians!( - ivperp, ivpa, ppar, moments, collisions, composition, z, - vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, - z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar) - @timeit_debug global_timer "lu" nl_solver_params.preconditioners.z[ivpa,ivperp,ir] = lu(sparse(z_matrix)) - if ivperp == 1 && ivpa == 1 - @timeit_debug global_timer "lu" nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix)) - end - end - end - - function split_precon!(x) - precon_ppar, precon_f = x - - begin_vperp_vpa_region() - @loop_vperp_vpa ivperp ivpa begin - z_precon_matrix = nl_solver_params.preconditioners.z[ivpa,ivperp,ir] - f_slice = @view precon_f[ivpa,ivperp,:] - @views z.scratch .= f_slice - @timeit_debug global_timer "ldiv!" ldiv!(z.scratch2, z_precon_matrix, z.scratch) - f_slice .= z.scratch2 - end - - begin_z_region() - ppar_precon_matrix = nl_solver_params.preconditioners.ppar[ir] - @loop_z iz begin - z.scratch[iz] = precon_ppar[iz] - end - - begin_serial_region() - @serial_region begin - @timeit_debug global_timer "ldiv!" ldiv!(precon_ppar, ppar_precon_matrix, z.scratch) - end - end - - left_preconditioner = identity - right_preconditioner = split_precon! - elseif nl_solver_params.preconditioner_type === Val(:electron_lu) - - if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || - t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] - - # dt has changed significantly, so update the preconditioner - nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval - end - - if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval -global_rank[] == 0 && println("recalculating precon") - nl_solver_params.solves_since_precon_update[] = 0 - nl_solver_params.precon_dt[] = t_params.dt[] - - orig_lu, precon_matrix, input_buffer, output_buffer = - nl_solver_params.preconditioners[ir] - - fill_electron_kinetic_equation_Jacobian!( - precon_matrix, f_electron_new, electron_ppar_new, moments, phi, - collisions, composition, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar) - - begin_serial_region() - if block_rank[] == 0 - if size(orig_lu) == (1, 1) - # Have not properly created the LU decomposition before, so - # cannot reuse it. - @timeit_debug global_timer "lu" nl_solver_params.preconditioners[ir] = - (lu(sparse(precon_matrix)), precon_matrix, input_buffer, - output_buffer) - else - # LU decomposition was previously created. The Jacobian always - # has the same sparsity pattern, so by using `lu!()` we can - # reuse some setup. - try - @timeit_debug global_timer "lu!" 
lu!(orig_lu, sparse(precon_matrix); check=false) - catch e - if !isa(e, ArgumentError) - rethrow(e) - end - println("Sparsity pattern of matrix changed, rebuilding " - * " LU from scratch") - @timeit_debug global_timer "lu" orig_lu = lu(sparse(precon_matrix)) - end - nl_solver_params.preconditioners[ir] = - (orig_lu, precon_matrix, input_buffer, output_buffer) - end - else - nl_solver_params.preconditioners[ir] = - (orig_lu, precon_matrix, input_buffer, output_buffer) - end - end - - - @timeit_debug global_timer lu_precon!(x) = begin - precon_ppar, precon_f = x - - precon_lu, _, this_input_buffer, this_output_buffer = - nl_solver_params.preconditioners[ir] - - begin_serial_region() - counter = 1 - @loop_z_vperp_vpa iz ivperp ivpa begin - this_input_buffer[counter] = precon_f[ivpa,ivperp,iz] - counter += 1 - end - @loop_z iz begin - this_input_buffer[counter] = precon_ppar[iz] - counter += 1 - end - - begin_serial_region() - @serial_region begin - @timeit_debug global_timer "ldiv!" ldiv!(this_output_buffer, precon_lu, this_input_buffer) - end - - begin_serial_region() - counter = 1 - @loop_z_vperp_vpa iz ivperp ivpa begin - precon_f[ivpa,ivperp,iz] = this_output_buffer[counter] - counter += 1 - end - @loop_z iz begin - precon_ppar[iz] = this_output_buffer[counter] - counter += 1 - end - - # Ensure values of precon_f and precon_ppar are consistent across - # distributed-MPI block boundaries. For precon_f take the upwind - # value, and for precon_ppar take the average. - f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] - f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] - receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] - receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] - begin_vperp_vpa_region() - @loop_vperp_vpa ivperp ivpa begin - f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] - f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] - end - # We upwind the z-derivatives in `electron_z_advection!()`, so would - # expect that upwinding the results here in z would make sense. - # However, upwinding here makes convergence much slower (~10x), - # compared to picking the values from one side or other of the block - # boundary, or taking the average of the values on either side. - # Neither direction is special, so taking the average seems most - # sensible (although in an intial test it does not seem to converge - # faster than just picking one or the other). - # Maybe this could indicate that it is more important to have a fully - # self-consistent Jacobian inversion for the - # `electron_vpa_advection()` part rather than taking half(ish) of the - # values from one block and the other half(ish) from the other. - reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( - precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, - receive_buffer2, z) - - begin_serial_region() - @serial_region begin - buffer_1[] = precon_ppar[1] - buffer_2[] = precon_ppar[end] - end - reconcile_element_boundaries_MPI!( - precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) - - return nothing - end - - left_preconditioner = identity - right_preconditioner = lu_precon! 
- elseif nl_solver_params.preconditioner_type === Val(:electron_adi) - - if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || - t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] - - # dt has changed significantly, so update the preconditioner - nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval - end - - if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval -global_rank[] == 0 && println("recalculating precon") - nl_solver_params.solves_since_precon_update[] = 0 - nl_solver_params.precon_dt[] = t_params.dt[] - - adi_info = nl_solver_params.preconditioners[ir] - - dens = @view moments.electron.dens[:,ir] - upar = @view moments.electron.upar[:,ir] - vth = @view moments.electron.vth[:,ir] - qpar = @view moments.electron.qpar[:,ir] - - # Reconstruct w_∥^3 moment of g_e from already-calculated qpar - third_moment = scratch_dummy.buffer_z_1 - dthird_moment_dz = scratch_dummy.buffer_z_2 - begin_z_region() - @loop_z iz begin - third_moment[iz] = 0.5 * qpar[iz] / electron_ppar_new[iz] / vth[iz] - end - derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, - buffer_3, buffer_4, z_spectral, z) - - z_speed = @view z_advect[1].speed[:,:,:,ir] - - dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] - begin_vperp_vpa_region() - update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) - @loop_vperp_vpa ivperp ivpa begin - @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp] - end - #calculate the upwind derivative - @views derivative_z_pdf_vpavperpz!(dpdf_dz, f_electron_new, - z_advect[1].adv_fac[:,:,:,ir], - scratch_dummy.buffer_vpavperpr_1[:,:,ir], - scratch_dummy.buffer_vpavperpr_2[:,:,ir], - scratch_dummy.buffer_vpavperpr_3[:,:,ir], - scratch_dummy.buffer_vpavperpr_4[:,:,ir], - scratch_dummy.buffer_vpavperpr_5[:,:,ir], - scratch_dummy.buffer_vpavperpr_6[:,:,ir], - z_spectral, z) - - dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] - begin_z_vperp_region() - update_electron_speed_vpa!(vpa_advect[1], dens, upar, - electron_ppar_new, moments, vpa.grid, - external_source_settings.electron, ir) - @loop_z_vperp iz ivperp begin - @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] - end - #calculate the upwind derivative of the electron pdf w.r.t. wpa - @loop_z_vperp iz ivperp begin - @views derivative!(dpdf_dvpa[:,ivperp,iz], f_electron_new[:,ivperp,iz], vpa, - vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) - end - - zeroth_moment = z.scratch_shared - first_moment = z.scratch_shared2 - second_moment = z.scratch_shared3 - begin_z_region() - vpa_grid = vpa.grid - vpa_wgts = vpa.wgts - @loop_z iz begin - @views zeroth_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_wgts) - @views first_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, vpa_wgts) - @views second_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, 2, vpa_wgts) - end - - v_size = vperp.n * vpa.n - - # Do setup for 'v solves' - v_solve_counter = 0 - A = adi_info.v_solve_matrix_buffer - explicit_J = adi_info.J_buffer - # Get sparse matrix for explicit, right-hand-side part of the - # solve. 
- if adi_info.n_extra_iterations > 0 - # If we only do one 'iteration' we don't need the 'explicit - # matrix' for the first solve (the v-solve), because the initial - # guess is zero, - fill_electron_kinetic_equation_Jacobian!( - explicit_J, f_electron_new, electron_ppar_new, moments, - phi, collisions, composition, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, vpa_advect, - scratch_dummy, external_source_settings, num_diss_params, - t_params, ion_dt, ir, evolve_ppar, :explicit_z, false) - end - begin_z_region() - @loop_z iz begin - v_solve_counter += 1 - # Get LU-factorized matrix for implicit part of the solve - @views fill_electron_kinetic_equation_v_only_Jacobian!( - A, f_electron_new[:,:,iz], electron_ppar_new[iz], - dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, - zeroth_moment[iz], first_moment[iz], second_moment[iz], - third_moment[iz], dthird_moment_dz[iz], phi[iz], collisions, - composition, z, vperp, vpa, z_spectral, vperp_spectral, - vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, - ir, iz, evolve_ppar) - A_sparse = sparse(A) - if !isassigned(adi_info.v_solve_implicit_lus, v_solve_counter) - @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse) - else - # LU decomposition was previously created. The Jacobian always - # has the same sparsity pattern, so by using `lu!()` we can - # reuse some setup. - try - @timeit_debug global_timer "lu!" lu!(adi_info.v_solve_implicit_lus[v_solve_counter], A_sparse; check=false) - catch e - if !isa(e, ArgumentError) - rethrow(e) - end - println("Sparsity pattern of matrix changed, rebuilding " - * " LU from scratch ir=$ir, iz=$iz") - @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse) - end - end - - if adi_info.n_extra_iterations > 0 - # If we only do one 'iteration' we don't need the 'explicit - # matrix' for the first solve (the v-solve), because the - # initial guess is zero, - adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:])) - end - end - @boundscheck v_solve_counter == adi_info.v_solve_nsolve || error("v_solve_counter($v_solve_counter) != v_solve_nsolve($(adi_info.v_solve_nsolve))") - - # Do setup for 'z solves' - z_solve_counter = 0 - A = adi_info.z_solve_matrix_buffer - explicit_J = adi_info.J_buffer - # Get sparse matrix for explicit, right-hand-side part of the - # solve. 
- fill_electron_kinetic_equation_Jacobian!( - explicit_J, f_electron_new, electron_ppar_new, moments, phi, - collisions, composition, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, :explicit_v, false) - begin_vperp_vpa_region() - @loop_vperp_vpa ivperp ivpa begin - z_solve_counter += 1 - - # Get LU-factorized matrix for implicit part of the solve - @views fill_electron_kinetic_equation_z_only_Jacobian_f!( - A, f_electron_new[ivpa,ivperp,:], electron_ppar_new, - dpdf_dz[ivpa,ivperp,:], dpdf_dvpa[ivpa,ivperp,:], z_speed, - moments, zeroth_moment, first_moment, second_moment, - third_moment, dthird_moment_dz, collisions, composition, z, - vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, - z_advect, vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, t_params, ion_dt, ir, ivperp, ivpa, - evolve_ppar) - - A_sparse = sparse(A) - if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter) - @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) - else - # LU decomposition was previously created. The Jacobian always - # has the same sparsity pattern, so by using `lu!()` we can - # reuse some setup. - try - @timeit_debug global_timer "lu!" lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false) - catch e - if !isa(e, ArgumentError) - rethrow(e) - end - println("Sparsity pattern of matrix changed, rebuilding " - * " LU from scratch ir=$ir, ivperp=$ivperp, ivpa=$ivpa") - @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) - end - end - - adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:])) - end - begin_serial_region(; no_synchronize=true) - @serial_region begin - # Do the solve for ppar on the rank-0 process, which has the - # fewest grid points to handle if there are not an exactly equal - # number of points for each process. - z_solve_counter += 1 - - # Get LU-factorized matrix for implicit part of the solve - @views fill_electron_kinetic_equation_z_only_Jacobian_ppar!( - A, electron_ppar_new, moments, zeroth_moment, first_moment, - second_moment, third_moment, dthird_moment_dz, collisions, - composition, z, vperp, vpa, z_spectral, vperp_spectral, - vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar) - - A_sparse = sparse(A) - if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter) - @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) - else - # LU decomposition was previously created. The Jacobian always - # has the same sparsity pattern, so by using `lu!()` we can - # reuse some setup. - try - @timeit_debug global_timer "lu!" 
lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false) - catch e - if !isa(e, ArgumentError) - rethrow(e) - end - println("Sparsity pattern of matrix changed, rebuilding " - * " LU from scratch ir=$ir, ppar z-solve") - @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) - end - end - - adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:])) - end - @boundscheck z_solve_counter == adi_info.z_solve_nsolve || error("z_solve_counter($z_solve_counter) != z_solve_nsolve($(adi_info.z_solve_nsolve))") - end - - @timeit_debug global_timer adi_precon!(x) = begin - precon_ppar, precon_f = x - - adi_info = nl_solver_params.preconditioners[ir] - precon_iterations = nl_solver_params.precon_iterations - this_input_buffer = adi_info.input_buffer - this_intermediate_buffer = adi_info.intermediate_buffer - this_output_buffer = adi_info.output_buffer - global_index_subrange = adi_info.global_index_subrange - n_extra_iterations = adi_info.n_extra_iterations - - v_size = vperp.n * vpa.n - pdf_size = z.n * v_size - - # Use these views to communicate block-boundary points - output_buffer_pdf_view = reshape(@view(this_output_buffer[1:pdf_size]), size(precon_f)) - output_buffer_ppar_view = @view(this_output_buffer[pdf_size+1:end]) - f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] - f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] - receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] - receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] - - function adi_communicate_boundary_points() - # Ensure values of precon_f and precon_ppar are consistent across - # distributed-MPI block boundaries. For precon_f take the upwind - # value, and for precon_ppar take the average. - begin_vperp_vpa_region() - @loop_vperp_vpa ivperp ivpa begin - f_lower_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,1] - f_upper_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,end] - end - # We upwind the z-derivatives in `electron_z_advection!()`, so would - # expect that upwinding the results here in z would make sense. - # However, upwinding here makes convergence much slower (~10x), - # compared to picking the values from one side or other of the block - # boundary, or taking the average of the values on either side. - # Neither direction is special, so taking the average seems most - # sensible (although in an intial test it does not seem to converge - # faster than just picking one or the other). - # Maybe this could indicate that it is more important to have a fully - # self-consistent Jacobian inversion for the - # `electron_vpa_advection()` part rather than taking half(ish) of the - # values from one block and the other half(ish) from the other. 
- reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( - output_buffer_pdf_view, f_lower_endpoints, f_upper_endpoints, receive_buffer1, - receive_buffer2, z) - - begin_serial_region() - @serial_region begin - buffer_1[] = output_buffer_ppar_view[1] - buffer_2[] = output_buffer_ppar_view[end] - end - reconcile_element_boundaries_MPI!( - output_buffer_ppar_view, buffer_1, buffer_2, buffer_3, buffer_4, z) - - return nothing - end - - begin_z_vperp_vpa_region() - @loop_z_vperp_vpa iz ivperp ivpa begin - row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa - this_input_buffer[row] = precon_f[ivpa,ivperp,iz] - end - begin_z_region() - @loop_z iz begin - row = pdf_size + iz - this_input_buffer[row] = precon_ppar[iz] - end - _block_synchronize() - - # Use this to copy current guess from output_buffer to - # intermediate_buffer, to avoid race conditions as new guess is - # written into output_buffer. - function fill_intermediate_buffer!() - _block_synchronize() - for i ∈ global_index_subrange - this_intermediate_buffer[i] = this_output_buffer[i] - end - _block_synchronize() - end - - v_solve_global_inds = adi_info.v_solve_global_inds - v_solve_nsolve = adi_info.v_solve_nsolve - v_solve_implicit_lus = adi_info.v_solve_implicit_lus - v_solve_explicit_matrices = adi_info.v_solve_explicit_matrices - v_solve_buffer = adi_info.v_solve_buffer - v_solve_buffer2 = adi_info.v_solve_buffer2 - function first_adi_v_solve!() - # The initial guess is all-zero, so for the first solve there is - # no need to multiply by the 'explicit matrix' as x==0, so E.x==0 - for isolve ∈ 1:v_solve_nsolve - this_inds = v_solve_global_inds[isolve] - v_solve_buffer .= this_input_buffer[this_inds] - @timeit_debug global_timer "ldiv!" ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer) - this_output_buffer[this_inds] .= v_solve_buffer2 - end - end - function adi_v_solve!() - for isolve ∈ 1:v_solve_nsolve - this_inds = v_solve_global_inds[isolve] - v_solve_buffer .= @view this_input_buffer[this_inds] - # Need to multiply the 'explicit matrix' by -1, because all - # the Jacobian-calculation functions are defined as if the - # terms are being added to the left-hand-side preconditioner - # matrix, but here the 'explicit matrix' terms are added on - # the right-hand-side. - @timeit_debug global_timer "mul!" mul!(v_solve_buffer, v_solve_explicit_matrices[isolve], - this_intermediate_buffer, -1.0, 1.0) - @timeit_debug global_timer "ldiv!" ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer) - this_output_buffer[this_inds] .= v_solve_buffer2 - end - end - - z_solve_global_inds = adi_info.z_solve_global_inds - z_solve_nsolve = adi_info.z_solve_nsolve - z_solve_implicit_lus = adi_info.z_solve_implicit_lus - z_solve_explicit_matrices = adi_info.z_solve_explicit_matrices - z_solve_buffer = adi_info.z_solve_buffer - z_solve_buffer2 = adi_info.z_solve_buffer2 - function adi_z_solve!() - for isolve ∈ 1:z_solve_nsolve - this_inds = z_solve_global_inds[isolve] - z_solve_buffer .= @view this_input_buffer[this_inds] - # Need to multiply the 'explicit matrix' by -1, because all - # the Jacobian-calculation functions are defined as if the - # terms are being added to the left-hand-side preconditioner - # matrix, but here the 'explicit matrix' terms are added on - # the right-hand-side. - @timeit_debug global_timer "mul!" mul!(z_solve_buffer, z_solve_explicit_matrices[isolve], this_intermediate_buffer, -1.0, 1.0) - @timeit_debug global_timer "ldiv!" 
ldiv!(z_solve_buffer2, z_solve_implicit_lus[isolve], z_solve_buffer) - this_output_buffer[this_inds] .= z_solve_buffer2 - end - end - - precon_iterations[] += 1 - first_adi_v_solve!() - fill_intermediate_buffer!() - adi_z_solve!() - adi_communicate_boundary_points() - - for n ∈ 1:n_extra_iterations - precon_iterations[] += 1 - fill_intermediate_buffer!() - adi_v_solve!() - fill_intermediate_buffer!() - adi_z_solve!() - adi_communicate_boundary_points() - end - - # Unpack preconditioner solution - begin_z_vperp_vpa_region() - @loop_z_vperp_vpa iz ivperp ivpa begin - row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa - precon_f[ivpa,ivperp,iz] = this_output_buffer[row] - end - begin_z_region() - @loop_z iz begin - row = pdf_size + iz - precon_ppar[iz] = this_output_buffer[row] - end - - return nothing - end - - left_preconditioner = identity - right_preconditioner = adi_precon! - elseif nl_solver_params.preconditioner_type === Val(:none) - left_preconditioner = identity - right_preconditioner = identity - else - error("preconditioner_type=$(nl_solver_params.preconditioner_type) is not " - * "supported by electron_backward_euler!().") - end - - # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the - # electron parallel pressure. - function residual_func!(this_residual, new_variables; krylov=false) - electron_ppar_residual, f_electron_residual = this_residual - electron_ppar_newvar, f_electron_newvar = new_variables - - if evolve_ppar - this_dens = moments.electron.dens - this_upar = moments.electron.upar - this_vth = moments.electron.vth - begin_z_region() - @loop_z iz begin - # update the electron thermal speed using the updated electron - # parallel pressure - this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz] / - (this_dens[iz,ir] * - composition.me_over_mi))) - end - end - - # enforce the boundary condition(s) on the electron pdf - @views enforce_boundary_condition_on_electron_pdf!( - f_electron_newvar, phi, moments.electron.vth[:,ir], - moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, - vpa_spectral, vpa_advect, moments, - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir; bc_constraints=false, - update_vcut=!krylov) - - if evolve_ppar - # Calculate heat flux and derivatives using new_variables + step_success = electron_backward_euler!( + old_scratch, new_scratch, moments, phi, collisions, + composition, r, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + t_params, external_source_settings, num_diss_params, + nl_solver_params, ir; evolve_ppar=evolve_ppar, + ion_dt=ion_dt) + + if step_success + apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, r, + z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, + num_diss_params, composition, ir, + nl_solver_params) + + if !evolve_ppar + # update the electron heat flux + moments.electron.qpar_updated[] = false @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], - electron_ppar_newvar, + electron_ppar_new, moments.electron.vth[:,ir], - f_electron_newvar, vpa, - ir) + f_electron_new, vpa, ir) - calculate_electron_moment_derivatives_no_r!( - moments, - (electron_density=this_dens, - electron_upar=this_upar, - electron_ppar=electron_ppar_newvar), - scratch_dummy, z, z_spectral, - num_diss_params.electron.moment_dissipation_coefficient, ir) - else - # Calculate heat flux and derivatives using new_variables - @views 
calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], - electron_ppar_newvar, - moments.electron.vth[:,ir], - f_electron_newvar, vpa, - ir) # compute the z-derivative of the parallel electron heat flux @views derivative_z!(moments.electron.dqpar_dz[:,ir], moments.electron.qpar[:,ir], buffer_1, buffer_2, buffer_3, buffer_4, z_spectral, z) end - if evolve_ppar - begin_z_region() - @loop_z iz begin - electron_ppar_residual[iz] = electron_ppar_old[iz,ir] - end - else - begin_z_region() - @loop_z iz begin - electron_ppar_residual[iz] = 0.0 - end - end - - # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the - # electron_pdf member of the first argument, so if we set the electron_pdf member - # of the first argument to zero, and pass dt=1, then it will evaluate the time - # derivative, which is the residual for a steady-state solution. - begin_z_vperp_vpa_region() - @loop_z_vperp_vpa iz ivperp ivpa begin - f_electron_residual[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz] - end - electron_kinetic_equation_euler_update!( - f_electron_residual, electron_ppar_residual, f_electron_newvar, - electron_ppar_newvar, moments, z, vperp, vpa, z_spectral, - vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, - composition, external_source_settings, num_diss_params, t_params, - ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt, - soft_force_constraints=true) - - # Now - # residual = f_electron_old + dt*RHS(f_electron_newvar) - # so update to desired residual - begin_z_vperp_vpa_region() - @loop_z_vperp_vpa iz ivperp ivpa begin - f_electron_residual[ivpa,ivperp,iz] = f_electron_newvar[ivpa,ivperp,iz] - f_electron_residual[ivpa,ivperp,iz] - end - if evolve_ppar - begin_z_region() - @loop_z iz begin - electron_ppar_residual[iz] = electron_ppar_newvar[iz] - electron_ppar_residual[iz] - end - end - - # Set residual to zero where pdf_electron is determined by boundary conditions. 
- if vpa.n > 1 - begin_z_vperp_region() - @loop_z_vperp iz ivperp begin - @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz], vpa.bc, - vpa_advect[1].speed[:,ivperp,iz,ir], - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - vpa, vpa_spectral) - end - end - if vperp.n > 1 - begin_z_vpa_region() - enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, - vperp, vperp_spectral, vperp_adv, - vperp_diffusion, ir) - end - zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir) - - return nothing - end - - residual = (scratch_dummy.implicit_buffer_z_1, scratch_dummy.implicit_buffer_vpavperpz_1) - delta_x = (scratch_dummy.implicit_buffer_z_2, - scratch_dummy.implicit_buffer_vpavperpz_2) - rhs_delta = (scratch_dummy.implicit_buffer_z_3, - scratch_dummy.implicit_buffer_vpavperpz_3) - v = (scratch_dummy.implicit_buffer_z_4, - scratch_dummy.implicit_buffer_vpavperpz_4) - w = (scratch_dummy.implicit_buffer_z_5, - scratch_dummy.implicit_buffer_vpavperpz_5) - - newton_success = newton_solve!((electron_ppar_new, f_electron_new), - residual_func!, residual, delta_x, rhs_delta, - v, w, nl_solver_params; - left_preconditioner=left_preconditioner, - right_preconditioner=right_preconditioner, - coords=(z=z, vperp=vperp, vpa=vpa)) - if newton_success #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) # update the time following the pdf update t_params.t[] += t_params.dt[] + local_pseudotime += t_params.dt[] if first_step && !reduced_by_ion_dt # Adjust t_params.previous_dt[] which gives the initial timestep for @@ -1597,6 +893,8 @@ global_rank[] == 0 && println("recalculating precon") else t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) end + # Ensure dt does not exceed maximum_dt + t_params.dt[] = min(t_params.dt[], t_params.maximum_dt) end first_step = false @@ -1612,6 +910,10 @@ global_rank[] == 0 && println("recalculating precon") # to arrays, because f_electron_old and electron_ppar_old are captured by # residual_func!() above, so any change in the things they refer to will # cause type instability in residual_func!(). + f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir] + f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] + electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + electron_ppar_old = @view old_scratch.electron_ppar[:,ir] begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin f_electron_new[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz] @@ -1620,75 +922,34 @@ global_rank[] == 0 && println("recalculating precon") @loop_z iz begin electron_ppar_new[iz] = electron_ppar_old[iz] end - end - new_lowerz_vcut_inds = r.scratch_shared_int - new_upperz_vcut_inds = r.scratch_shared_int2 - apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z, - vperp, vpa, vperp_spectral, - vpa_spectral, vpa_advect, - num_diss_params, composition, ir; - lowerz_vcut_inds=new_lowerz_vcut_inds, - upperz_vcut_inds=new_upperz_vcut_inds) - # Check if either vcut_ind has changed - if either has, recalculate the - # preconditioner because the response at the grid point before the cutoff is - # sharp, so the preconditioner could be significantly wrong when it was - # calculated using the wrong vcut_ind. 
- lower_vcut_changed = @view z.scratch_shared_int[1:1] - upper_vcut_changed = @view z.scratch_shared_int[2:2] - @serial_region begin - if z.irank == 0 - precon_lowerz_vcut_inds = nl_solver_params.precon_lowerz_vcut_inds - if all(new_lowerz_vcut_inds .== precon_lowerz_vcut_inds) - lower_vcut_changed[] = 0 - else - lower_vcut_changed[] = 1 - precon_lowerz_vcut_inds .= new_lowerz_vcut_inds - end - end - MPI.Bcast!(lower_vcut_changed, comm_inter_block[]; root=0) - #req1 = MPI.Ibcast!(lower_vcut_changed, comm_inter_block[]; root=0) + new_lowerz_vcut_ind = @view r.scratch_shared_int[ir:ir] + new_upperz_vcut_ind = @view r.scratch_shared_int2[ir:ir] + apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, r, + z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, + num_diss_params, composition, ir, + nl_solver_params) + + if !evolve_ppar + # update the electron heat flux + moments.electron.qpar_updated[] = false + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) - if z.irank == z.nrank - 1 - precon_upperz_vcut_inds = nl_solver_params.precon_upperz_vcut_inds - if all(new_upperz_vcut_inds .== precon_upperz_vcut_inds) - upper_vcut_changed[] = 0 - else - upper_vcut_changed[] = 1 - precon_upperz_vcut_inds .= new_upperz_vcut_inds - end + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz[:,ir], + moments.electron.qpar[:,ir], buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) end - MPI.Bcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) - #req2 = MPI.Ibcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) - - # Eventually can use Ibcast!() to make the two broadcasts run - # simultaneously, but need the function to be merged into MPI.jl (see - # https://github.com/JuliaParallel/MPI.jl/pull/882). - #MPI.Waitall([req1, req2]) - end - _block_synchronize() - if lower_vcut_changed[] == 1 || upper_vcut_changed[] == 1 - # One or both of the indices changed for some `ir`, so force the - # preconditioner to be recalculated next time. - nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval end - if !evolve_ppar - # update the electron heat flux - moments.electron.qpar_updated[] = false - @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], - electron_ppar_new, - moments.electron.vth[:,ir], - f_electron_new, vpa, ir) - - # compute the z-derivative of the parallel electron heat flux - @views derivative_z!(moments.electron.dqpar_dz[:,ir], - moments.electron.qpar[:,ir], buffer_1, buffer_2, - buffer_3, buffer_4, z_spectral, z) - end + reset_nonlinear_per_stage_counters!(nl_solver_params) residual_norm = -1.0 - if newton_success + if step_success # Calculate residuals to decide if iteration is converged. 
# Might want an option to calculate the r_normesidual only after a certain # number of iterations (especially during initialization when there are @@ -1736,98 +997,882 @@ global_rank[] == 0 && println("recalculating precon") end end end - if ((t_params.step_counter[] % t_params.nwrite_moments == 0) - || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) + if ((t_params.step_counter[] % t_params.nwrite_moments == 0) + || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) + + if r.n == 1 + # For now can only do I/O within the pseudo-timestepping loop when there + # is no r-dimension, because different points in r would take different + # number and length of timesteps to converge. + + # Update the electron heat flux for output. This will be done anyway + # before using qpar in the next residual_func!() call, but there is no + # convenient way to include that calculation in this debug output. + moments.electron.qpar_updated[] = false + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) + begin_serial_region() + t_params.moments_output_counter[] += 1 + @serial_region begin + if io_electron !== nothing + t_params.write_moments_output[] = false + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], + local_pseudotime, residual_norm, r, z, + vperp, vpa) + end + end + end + end + + t_params.step_counter[] += 1 + if electron_pdf_converged[] + break + end + end + if !electron_pdf_converged[] + # If electron solve failed to converge for some `ir`, the failure will be + # handled by restarting the ion timestep with a smaller dt, so no need to try + # to solve for further `ir` values. + break + end + end + # Update the 'pdf' arrays with the final result + begin_r_z_vperp_vpa_region() + final_scratch_pdf = scratch[t_params.n_rk_stages+1].pdf_electron + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + pdf[ivpa,ivperp,iz,ir] = final_scratch_pdf[ivpa,ivperp,iz,ir] + end + if evolve_ppar + # Update `moments.electron.ppar` with the final electron pressure + begin_r_z_region() + scratch_ppar = scratch[t_params.n_rk_stages+1].electron_ppar + moments_ppar = moments.electron.ppar + @loop_r_z ir iz begin + moments_ppar[iz,ir] = scratch_ppar[iz,ir] + end + end + begin_serial_region() + @serial_region begin + if !electron_pdf_converged[] || do_debug_io + if io_electron !== nothing && io_electron !== true + t_params.moments_output_counter[] += 1 + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], local_pseudotime, + residual_norm, r, z, vperp, vpa) + finish_electron_io(io_electron) + end + end + end + + if r.n > 1 + error("Limits on iteration count and simtime assume 1D simulations. " + * "Need to fix handling of t_params.t[] and t_params.step_counter[], " + * "and also t_params.max_step_count_this_ion_step[] and " + * "t_params.max_t_increment_this_ion_step[]") + else + t_params.max_step_count_this_ion_step[] = + max(t_params.step_counter[] - initial_step_counter, + t_params.max_step_count_this_ion_step[]) + t_params.max_t_increment_this_ion_step[] = + max(t_params.t[] - initial_time, + t_params.max_t_increment_this_ion_step[]) + end + + initial_dt_scale_factor = 0.1 + if t_params.previous_dt[] < initial_dt_scale_factor * t_params.dt[] + # If dt has increased a lot, we can probably try a larger initial dt for the next + # solve. 
+ t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] + end + + if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] + # Reset dt in case it was reduced to be less than 0.5*ion_dt + t_params.dt[] = t_params.previous_dt[] + end + if !electron_pdf_converged[] + success = "kinetic-electrons" + else + success = "" + end + return success +end + +""" + electron_backward_euler!(old_scratch, new_scratch, moments, phi, + collisions, composition, r, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, t_params, + external_source_settings, num_diss_params, nl_solver_params, ir; + evolve_ppar=false, ion_dt=nothing) = begin + +Take a single backward euler timestep for the electron shape function \$g_e\$ and parallel +pressure \$p_{e∥}\$. +""" +@timeit global_timer electron_backward_euler!(old_scratch, new_scratch, moments, phi, + collisions, composition, r, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, t_params, + external_source_settings, num_diss_params, nl_solver_params, ir; + evolve_ppar=false, ion_dt=nothing) = begin + + # create several 0D dummy arrays for use in taking derivatives + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] + f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir] + electron_ppar_old = @view old_scratch.electron_ppar[:,ir] + electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + + # Calculate heat flux and derivatives using updated f_electron + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) + @views calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=moments.electron.dens[:,ir], + electron_upar=moments.electron.upar[:,ir], + electron_ppar=electron_ppar_new), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + if nl_solver_params.preconditioner_type === Val(:electron_split_lu) + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + nl_solver_params.solves_since_precon_update[] = 0 + + vth = @view moments.electron.vth[:,ir] + me = composition.me_over_mi + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = electron_ppar_new + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + source_amplitude = moments.electron.external_source_amplitude + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + + # Note the region(s) used here must be the same as the region(s) used + # when the matrices are used in `split_precon!()`, so that the + # parallelisation is the same and each matrix is used on the same + # process that created it. 
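The z-advection preconditioner loop that follows builds one small z-direction operator per (ivpa, ivperp) point via get_electron_split_Jacobians!(), LU-factorises each once, and split_precon!() then applies the stored factors slice-by-slice. Below is a minimal, self-contained sketch of that slice-by-slice pattern, using a generic advection-diffusion operator as a stand-in for the real Jacobians; all sizes, coefficients and helper names here are illustrative assumptions, not the moment_kinetics API.

using LinearAlgebra, SparseArrays

# Illustrative sizes and step (assumptions, not the real moment_kinetics grids).
nz, nvpa, nvperp = 16, 8, 3
dz, dt = 1.0 / nz, 1.0e-3

# A small backward-Euler operator I - dt*(advection + diffusion) along z for one
# velocity-space point; the speed stands in for the z-advection speed there.
function z_operator(speed, nz, dz, dt; diffusion=1.0e-2)
    adv = spdiagm(-1 => fill(-speed / (2dz), nz - 1),
                   1 => fill(speed / (2dz), nz - 1))
    dif = spdiagm(-1 => fill(diffusion / dz^2, nz - 1),
                   0 => fill(-2.0 * diffusion / dz^2, nz),
                   1 => fill(diffusion / dz^2, nz - 1))
    return sparse(I, nz, nz) - dt * (adv + dif)
end

# Factorise every 1D z-operator once, outside the preconditioner application.
z_lus = [lu(z_operator(0.1 * ivpa - 0.05 * ivperp, nz, dz, dt))
         for ivpa in 1:nvpa, ivperp in 1:nvperp]

# Apply the preconditioner slice-by-slice, reusing the stored factorisations.
function split_precon_sketch!(f, z_lus)
    nz_local = size(f, 3)
    work = zeros(nz_local)
    solved = zeros(nz_local)
    for ivperp in axes(f, 2), ivpa in axes(f, 1)
        work .= @view f[ivpa, ivperp, :]
        ldiv!(solved, z_lus[ivpa, ivperp], work)
        f[ivpa, ivperp, :] .= solved
    end
    return f
end

f = rand(nvpa, nvperp, nz)
split_precon_sketch!(f, z_lus)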
+ + # z-advection preconditioner + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + z_matrix, ppar_matrix = get_electron_split_Jacobians!( + ivperp, ivpa, ppar, moments, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, evolve_ppar) + @timeit_debug global_timer "lu" nl_solver_params.preconditioners.z[ivpa,ivperp,ir] = lu(sparse(z_matrix)) + if ivperp == 1 && ivpa == 1 + @timeit_debug global_timer "lu" nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix)) + end + end + end + + function split_precon!(x) + precon_ppar, precon_f = x + + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + z_precon_matrix = nl_solver_params.preconditioners.z[ivpa,ivperp,ir] + f_slice = @view precon_f[ivpa,ivperp,:] + @views z.scratch .= f_slice + @timeit_debug global_timer "ldiv!" ldiv!(z.scratch2, z_precon_matrix, z.scratch) + f_slice .= z.scratch2 + end + + begin_z_region() + ppar_precon_matrix = nl_solver_params.preconditioners.ppar[ir] + @loop_z iz begin + z.scratch[iz] = precon_ppar[iz] + end + + begin_serial_region() + @serial_region begin + @timeit_debug global_timer "ldiv!" ldiv!(precon_ppar, ppar_precon_matrix, z.scratch) + end + end + + left_preconditioner = identity + right_preconditioner = split_precon! + elseif nl_solver_params.preconditioner_type === Val(:electron_lu) + + if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || + t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] + + # dt has changed significantly, so update the preconditioner + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end + + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval +global_rank[] == 0 && println("recalculating precon") + nl_solver_params.solves_since_precon_update[] = 0 + nl_solver_params.precon_dt[] = t_params.dt[] + + orig_lu, precon_matrix, input_buffer, output_buffer = + nl_solver_params.preconditioners[ir] + + fill_electron_kinetic_equation_Jacobian!( + precon_matrix, f_electron_new, electron_ppar_new, moments, phi, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, evolve_ppar) + + begin_serial_region() + if block_rank[] == 0 + if size(orig_lu) == (1, 1) + # Have not properly created the LU decomposition before, so + # cannot reuse it. + @timeit_debug global_timer "lu" nl_solver_params.preconditioners[ir] = + (lu(sparse(precon_matrix)), precon_matrix, input_buffer, + output_buffer) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" 
lu!(orig_lu, sparse(precon_matrix); check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch") + @timeit_debug global_timer "lu" orig_lu = lu(sparse(precon_matrix)) + end + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer) + end + else + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer) + end + end + + + @timeit_debug global_timer lu_precon!(x) = begin + precon_ppar, precon_f = x + + precon_lu, _, this_input_buffer, this_output_buffer = + nl_solver_params.preconditioners[ir] + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + this_input_buffer[counter] = precon_f[ivpa,ivperp,iz] + counter += 1 + end + @loop_z iz begin + this_input_buffer[counter] = precon_ppar[iz] + counter += 1 + end + + begin_serial_region() + @serial_region begin + @timeit_debug global_timer "ldiv!" ldiv!(this_output_buffer, precon_lu, this_input_buffer) + end + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + precon_f[ivpa,ivperp,iz] = this_output_buffer[counter] + counter += 1 + end + @loop_z iz begin + precon_ppar[iz] = this_output_buffer[counter] + counter += 1 + end + + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For precon_f take the upwind + # value, and for precon_ppar take the average. + f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] + end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. + # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side. + # Neither direction is special, so taking the average seems most + # sensible (although in an intial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other. + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = precon_ppar[1] + buffer_2[] = precon_ppar[end] + end + reconcile_element_boundaries_MPI!( + precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) + + return nothing + end + + left_preconditioner = identity + right_preconditioner = lu_precon! 
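The Val(:electron_lu) branch above (and the ADI setup below) keeps the factorisation object alive between preconditioner updates: lu!() refreshes the numerical values while reusing the symbolic analysis, and a full lu() is only rebuilt when lu!() throws an ArgumentError because the sparsity pattern changed. A stripped-down sketch of that refresh pattern on toy matrices (not the electron Jacobian):

using LinearAlgebra, SparseArrays

# Refresh an existing factorisation in place when possible; rebuild it otherwise.
function refresh_lu(factorization, A::SparseMatrixCSC)
    try
        # Reuses the symbolic analysis; check=false defers the singularity check.
        lu!(factorization, A; check=false)
    catch e
        isa(e, ArgumentError) || rethrow(e)
        # Sparsity pattern changed: rebuild the factorisation from scratch.
        factorization = lu(A)
    end
    return factorization
end

A = sparse([1.0 2.0 0.0; 0.0 3.0 1.0; 1.0 0.0 4.0])
F = lu(A)

# Same sparsity pattern, new values: cheap in-place refresh.
F = refresh_lu(F, sparse([2.0 1.0 0.0; 0.0 5.0 2.0; 3.0 0.0 1.0]))

b = [1.0, 2.0, 3.0]
x = similar(b)
ldiv!(x, F, b)   # solve with the refreshed factorisation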
+ elseif nl_solver_params.preconditioner_type === Val(:electron_adi) + + if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || + t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] + + # dt has changed significantly, so update the preconditioner + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end + + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval +global_rank[] == 0 && println("recalculating precon") + nl_solver_params.solves_since_precon_update[] = 0 + nl_solver_params.precon_dt[] = t_params.dt[] + + adi_info = nl_solver_params.preconditioners[ir] + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / electron_ppar_new[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + z_speed = @view z_advect[1].speed[:,:,:,ir] + + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!(dpdf_dz, f_electron_new, + z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], + z_spectral, z) + + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, + electron_ppar_new, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f_electron_new[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, 2, vpa_wgts) + end + + v_size = vperp.n * vpa.n + + # Do setup for 'v solves' + v_solve_counter = 0 + A = adi_info.v_solve_matrix_buffer + explicit_J = adi_info.J_buffer + # Get sparse matrix for explicit, right-hand-side part of the + # solve. 
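The 'explicit matrix' mentioned in the comment above holds the part of the Jacobian that a given ADI half-sweep does not invert directly: the velocity-space ('v') solves treat the z-coupling explicitly and the z solves treat the velocity-space coupling explicitly, so each sweep applies its own LU-factorised block with the other block's contribution moved to the right-hand side. A toy sketch of one such alternating application on a two-direction operator; the operators, sizes and iteration count are illustrative assumptions, not the electron kinetic equation.

using LinearAlgebra, SparseArrays

n1, n2 = 8, 6                      # toy grid: n1 x n2 unknowns
N = n1 * n2
D(n) = spdiagm(0 => fill(2.0, n), -1 => fill(-1.0, n - 1), 1 => fill(-1.0, n - 1))

# Couplings along each direction, plus the identity from the backward-Euler term.
Jv = kron(sparse(I, n2, n2), D(n1))        # couples index 1 only
Jz = kron(D(n2), sparse(I, n1, n1))        # couples index 2 only
J  = sparse(I, N, N) + Jv + Jz

# Implicit factor for each half-sweep; the other direction is the 'explicit matrix'.
Lv = lu(sparse(I, N, N) + Jv)
Lz = lu(sparse(I, N, N) + Jz)

# One ADI application approximating x ≈ J \ b.
function adi_apply(b, Lv, Lz, Jv, Jz; n_extra_iterations=2)
    x = Lv \ b                     # first v-sweep: zero initial guess, no explicit term
    x = Lz \ (b - Jv * x)          # z-sweep: v-coupling moved to the right-hand side
    for _ in 1:n_extra_iterations
        x = Lv \ (b - Jz * x)
        x = Lz \ (b - Jv * x)
    end
    return x
end

b = rand(N)
x_precon = adi_apply(b, Lv, Lz, Jv, Jz)
x_exact = J \ b
# The ADI result is only an approximate inverse, which is all a preconditioner needs.
rel_err = norm(x_precon - x_exact) / norm(x_exact)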
+ if adi_info.n_extra_iterations > 0 + # If we only do one 'iteration' we don't need the 'explicit + # matrix' for the first solve (the v-solve), because the initial + # guess is zero, + fill_electron_kinetic_equation_Jacobian!( + explicit_J, f_electron_new, electron_ppar_new, moments, + phi, collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params, ion_dt, ir, evolve_ppar, :explicit_z, false) + end + begin_z_region() + @loop_z iz begin + v_solve_counter += 1 + # Get LU-factorized matrix for implicit part of the solve + @views fill_electron_kinetic_equation_v_only_Jacobian!( + A, f_electron_new[:,:,iz], electron_ppar_new[iz], + dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, + zeroth_moment[iz], first_moment[iz], second_moment[iz], + third_moment[iz], dthird_moment_dz[iz], phi[iz], collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, iz, evolve_ppar) + A_sparse = sparse(A) + if !isassigned(adi_info.v_solve_implicit_lus, v_solve_counter) + @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" lu!(adi_info.v_solve_implicit_lus[v_solve_counter], A_sparse; check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch ir=$ir, iz=$iz") + @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse) + end + end + + if adi_info.n_extra_iterations > 0 + # If we only do one 'iteration' we don't need the 'explicit + # matrix' for the first solve (the v-solve), because the + # initial guess is zero, + adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:])) + end + end + @boundscheck v_solve_counter == adi_info.v_solve_nsolve || error("v_solve_counter($v_solve_counter) != v_solve_nsolve($(adi_info.v_solve_nsolve))") + + # Do setup for 'z solves' + z_solve_counter = 0 + A = adi_info.z_solve_matrix_buffer + explicit_J = adi_info.J_buffer + # Get sparse matrix for explicit, right-hand-side part of the + # solve. 
+ fill_electron_kinetic_equation_Jacobian!( + explicit_J, f_electron_new, electron_ppar_new, moments, phi, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar, :explicit_v, false) + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + z_solve_counter += 1 + + # Get LU-factorized matrix for implicit part of the solve + @views fill_electron_kinetic_equation_z_only_Jacobian_f!( + A, f_electron_new[ivpa,ivperp,:], electron_ppar_new, + dpdf_dz[ivpa,ivperp,:], dpdf_dvpa[ivpa,ivperp,:], z_speed, + moments, zeroth_moment, first_moment, second_moment, + third_moment, dthird_moment_dz, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params, ion_dt, ir, ivperp, ivpa, + evolve_ppar) + + A_sparse = sparse(A) + if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter) + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch ir=$ir, ivperp=$ivperp, ivpa=$ivpa") + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + end + end + + adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:])) + end + begin_serial_region(; no_synchronize=true) + @serial_region begin + # Do the solve for ppar on the rank-0 process, which has the + # fewest grid points to handle if there are not an exactly equal + # number of points for each process. + z_solve_counter += 1 + + # Get LU-factorized matrix for implicit part of the solve + @views fill_electron_kinetic_equation_z_only_Jacobian_ppar!( + A, electron_ppar_new, moments, zeroth_moment, first_moment, + second_moment, third_moment, dthird_moment_dz, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, evolve_ppar) + + A_sparse = sparse(A) + if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter) + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" 
lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch ir=$ir, ppar z-solve") + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + end + end + + adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:])) + end + @boundscheck z_solve_counter == adi_info.z_solve_nsolve || error("z_solve_counter($z_solve_counter) != z_solve_nsolve($(adi_info.z_solve_nsolve))") + end + + @timeit_debug global_timer adi_precon!(x) = begin + precon_ppar, precon_f = x + + adi_info = nl_solver_params.preconditioners[ir] + precon_iterations = nl_solver_params.precon_iterations + this_input_buffer = adi_info.input_buffer + this_intermediate_buffer = adi_info.intermediate_buffer + this_output_buffer = adi_info.output_buffer + global_index_subrange = adi_info.global_index_subrange + n_extra_iterations = adi_info.n_extra_iterations + + v_size = vperp.n * vpa.n + pdf_size = z.n * v_size + + # Use these views to communicate block-boundary points + output_buffer_pdf_view = reshape(@view(this_output_buffer[1:pdf_size]), size(precon_f)) + output_buffer_ppar_view = @view(this_output_buffer[pdf_size+1:end]) + f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + + function adi_communicate_boundary_points() + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For precon_f take the upwind + # value, and for precon_ppar take the average. + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,end] + end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. + # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side. + # Neither direction is special, so taking the average seems most + # sensible (although in an intial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other. 
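The reconcile_element_boundaries_MPI* calls just below perform this communication across distributed blocks. Stripped of MPI, 'reconciling' a shared element-boundary point simply means that both neighbouring blocks end up holding one agreed value there, for example the average used for the pressure field. A serial toy illustrating that idea (array contents are illustrative):

# Two blocks share the grid point at their common boundary. After independent
# solves the shared point generally holds two different values; reconcile them
# so both sides agree (here by averaging, as done for precon_ppar).
function reconcile_shared_point!(lower, upper)
    shared = 0.5 * (lower[end] + upper[1])
    lower[end] = shared
    upper[1] = shared
    return nothing
end

lower_ppar = [1.0, 1.1, 1.25]   # last point is the shared boundary point
upper_ppar = [1.35, 1.5, 1.7]   # first point is the same physical location

reconcile_shared_point!(lower_ppar, upper_ppar)
@assert lower_ppar[end] == upper_ppar[1]   # both blocks now hold 1.3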
+ reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + output_buffer_pdf_view, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = output_buffer_ppar_view[1] + buffer_2[] = output_buffer_ppar_view[end] + end + reconcile_element_boundaries_MPI!( + output_buffer_ppar_view, buffer_1, buffer_2, buffer_3, buffer_4, z) + + return nothing + end + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + this_input_buffer[row] = precon_f[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + row = pdf_size + iz + this_input_buffer[row] = precon_ppar[iz] + end + _block_synchronize() + + # Use this to copy current guess from output_buffer to + # intermediate_buffer, to avoid race conditions as new guess is + # written into output_buffer. + function fill_intermediate_buffer!() + _block_synchronize() + for i ∈ global_index_subrange + this_intermediate_buffer[i] = this_output_buffer[i] + end + _block_synchronize() + end - if r.n == 1 - # For now can only do I/O within the pseudo-timestepping loop when there - # is no r-dimension, because different points in r would take different - # number and length of timesteps to converge. - begin_serial_region() - t_params.moments_output_counter[] += 1 - @serial_region begin - if io_electron !== nothing - t_params.write_moments_output[] = false - write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, - vpa) - end - end + v_solve_global_inds = adi_info.v_solve_global_inds + v_solve_nsolve = adi_info.v_solve_nsolve + v_solve_implicit_lus = adi_info.v_solve_implicit_lus + v_solve_explicit_matrices = adi_info.v_solve_explicit_matrices + v_solve_buffer = adi_info.v_solve_buffer + v_solve_buffer2 = adi_info.v_solve_buffer2 + function first_adi_v_solve!() + # The initial guess is all-zero, so for the first solve there is + # no need to multiply by the 'explicit matrix' as x==0, so E.x==0 + for isolve ∈ 1:v_solve_nsolve + this_inds = v_solve_global_inds[isolve] + v_solve_buffer .= this_input_buffer[this_inds] + @timeit_debug global_timer "ldiv!" ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer) + this_output_buffer[this_inds] .= v_solve_buffer2 + end + end + function adi_v_solve!() + for isolve ∈ 1:v_solve_nsolve + this_inds = v_solve_global_inds[isolve] + v_solve_buffer .= @view this_input_buffer[this_inds] + # Need to multiply the 'explicit matrix' by -1, because all + # the Jacobian-calculation functions are defined as if the + # terms are being added to the left-hand-side preconditioner + # matrix, but here the 'explicit matrix' terms are added on + # the right-hand-side. + @timeit_debug global_timer "mul!" mul!(v_solve_buffer, v_solve_explicit_matrices[isolve], + this_intermediate_buffer, -1.0, 1.0) + @timeit_debug global_timer "ldiv!" 
ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer) + this_output_buffer[this_inds] .= v_solve_buffer2 end end - reset_nonlinear_per_stage_counters!(nl_solver_params) + z_solve_global_inds = adi_info.z_solve_global_inds + z_solve_nsolve = adi_info.z_solve_nsolve + z_solve_implicit_lus = adi_info.z_solve_implicit_lus + z_solve_explicit_matrices = adi_info.z_solve_explicit_matrices + z_solve_buffer = adi_info.z_solve_buffer + z_solve_buffer2 = adi_info.z_solve_buffer2 + function adi_z_solve!() + for isolve ∈ 1:z_solve_nsolve + this_inds = z_solve_global_inds[isolve] + z_solve_buffer .= @view this_input_buffer[this_inds] + # Need to multiply the 'explicit matrix' by -1, because all + # the Jacobian-calculation functions are defined as if the + # terms are being added to the left-hand-side preconditioner + # matrix, but here the 'explicit matrix' terms are added on + # the right-hand-side. + @timeit_debug global_timer "mul!" mul!(z_solve_buffer, z_solve_explicit_matrices[isolve], this_intermediate_buffer, -1.0, 1.0) + @timeit_debug global_timer "ldiv!" ldiv!(z_solve_buffer2, z_solve_implicit_lus[isolve], z_solve_buffer) + this_output_buffer[this_inds] .= z_solve_buffer2 + end + end - t_params.step_counter[] += 1 - if electron_pdf_converged[] - break + precon_iterations[] += 1 + first_adi_v_solve!() + fill_intermediate_buffer!() + adi_z_solve!() + adi_communicate_boundary_points() + + for n ∈ 1:n_extra_iterations + precon_iterations[] += 1 + fill_intermediate_buffer!() + adi_v_solve!() + fill_intermediate_buffer!() + adi_z_solve!() + adi_communicate_boundary_points() end + + # Unpack preconditioner solution + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + precon_f[ivpa,ivperp,iz] = this_output_buffer[row] + end + begin_z_region() + @loop_z iz begin + row = pdf_size + iz + precon_ppar[iz] = this_output_buffer[row] + end + + return nothing end - if !electron_pdf_converged[] - # If electron solve failed to converge for some `ir`, the failure will be - # handled by restarting the ion timestep with a smaller dt, so no need to try - # to solve for further `ir` values. - break - end - end - # Update the 'pdf' arrays with the final result - begin_r_z_vperp_vpa_region() - final_scratch_pdf = scratch[t_params.n_rk_stages+1].pdf_electron - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - pdf[ivpa,ivperp,iz,ir] = final_scratch_pdf[ivpa,ivperp,iz,ir] + + left_preconditioner = identity + right_preconditioner = adi_precon! + elseif nl_solver_params.preconditioner_type === Val(:none) + left_preconditioner = identity + right_preconditioner = identity + else + error("preconditioner_type=$(nl_solver_params.preconditioner_type) is not " + * "supported by electron_backward_euler!().") end - if evolve_ppar - # Update `moments.electron.ppar` with the final electron pressure - begin_r_z_region() - scratch_ppar = scratch[t_params.n_rk_stages+1].electron_ppar - moments_ppar = moments.electron.ppar - @loop_r_z ir iz begin - moments_ppar[iz,ir] = scratch_ppar[iz,ir] + + # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the + # electron parallel pressure. 
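residual_func!(), defined next, expresses that backward-Euler update as a root-finding problem: with old state x_old it evaluates r(x_new) = x_new - x_old - dt*RHS(x_new), which the Newton iteration drives to zero. A self-contained toy with the same structure, using a dense finite-difference Jacobian in place of the preconditioned Newton-Krylov solve performed by newton_solve!(); the model right-hand side, step size and tolerances are illustrative assumptions.

using LinearAlgebra

# Toy right-hand side: a stiff linear relaxation plus a weak nonlinearity.
rhs(u) = [-50.0 * u[1] + u[2]^2, -u[2] + 0.1 * u[1]]

# Backward-Euler residual in the same form as residual_func!():
# r(u_new) = u_new - (u_old + dt * rhs(u_new))
residual(u_new, u_old, dt) = u_new .- u_old .- dt .* rhs(u_new)

# One backward-Euler step solved with a plain Newton iteration and a
# finite-difference Jacobian (a stand-in for the Jacobian-free solve used here).
function backward_euler_step(u_old, dt; tol=1.0e-12, maxits=20)
    u = copy(u_old)
    for _ in 1:maxits
        r = residual(u, u_old, dt)
        norm(r) < tol && break
        n = length(u)
        J = zeros(n, n)
        eps_fd = 1.0e-8
        for j in 1:n
            up = copy(u); up[j] += eps_fd
            J[:, j] = (residual(up, u_old, dt) .- r) ./ eps_fd
        end
        u .-= J \ r
    end
    return u
end

u_new = backward_euler_step([1.0, 0.5], 0.1)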
+ function residual_func!(this_residual, new_variables; krylov=false) + electron_ppar_residual, f_electron_residual = this_residual + electron_ppar_newvar, f_electron_newvar = new_variables + + if evolve_ppar + this_dens = moments.electron.dens + this_upar = moments.electron.upar + this_vth = moments.electron.vth + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz] / + (this_dens[iz,ir] * + composition.me_over_mi))) + end end - end - begin_serial_region() - @serial_region begin - if !electron_pdf_converged[] || do_debug_io - if io_electron !== nothing && io_electron !== true - t_params.moments_output_counter[] += 1 - write_electron_state(scratch, moments, t_params, io_electron, - t_params.moments_output_counter[], r, z, vperp, vpa) - finish_electron_io(io_electron) + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron_newvar, phi, moments.electron.vth[:,ir], + moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir; bc_constraints=false, + update_vcut=!krylov) + + if evolve_ppar + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, + ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=this_dens, + electron_upar=this_upar, + electron_ppar=electron_ppar_newvar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + else + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, + ir) + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz[:,ir], + moments.electron.qpar[:,ir], buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + end + + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = electron_ppar_old[iz,ir] + end + else + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = 0.0 end end - end - if r.n > 1 - error("Limits on iteration count and simtime assume 1D simulations. " - * "Need to fix handling of t_params.t[] and t_params.step_counter[], " - * "and also t_params.max_step_count_this_ion_step[] and " - * "t_params.max_t_increment_this_ion_step[]") - else - t_params.max_step_count_this_ion_step[] = - max(t_params.step_counter[] - initial_step_counter, - t_params.max_step_count_this_ion_step[]) - t_params.max_t_increment_this_ion_step[] = - max(t_params.t[] - initial_time, - t_params.max_t_increment_this_ion_step[]) - end + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
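That comment relies on the accumulate-style convention of electron_kinetic_equation_euler_update!(): it adds dt times the time derivative to whatever is already stored in its output argument. Seeding the output with f_electron_old, as the code just below does, and then subtracting it from the new state gives the backward-Euler residual, while a zero seed with dt = 1 would return the bare time derivative. A short sketch with a hypothetical accumulate-style euler_update! (illustrative, not the real API):

# Hypothetical accumulate-style update: out .+= dt .* rhs(u)
euler_update!(out, u, dt, rhs) = (out .+= dt .* rhs(u); out)

rhs(u) = -2.0 .* u                  # toy right-hand side
u = [1.0, 2.0]

# (a) Residual evaluation as used here: seed with the old state, then form new - result.
resid = copy(u)                     # seed with u_old
euler_update!(resid, u, 0.1, rhs)   # resid = u_old + dt*RHS(u_new)
resid .= u .- resid                 # resid = u_new - u_old - dt*RHS(u_new)

# (b) Plain time-derivative evaluation as described in the comment: zero seed, dt = 1.
dudt = zeros(2)
euler_update!(dudt, u, 1.0, rhs)    # dudt == rhs(u)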
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz] + end + electron_kinetic_equation_euler_update!( + f_electron_residual, electron_ppar_residual, f_electron_newvar, + electron_ppar_newvar, moments, z, vperp, vpa, z_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, + composition, external_source_settings, num_diss_params, t_params, + ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt, + soft_force_constraints=true) + + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = f_electron_newvar[ivpa,ivperp,iz] - f_electron_residual[ivpa,ivperp,iz] + end + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = electron_ppar_newvar[iz] - electron_ppar_residual[iz] + end + end - initial_dt_scale_factor = 0.1 - if t_params.previous_dt[] < initial_dt_scale_factor * t_params.dt[] - # If dt has increased a lot, we can probably try a larger initial dt for the next - # solve. - t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] - end + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir) - if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] - # Reset dt in case it was reduced to be less than 0.5*ion_dt - t_params.dt[] = t_params.previous_dt[] - end - if !electron_pdf_converged[] - success = "kinetic-electrons" - else - success = "" + return nothing end - return success + + residual = (scratch_dummy.implicit_buffer_z_1, scratch_dummy.implicit_buffer_vpavperpz_1) + delta_x = (scratch_dummy.implicit_buffer_z_2, + scratch_dummy.implicit_buffer_vpavperpz_2) + rhs_delta = (scratch_dummy.implicit_buffer_z_3, + scratch_dummy.implicit_buffer_vpavperpz_3) + v = (scratch_dummy.implicit_buffer_z_4, + scratch_dummy.implicit_buffer_vpavperpz_4) + w = (scratch_dummy.implicit_buffer_z_5, + scratch_dummy.implicit_buffer_vpavperpz_5) + + newton_success = newton_solve!((electron_ppar_new, f_electron_new), + residual_func!, residual, delta_x, rhs_delta, + v, w, nl_solver_params; + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, + coords=(z=z, vperp=vperp, vpa=vpa)) + + return newton_success end """ @@ -1880,21 +1925,157 @@ to allow the outer r-loop to be parallelised. 
newton_success = false for ir ∈ 1:r.n + f_electron = @view pdf_electron_out[:,:,:,ir] + ppar = @view electron_ppar_out[:,ir] + phi = @view fields.phi[:,ir] + + function recalculate_preconditioner!() + if nl_solver_params.preconditioner_type === Val(:electron_lu) +global_rank[] == 0 && println("recalculating precon") + nl_solver_params.solves_since_precon_update[] = 0 + nl_solver_params.precon_dt[] = ion_dt + + orig_lu, precon_matrix, input_buffer, output_buffer = + nl_solver_params.preconditioners[ir] + + fill_electron_kinetic_equation_Jacobian!( + precon_matrix, f_electron, ppar, moments, phi, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params, ion_dt, ir, true, :all, true, false) + + begin_serial_region() + if block_rank[] == 0 + if size(orig_lu) == (1, 1) + # Have not properly created the LU decomposition before, so + # cannot reuse it. + @timeit_debug global_timer "lu" nl_solver_params.preconditioners[ir] = + (lu(sparse(precon_matrix)), precon_matrix, input_buffer, + output_buffer) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" lu!(orig_lu, sparse(precon_matrix); check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch") + @timeit_debug global_timer "lu" orig_lu = lu(sparse(precon_matrix)) + end + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer) + end + else + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer) + end + + return nothing + end + end + + if nl_solver_params.preconditioner_type === Val(:electron_lu) + if ion_dt > 1.5 * nl_solver_params.precon_dt[] || + ion_dt < 2.0/3.0 * nl_solver_params.precon_dt[] + + # dt has changed significantly, so update the preconditioner + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end + + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + recalculate_preconditioner!() + end + + @timeit_debug global_timer lu_precon!(x) = begin + precon_ppar, precon_f = x + + precon_lu, _, this_input_buffer, this_output_buffer = + nl_solver_params.preconditioners[ir] + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + this_input_buffer[counter] = precon_f[ivpa,ivperp,iz] + counter += 1 + end + @loop_z iz begin + this_input_buffer[counter] = precon_ppar[iz] + counter += 1 + end + + begin_serial_region() + @serial_region begin + @timeit_debug global_timer "ldiv!" ldiv!(this_output_buffer, precon_lu, this_input_buffer) + end + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + precon_f[ivpa,ivperp,iz] = this_output_buffer[counter] + counter += 1 + end + @loop_z iz begin + precon_ppar[iz] = this_output_buffer[counter] + counter += 1 + end + + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For precon_f take the upwind + # value, and for precon_ppar take the average. 
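recalculate_preconditioner!() above reuses the symbolic part of an existing LU factorisation via lu!() and only rebuilds with lu() when the sparsity pattern changes (the code treats the resulting ArgumentError as the signal). A standalone sketch of that reuse-or-rebuild pattern, with a small illustrative matrix:

    using LinearAlgebra, SparseArrays

    A = sparse([1.0 2.0 0.0; 0.0 3.0 0.0; 0.0 0.0 4.0])
    F = lu(A)                    # initial factorisation

    nonzeros(A) .*= 2.0          # same pattern: numeric refactorisation is enough
    try
        lu!(F, A; check=false)   # reuse the symbolic analysis
    catch e
        isa(e, ArgumentError) || rethrow(e)
        F = lu(A)                # pattern changed: rebuild from scratch
    end

    x = F \ [1.0, 2.0, 3.0]      # apply the refreshed factorisation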
+ f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] + end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. + # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side. + # Neither direction is special, so taking the average seems most + # sensible (although in an intial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other. + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = precon_ppar[1] + buffer_2[] = precon_ppar[end] + end + reconcile_element_boundaries_MPI!( + precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) + + return nothing + end + + left_preconditioner = identity + right_preconditioner = lu_precon! + elseif nl_solver_params.preconditioner_type === Val(:none) + left_preconditioner = identity + right_preconditioner = identity + else + error("preconditioner_type=$(nl_solver_params.preconditioner_type) is not " + * "supported by electron_backward_euler!().") + end + function residual_func!(residual, new_variables; debug=false, krylov=false) electron_ppar_residual, f_electron_residual = residual electron_ppar_new, f_electron_new = new_variables - apply_electron_bc_and_constraints_no_r!(f_electron_new, fields.phi, moments, - z, vperp, vpa, vperp_spectral, - vpa_spectral, vpa_advect, - num_diss_params, composition, ir) - - # Calculate heat flux and derivatives using new_variables - @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], - electron_ppar_new, - moments.electron.vth[:,ir], - f_electron_new, vpa, ir) - this_dens = moments.electron.dens this_upar = moments.electron.upar this_vth = moments.electron.vth @@ -1906,6 +2087,23 @@ to allow the outer r-loop to be parallelised. (this_dens[iz,ir] * composition.me_over_mi))) end + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron_new, phi, moments.electron.vth[:,ir], + moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir; bc_constraints=false, + update_vcut=!krylov) + + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( moments, (electron_density=this_dens, @@ -1937,10 +2135,8 @@ to allow the outer r-loop to be parallelised. 
f_electron_residual, electron_ppar_residual, f_electron_new, electron_ppar_new, moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, composition, external_source_settings, - num_diss_params, t_params, ir; soft_force_constraints=true) - @loop_z_vperp_vpa iz ivperp ivpa begin - f_electron_residual[ivpa,ivperp,iz] /= sqrt(1.0 + vpa.grid[ivpa]^2) - end + num_diss_params, t_params, ir; evolve_ppar=true, ion_dt=ion_dt, + soft_force_constraints=true) # Set residual to zero where pdf_electron is determined by boundary conditions. if vpa.n > 1 @@ -1972,13 +2168,12 @@ to allow the outer r-loop to be parallelised. w = (scratch_dummy.implicit_buffer_z_5, scratch_dummy.implicit_buffer_vpavperpz_5) - @views newton_success = newton_solve!((electron_ppar_out[:,ir], - pdf_electron_out[:,:,:,ir]), - residual_func!, residual, delta_x, - rhs_delta, v, w, nl_solver_params; - left_preconditioner=nothing, - right_preconditioner=nothing, - coords=(z=z, vperp=vperp, vpa=vpa)) + newton_success = newton_solve!((ppar, f_electron), residual_func!, residual, + delta_x, rhs_delta, v, w, nl_solver_params; + left_preconditioner=nothing, + right_preconditioner=nothing, + recalculate_preconditioner=recalculate_preconditioner!, + coords=(z=z, vperp=vperp, vpa=vpa)) if !newton_success break end @@ -2074,8 +2269,9 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vp moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds, - upperz_vcut_inds=upperz_vcut_inds) + composition.me_over_mi, ir; + lowerz_vcut_ind=(lowerz_vcut_inds === nothing ? nothing : lowerz_vcut_inds[ir]), + upperz_vcut_ind=(upperz_vcut_inds === nothing ? nothing : upperz_vcut_inds[ir])) end begin_r_z_region() @@ -2096,23 +2292,68 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vp end end -function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vperp, +function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, r, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, num_diss_params, composition, - ir; lowerz_vcut_inds=nothing, - upperz_vcut_inds=nothing) + ir, nl_solver_params) begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin f_electron[ivpa,ivperp,iz] = max(f_electron[ivpa,ivperp,iz], 0.0) end + new_lowerz_vcut_ind = @view r.scratch_shared_int[ir:ir] + new_upperz_vcut_ind = @view r.scratch_shared_int2[ir:ir] + # enforce the boundary condition(s) on the electron pdf @views enforce_boundary_condition_on_electron_pdf!( f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds, - upperz_vcut_inds=upperz_vcut_inds) + composition.me_over_mi, ir; lowerz_vcut_ind=new_lowerz_vcut_ind, + upperz_vcut_ind=new_upperz_vcut_ind) + + # Check if either vcut_ind has changed - if either has, recalculate the + # preconditioner because the response at the grid point before the cutoff is + # sharp, so the preconditioner could be significantly wrong when it was + # calculated using the wrong vcut_ind. 
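The cut-off indices are written through one-element views (v[] = ...), so the single-ir boundary-condition routine can report them without knowing about the per-ir storage; the caller then compares them with the values used when the preconditioner was last built and, on any change, saturates the update counter to force an early rebuild. A serial sketch of that pattern (no MPI; the struct and names are illustrative):

    mutable struct PreconState
        lowerz_vcut_ind::Int
        solves_since_precon_update::Int
        preconditioner_update_interval::Int
    end

    function check_vcut!(state::PreconState, vcut_inds::Vector{Int}, ir::Int)
        new_ind = @view vcut_inds[ir:ir]   # 1-element view; new_ind[] reads the entry
        if new_ind[] != state.lowerz_vcut_ind
            state.lowerz_vcut_ind = new_ind[]
            # Force the preconditioner to be recalculated before the next solve.
            state.solves_since_precon_update = state.preconditioner_update_interval
        end
        return nothing
    end

    state = PreconState(0, 3, 25)
    check_vcut!(state, [17, 12], 1)        # index changed, so the counter saturates to 25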
+ lower_vcut_changed = @view z.scratch_shared_int[1:1] + upper_vcut_changed = @view z.scratch_shared_int[2:2] + @serial_region begin + if z.irank == 0 + precon_lowerz_vcut_inds = nl_solver_params.precon_lowerz_vcut_inds + if new_lowerz_vcut_ind[] == precon_lowerz_vcut_inds[ir] + lower_vcut_changed[] = 0 + else + lower_vcut_changed[] = 1 + precon_lowerz_vcut_inds[ir] = new_lowerz_vcut_ind[] + end + end + MPI.Bcast!(lower_vcut_changed, comm_inter_block[]; root=0) + #req1 = MPI.Ibcast!(lower_vcut_changed, comm_inter_block[]; root=0) + + if z.irank == z.nrank - 1 + precon_upperz_vcut_inds = nl_solver_params.precon_upperz_vcut_inds + if new_upperz_vcut_ind[] == precon_upperz_vcut_inds[ir] + upper_vcut_changed[] = 0 + else + upper_vcut_changed[] = 1 + precon_upperz_vcut_inds[ir] = new_upperz_vcut_ind[] + end + end + MPI.Bcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) + #req2 = MPI.Ibcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) + + # Eventually can use Ibcast!() to make the two broadcasts run + # simultaneously, but need the function to be merged into MPI.jl (see + # https://github.com/JuliaParallel/MPI.jl/pull/882). + #MPI.Waitall([req1, req2]) + end + _block_synchronize() + if lower_vcut_changed[] == 1 || upper_vcut_changed[] == 1 + # One or both of the indices changed for some `ir`, so force the + # preconditioner to be recalculated next time. + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end begin_z_region() A = moments.electron.constraints_A_coefficient @@ -2595,8 +2836,8 @@ end @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir; - bc_constraints=true, update_vcut=true, lowerz_vcut_inds=nothing, - upperz_vcut_inds=nothing) = begin + bc_constraints=true, update_vcut=true, lowerz_vcut_ind=nothing, + upperz_vcut_ind=nothing) = begin @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value") @@ -2796,13 +3037,13 @@ end # plus_vcut_ind+1 where vcut is. vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) if vcut_fraction > 0.5 - if lowerz_vcut_inds !== nothing - lowerz_vcut_inds[ir] = plus_vcut_ind+1 + if lowerz_vcut_ind !== nothing + lowerz_vcut_ind[] = plus_vcut_ind+1 end pdf[plus_vcut_ind+1,1,1] *= vcut_fraction - 0.5 else - if lowerz_vcut_inds !== nothing - lowerz_vcut_inds[ir] = plus_vcut_ind + if lowerz_vcut_ind !== nothing + lowerz_vcut_ind[] = plus_vcut_ind end pdf[plus_vcut_ind+1,1,1] = 0.0 pdf[plus_vcut_ind,1,1] *= vcut_fraction + 0.5 @@ -2992,13 +3233,13 @@ end # minus_vcut_ind where -vcut is. vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) if vcut_fraction < 0.5 - if upperz_vcut_inds !== nothing - upperz_vcut_inds[ir] = minus_vcut_ind-1 + if upperz_vcut_ind !== nothing + upperz_vcut_ind[] = minus_vcut_ind-1 end pdf[minus_vcut_ind-1,1,end] *= 0.5 - vcut_fraction else - if upperz_vcut_inds !== nothing - upperz_vcut_inds[ir] = minus_vcut_ind + if upperz_vcut_ind !== nothing + upperz_vcut_ind[] = minus_vcut_ind end pdf[minus_vcut_ind-1,1,end] = 0.0 pdf[minus_vcut_ind,1,end] *= 1.5 - vcut_fraction @@ -4245,8 +4486,9 @@ appropriate. 
end adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, - error_norm_method, "", 0.0, composition; - electron=true, local_max_dt=local_max_dt) + error_norm_method, "", 0.0, false, false, + composition; electron=true, + local_max_dt=local_max_dt) if t_params.previous_dt[] == 0.0 # Timestep failed, so reset scratch[t_params.n_rk_stages+1] equal to # scratch[1] to start the timestep over. @@ -4463,7 +4705,8 @@ end collisions, composition, external_source_settings, num_diss_params, t_params, ir; - evolve_ppar=false, ion_dt=nothing) + evolve_ppar=false, ion_dt=nothing, + soft_force_constraints=false) Do a forward-Euler update of the electron kinetic equation. @@ -4532,18 +4775,20 @@ Note that this function operates on a single point in `r`, given by `ir`, and `f composition, external_source_settings.electron, num_diss_params, z, ir) if ion_dt !== nothing - # Add source term to turn steady state solution into a backward-Euler update of - # electron_ppar with the ion timestep `ion_dt`. - ppar_previous_ion_step = moments.electron.ppar - begin_z_region() - @loop_z iz begin - # At this point, ppar_out = ppar_in + dt*RHS(ppar_in). Here we add a - # source/damping term so that in the steady state of the electron - # pseudo-timestepping iteration, - # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, - # resulting in a backward-Euler step (as long as the pseudo-timestepping - # loop converges). - ppar_out[iz] += -dt * (ppar_in[iz] - ppar_previous_ion_step[iz,ir]) / ion_dt + @timeit global_timer "electron_ppar_ion_dt" begin + # Add source term to turn steady state solution into a backward-Euler + # update of electron_ppar with the ion timestep `ion_dt`. + ppar_previous_ion_step = moments.electron.ppar + begin_z_region() + @loop_z iz begin + # At this point, ppar_out = ppar_in + dt*RHS(ppar_in). Here we add a + # source/damping term so that in the steady state of the electron + # pseudo-timestepping iteration, + # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, + # resulting in a backward-Euler step (as long as the + # pseudo-timestepping loop converges). + ppar_out[iz] += -dt * (ppar_in[iz] - ppar_previous_ion_step[iz,ir]) / ion_dt + end end end end @@ -4562,14 +4807,20 @@ end Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if `evolve_ppar=true`) the electron energy equation. + +`add_identity=false` can be passed to skip adding 1's on the diagonal. Doing this would +produce the Jacobian for a steady-state solve, rather than a backward-Euler timestep +solve. [Note that rows representing boundary points still need the 1 added to their +diagonal, as that 1 is used to impose the boundary condition, not to represent the 'new f' +in the time derivative term as it is for the non-boundary points.] 
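For a scalar analogue of this toggle: backward Euler solves g(x) = x - x_old - dt*f(x) = 0 with Jacobian 1 - dt*f'(x), while a steady-state solve targets -f(x) = 0 with Jacobian -f'(x); add_identity switches between the two. A small illustrative sketch (scalar only, not the code's API):

    f(x)  = -x^3        # stands in for the kinetic-equation RHS
    df(x) = -3x^2

    jac(x, dt; add_identity=true) = (add_identity ? 1.0 : 0.0) - dt * df(x)

    jac(0.5, 1.0e-3)                    # backward-Euler Jacobian: 1 - dt*f'(x)
    jac(0.5, 1.0; add_identity=false)   # steady-state Jacobian: -f'(x), with dt = 1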
""" @timeit global_timer fill_electron_kinetic_equation_Jacobian!( jacobian_matrix, f, ppar, moments, phi_global, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, include=:all, - include_qpar_integral_terms=true) = begin + evolve_ppar, include=:all, include_qpar_integral_terms=true, + add_identity=true) = begin dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -4604,6 +4855,8 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati pdf_size = z.n * vperp.n * vpa.n v_size = vperp.n * vpa.n + z_speed = @view z_advect[1].speed[:,:,:,ir] + # Initialise jacobian_matrix to the identity begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin @@ -4611,7 +4864,11 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa jacobian_matrix[row,:] .= 0.0 - if include === :all + if (include === :all + && (add_identity + || skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, + z_speed)) + ) jacobian_matrix[row,row] += 1.0 end end @@ -4621,13 +4878,11 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati row = pdf_size + iz jacobian_matrix[row,:] .= 0.0 - if include === :all + if include === :all && add_identity jacobian_matrix[row,row] += 1.0 end end - z_speed = @view z_advect[1].speed[:,:,:,ir] - if include ∈ (:all, :explicit_v) dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] begin_vperp_vpa_region() @@ -5268,8 +5523,8 @@ function add_source_term!(source_term, vpa, z, dvth_dz) return nothing end -function add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, vpa, - vpa_spectral, num_diss_params, dt) +@timeit global_timer add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, + vpa, vpa_spectral, num_diss_params, dt) = begin if num_diss_params.electron.vpa_dissipation_coefficient ≤ 0.0 return nothing end @@ -5277,7 +5532,7 @@ function add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, vp begin_z_vperp_region() @loop_z_vperp iz ivperp begin @views second_derivative!(vpa.scratch, pdf_in[:,ivperp,iz], vpa, vpa_spectral) - @. pdf_out[:,ivperp,iz] += dt * num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch + @views @. 
pdf_out[:,ivperp,iz] += dt * num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch end return nothing end @@ -5550,13 +5805,14 @@ function calculate_pdf_dot_prefactor!(pdf_dot_prefactor, ppar, vth, dens, ddens_ end # add contribution to the residual coming from the term proporational to the pdf -function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, moments, vpa, - z, dt, electron_source_settings, ir) +@timeit global_timer add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, + moments, vpa, z, dt, + electron_source_settings, ir) = begin vth = @view moments.electron.vth[:,ir] ddens_dz = @view moments.electron.ddens_dz[:,ir] dvth_dz = @view moments.electron.dvth_dz[:,ir] dqpar_dz = @view moments.electron.dqpar_dz[:,ir] - begin_z_vperp_vpa_region() + begin_z_vperp_region() @loop_z iz begin this_dqpar_dz = dqpar_dz[iz] this_ppar = ppar[iz] @@ -5565,11 +5821,11 @@ function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, mome this_dens = dens[iz] this_dvth_dz = dvth_dz[iz] this_vth = vth[iz] - @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz] += - dt * (-0.5 * this_dqpar_dz / this_ppar - vpa[ivpa] * this_vth * + @loop_vperp ivperp begin + @views @. pdf_out[:,ivperp,iz] += + dt * (-0.5 * this_dqpar_dz / this_ppar - vpa * this_vth * (this_ddens_dz / this_dens - this_dvth_dz / this_vth)) * - pdf_in[ivpa,ivperp,iz] + pdf_in[:,ivperp,iz] end end @@ -5579,11 +5835,11 @@ function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, mome @views source_momentum_amplitude = moments.electron.external_source_momentum_amplitude[:, ir, index] @views source_pressure_amplitude = moments.electron.external_source_pressure_amplitude[:, ir, index] @loop_z iz begin - term = dt * (1.5 * source_density_amplitude[iz,ir] / dens[iz,ir] - - (0.5 * source_pressure_amplitude[iz,ir] + - source_momentum_amplitude[iz,ir]) / ppar[iz,ir]) - @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= term * pdf_in[ivpa,ivperp,iz,ir] + term = dt * (1.5 * source_density_amplitude[iz] / dens[iz] - + (0.5 * source_pressure_amplitude[iz] + + source_momentum_amplitude[iz]) / ppar[iz]) + @loop_vperp ivperp begin + @views @. pdf_out[:,ivperp,iz] -= term * pdf_in[:,ivperp,iz] end end end diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 2ffcc3298..07e14d876 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -61,10 +61,10 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, dppar_dz = @view moments.electron.dppar_dz[:,ir] dqpar_dz = @view moments.electron.dqpar_dz[:,ir] dvth_dz = @view moments.electron.dvth_dz[:,ir] - speed = advect.speed + speed = @view advect.speed[:,:,:,ir] # calculate the advection speed in wpa @loop_z_vperp_vpa iz ivperp ivpa begin - speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz]) + speed[ivpa,ivperp,iz] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz]) / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz]) end @@ -80,8 +80,8 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, 2.0 * upar[iz] * source_momentum_amplitude[iz]) / ppar[iz] + 0.5 * source_density_amplitude[iz] / density[iz] - @loop_vperp_vpa ivperp ivpa begin - speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa + @loop_vperp ivperp begin + @. 
speed[:,ivperp,iz] += term1 + vpa * term2_over_vpa end end end diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 855797489..4e70b720e 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -1048,14 +1048,13 @@ Note that this function operates on a single point in `r`, given by `ir`, and `p this_upar = electron_upar[iz] this_prefactor = dt * this_vth / electron_density[iz] * vth_factor * source_amplitude[iz] - @loop_vperp_vpa ivperp ivpa begin + @loop_vperp ivperp begin # Factor of 1/sqrt(π) (for 1V) or 1/π^(3/2) (for 2V/3V) is absorbed by the # normalisation of F vperp_unnorm = vperp_grid[ivperp] * this_vth - vpa_unnorm = vpa_grid[ivpa] * this_vth + this_upar - pdf_out[ivpa,ivperp,iz] += + @. pdf_out[:,ivperp,iz] += this_prefactor * - exp(-(vperp_unnorm^2 + vpa_unnorm^2) * me_over_mi / source_T) + exp(-(vperp_unnorm^2 + (vpa_grid * this_vth + this_upar)^2) * me_over_mi / source_T) end end diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 92fe8256c..a2d8eaddf 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -320,8 +320,12 @@ struct io_initial_electron_info{Tfile, Tfe, Tmom, Texte1, Texte2, Texte3, Texte4 electron_constraints_C_coefficient::Tconstr # cumulative number of electron pseudo-timesteps taken electron_step_counter::Telectronint + # local electron pseudo-time + electron_local_pseudotime::Telectrontime # cumulative electron pseudo-time electron_cumulative_pseudotime::Telectrontime + # current residual for the electron pseudo-timestepping loop + electron_residual::Telectrontime # current electron pseudo-timestep size electron_dt::Telectrontime # size of last electron pseudo-timestep before the output was written @@ -533,6 +537,10 @@ function setup_electron_io(io_input, vpa, vperp, z, r, composition, collisions, io_pseudotime = create_dynamic_variable!(dynamic, "time", mk_float; parallel_io=parallel_io, description="pseudotime used for electron initialization") + io_local_pseudotime = create_dynamic_variable!(dynamic, "electron_local_pseudotime", mk_float; parallel_io=parallel_io, + description="pseudotime within a single pseudotimestepping loop") + io_electron_residual = create_dynamic_variable!(dynamic, "electron_residual", mk_float; parallel_io=parallel_io, + description="residual for electron pseudotimestepping loop") io_f_electron = create_dynamic_variable!(dynamic, "f_electron", mk_float, vpa, vperp, z, r; parallel_io=parallel_io, @@ -642,7 +650,9 @@ function reopen_initial_electron_io(file_info) getvar("electron_constraints_B_coefficient"), getvar("electron_constraints_C_coefficient"), getvar("electron_step_counter"), + getvar("electron_local_pseudotime"), getvar("electron_cumulative_pseudotime"), + getvar("electron_residual"), getvar("electron_dt"), getvar("electron_previous_dt"), getvar("electron_failure_counter"), @@ -3281,9 +3291,9 @@ binary output file # add the distribution function data at this time slice to the output file write_ion_dfns_data_to_binary(scratch, t_params, n_ion_species, io_dfns, t_idx, r, z, vperp, vpa) - if scratch_electron !== nothing - write_electron_dfns_data_to_binary(scratch_electron, t_params, io_dfns, t_idx, - r, z, vperp, vpa) + if t_params.implicit_electron_time_evolving || scratch_electron !== nothing + write_electron_dfns_data_to_binary(scratch, scratch_electron, t_params, + io_dfns, t_idx, r, z, vperp, vpa) end write_neutral_dfns_data_to_binary(scratch, t_params, 
n_neutral_species, io_dfns, t_idx, r, z, vzeta, vr, vz) @@ -3322,7 +3332,7 @@ write time-dependent distribution function data for electrons to the binary outp Note: should only be called from within a function that (re-)opens the output file. """ -function write_electron_dfns_data_to_binary(scratch_electron, t_params, +function write_electron_dfns_data_to_binary(scratch, scratch_electron, t_params, io_dfns::Union{io_dfns_info,io_initial_electron_info}, t_idx, r, z, vperp, vpa) @serial_region begin @@ -3331,22 +3341,27 @@ function write_electron_dfns_data_to_binary(scratch_electron, t_params, parallel_io = io_dfns.io_input.parallel_io if io_dfns.f_electron !== nothing - if t_params.electron === nothing + if t_params.implicit_electron_time_evolving + n_rk_stages = t_params.n_rk_stages + this_scratch = scratch + elseif t_params.electron === nothing # t_params is the t_params for electron timestepping n_rk_stages = t_params.n_rk_stages + this_scratch = scratch_electron else n_rk_stages = t_params.electron.n_rk_stages + this_scratch = scratch_electron end append_to_dynamic_var(io_dfns.f_electron, - scratch_electron[n_rk_stages+1].pdf_electron, + this_scratch[n_rk_stages+1].pdf_electron, t_idx, parallel_io, vpa, vperp, z, r) # If options were not set to select the following outputs, then the io # variables will be `nothing` and nothing will be written. append_to_dynamic_var(io_dfns.f_electron_loworder, - scratch_electron[2].pdf_electron, + this_scratch[2].pdf_electron, t_idx, parallel_io, vpa, vperp, z, r) append_to_dynamic_var(io_dfns.f_electron_start_last_timestep, - scratch_electron[1].pdf_electron, + this_scratch[1].pdf_electron, t_idx, parallel_io, vpa, vperp, z, r) end end @@ -3385,12 +3400,14 @@ end """ write_electron_state(scratch_electron, moments, t_params, io_initial_electron, - t_idx, r, z, vperp, vpa; pdf_electron_converged=false) + t_idx, local_pseudotime, electron_residual, r, z, vperp, vpa; + pdf_electron_converged=false) Write the electron state to an output file. """ function write_electron_state(scratch_electron, moments, t_params, - io_or_file_info_initial_electron, t_idx, r, z, vperp, vpa; + io_or_file_info_initial_electron, t_idx, local_pseudotime, + electron_residual, r, z, vperp, vpa; pdf_electron_converged=false) @serial_region begin @@ -3409,10 +3426,14 @@ function write_electron_state(scratch_electron, moments, t_params, # add the pseudo-time for this time slice to the hdf5 file append_to_dynamic_var(io_initial_electron.time, t_params.t[], t_idx, parallel_io) + append_to_dynamic_var(io_initial_electron.electron_local_pseudotime, local_pseudotime, + t_idx, parallel_io) append_to_dynamic_var(io_initial_electron.electron_cumulative_pseudotime, t_params.t[], t_idx, parallel_io) + append_to_dynamic_var(io_initial_electron.electron_residual, electron_residual, + t_idx, parallel_io) - write_electron_dfns_data_to_binary(scratch_electron, t_params, + write_electron_dfns_data_to_binary(nothing, scratch_electron, t_params, io_initial_electron, t_idx, r, z, vperp, vpa) write_electron_moments_data_to_binary(scratch_electron, moments, t_params, diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 8bfd386ea..bb5135ba9 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -23,7 +23,7 @@ export get_QQ_local! using FastGaussQuadrature using LegendrePolynomials: Pl, dnPl -using LinearAlgebra: mul!, lu, LU +using LinearAlgebra: mul!, lu, ldiv! 
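The import change (ldiv! in place of LU) supports the in-place mass-matrix solve below: ldiv!(f, F, b) writes the solution directly into f and avoids the temporary vector allocated by F \ b. A standalone sketch (matrix and vectors are illustrative):

    using LinearAlgebra

    M = [2.0 1.0; 1.0 3.0]
    F = lu(M)
    b = [1.0, 2.0]
    f = similar(b)

    ldiv!(f, F, b)    # in-place equivalent of f .= M \ b, with no intermediate vector
    f ≈ M \ b         # true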
using SparseArrays: sparse, AbstractSparseArray using SparseMatricesCSR using ..type_definitions: mk_float, mk_int @@ -459,8 +459,7 @@ Function to carry out a 1D (global) mass matrix solve. """ function mass_matrix_solve!(f, b, spectral::gausslegendre_info) # invert mass matrix system - y = spectral.mass_matrix_lu \ b - @. f = y + ldiv!(f, spectral.mass_matrix_lu, b) return nothing end diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index c7ec624d4..536137c98 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -707,6 +707,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field if !skip_electron_solve # Can't let this counter stay set to 0 t_params.electron.dfns_output_counter[] = max(t_params.electron.dfns_output_counter[], 1) + implicit_electron_pseudotimestep = (nl_solver_params.electron_advance !== nothing) + electron_solution_method = implicit_electron_pseudotimestep ? "backward_euler" : "artificial_time_derivative" success = @views update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, @@ -721,7 +723,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field io_electron=io_initial_electron, initial_time=code_time, residual_tolerance=t_input["initialization_residual_value"], - evolve_ppar=true) + evolve_ppar=true, + solution_method=electron_solution_method) if success != "" error("!!!max number of iterations for electron pdf update exceeded!!!\n" * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") @@ -767,6 +770,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.serial_solve, nl_solver_params.electron_advance.max_nonlinear_iterations_this_step, nl_solver_params.electron_advance.max_linear_iterations_this_step, + nl_solver_params.electron_advance.total_its_soft_limit, nl_solver_params.electron_advance.preconditioner_type, nl_solver_params.electron_advance.preconditioner_update_interval, nl_solver_params.electron_advance.preconditioners, @@ -795,7 +799,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field max_electron_pdf_iterations, max_electron_sim_time; io_electron=io_initial_electron, - evolve_ppar=true, ion_dt=t_params.dt[]) + evolve_ppar=true, ion_dt=t_params.dt[], + solution_method=electron_solution_method) end if success != "" error("!!!max number of iterations for electron pdf update exceeded!!!\n" @@ -807,8 +812,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field t_params.electron.moments_output_counter[] += 1 write_electron_state(scratch_electron, moments, t_params.electron, io_initial_electron, - t_params.electron.moments_output_counter[], r, z, vperp, - vpa; pdf_electron_converged=true) + t_params.electron.moments_output_counter[], -1.0, 0.0, r, + z, vperp, vpa; pdf_electron_converged=true) finish_electron_io(io_initial_electron) end @@ -817,6 +822,14 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field pdf.electron.pdf_before_ion_timestep[ivpa,ivperp,iz,ir] = pdf.electron.norm[ivpa,ivperp,iz,ir] end + if length(scratch[1].pdf_electron) > 0 + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + for i ∈ 1:length(scratch) + scratch[i].pdf_electron[ivpa,ivperp,iz,ir] = + pdf.electron.norm[ivpa,ivperp,iz,ir] + end + end + end # No need to do electron I/O (apart from possibly debug I/O) any more, so if # adaptive 
timestep is used, it does not need to adjust to output times. diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 62a63ae1d..84707eb0f 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -79,6 +79,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero maximum_dt::mk_float implicit_braginskii_conduction::Bool implicit_electron_advance::Bool + implicit_electron_time_evolving::Bool implicit_ion_advance::Bool implicit_vpa_advection::Bool implicit_electron_ppar::Bool @@ -130,6 +131,7 @@ struct advance_info continuity::Bool force_balance::Bool energy::Bool + electron_pdf::Bool electron_energy::Bool electron_conduction::Bool neutral_external_source::Bool diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index d5c2daaa5..15de297ab 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3352,7 +3352,7 @@ mix Strings and Tuples in a call). By default load data from moments files, pass `dfns=true` to load from distribution functions files, or `initial_electron=true` and `dfns=true` to load from initial electron -state files. +state files, or `electron_debug=true` and `dfns=true` to load from electron debug files. The `itime_min`, `itime_max` and `itime_skip` options can be used to select only a slice of time points when loading data. In `makie_post_process` these options are read from the @@ -3364,18 +3364,22 @@ values are used as offsets from the final time index of the run. """ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractString,Union{Int,Nothing}}}...; itime_min=1, itime_max=0, itime_skip=1, dfns=false, - initial_electron=false) + initial_electron=false, electron_debug=false) if length(run_dir) == 0 error("No run_dir passed") end if initial_electron && !dfns error("When `initial_electron=true` is passed, `dfns=true` must also be passed") end + if electron_debug && !dfns + error("When `electron_debug=true` is passed, `dfns=true` must also be passed") + end if length(run_dir) > 1 run_info = Tuple(get_run_info_no_setup(r; itime_min=itime_min, itime_max=itime_max, itime_skip=itime_skip, dfns=dfns, - initial_electron=initial_electron) + initial_electron=initial_electron, + electron_debug=electron_debug) for r ∈ run_dir) return run_info end @@ -3396,7 +3400,6 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin * "`(String, Nothing)`. Got $run_dir") end - electron_debug = false if isfile(this_run_dir) # this_run_dir is actually a filename. Assume it is a moment_kinetics output file # and infer the directory and the run_name from the filename. 
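The new electron_debug option follows the same pattern as initial_electron: it is only meaningful together with dfns=true, so the combination is validated up front and then used to pick the output-file extension. A minimal sketch of that guard and selection (the function name here is illustrative, not the package's API):

    function select_output_ext(; dfns=false, initial_electron=false, electron_debug=false)
        if (initial_electron || electron_debug) && !dfns
            error("`initial_electron`/`electron_debug` require `dfns=true`")
        end
        if initial_electron
            return "initial_electron"
        elseif electron_debug
            return "electron_debug"
        elseif dfns
            return "dfns"
        else
            return "moments"
        end
    end

    select_output_ext(dfns=true, electron_debug=true)   # returns "electron_debug"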
@@ -3412,7 +3415,6 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin run_name = split(filename, ".initial_electron.")[1] elseif occursin(".electron_debug.", filename) run_name = split(filename, ".electron_debug.")[1] - electron_debug = true else error("Cannot recognise '$this_run_dir/$filename' as a moment_kinetics output file") end @@ -3474,11 +3476,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin end if initial_electron - if electron_debug - ext = "electron_debug" - else - ext = "initial_electron" - end + ext = "initial_electron" + elseif electron_debug + ext = "electron_debug" elseif dfns ext = "dfns" else diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index aaf124309..25b4c8e36 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -162,6 +162,7 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI maximum_dt=Inf, implicit_braginskii_conduction=true, implicit_electron_advance=false, + implicit_electron_time_evolving=false, implicit_ion_advance=false, implicit_vpa_advection=false, implicit_electron_ppar=true, diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl index eb2524b63..15a68090d 100644 --- a/moment_kinetics/src/moment_kinetics_structs.jl +++ b/moment_kinetics/src/moment_kinetics_structs.jl @@ -34,6 +34,7 @@ struct scratch_pdf pperp::MPISharedArray{mk_float, ndim_moment} temp_z_s::MPISharedArray{mk_float, ndim_moment} # electrons + pdf_electron::MPISharedArray{mk_float, ndim_pdf_electron} electron_density::MPISharedArray{mk_float, ndim_moment_electron} electron_upar::MPISharedArray{mk_float, ndim_moment_electron} electron_ppar::MPISharedArray{mk_float, ndim_moment_electron} diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index aeeb43436..361547970 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -71,6 +71,7 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype} serial_solve::Bool max_nonlinear_iterations_this_step::Base.RefValue{mk_int} max_linear_iterations_this_step::Base.RefValue{mk_int} + total_its_soft_limit::mk_int preconditioner_type::Tpretype preconditioner_update_interval::mk_int preconditioners::Tprecon @@ -98,6 +99,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa linear_restart=10, linear_max_restarts=0, preconditioner_update_interval=300, + total_its_soft_limit=50, adi_precon_iterations=1, ) @@ -278,7 +280,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa Ref(nl_solver_input.preconditioner_update_interval), Ref(mk_float(0.0)), zeros(mk_int, n_vcut_inds), zeros(mk_int, n_vcut_inds), serial_solve, Ref(0), Ref(0), - preconditioner_type, + nl_solver_input.total_its_soft_limit, preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end @@ -393,17 +395,21 @@ is not necessary to have a very tight `linear_rtol` for the GMRES solve. 
@timeit global_timer newton_solve!( x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; left_preconditioner=nothing, - right_preconditioner=nothing, coords) = begin + right_preconditioner=nothing, recalculate_preconditioner=nothing, + coords) = begin # This wrapper function constructs the `solver_type` from coords, so that the body of # the inner `newton_solve!()` can be fully type-stable solver_type = Val(Symbol((c for c ∈ keys(coords))...)) return newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params, solver_type; left_preconditioner=left_preconditioner, - right_preconditioner=right_preconditioner, coords=coords) + right_preconditioner=right_preconditioner, + recalculate_preconditioner=recalculate_preconditioner, + coords=coords) end function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params, solver_type::Val; left_preconditioner=nothing, - right_preconditioner=nothing, coords) + right_preconditioner=nothing, recalculate_preconditioner=nothing, + coords) rtol = nl_solver_params.rtol atol = nl_solver_params.atol @@ -472,46 +478,53 @@ old_precon_iterations = nl_solver_params.precon_iterations[] if isnan(residual_norm) error("NaN in Newton iteration at iteration $counter") end - if residual_norm > previous_residual_norm - # Do a line search between x and x+delta_x to try to find an update that does - # decrease residual_norm - s = 0.5 - while s > 1.0e-2 - parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) - residual_func!(residual, x) - residual_norm = distributed_norm(solver_type, residual, norm_params...) - if residual_norm ≤ previous_residual_norm - break - end - s *= 0.5 - end - - #if residual_norm > previous_residual_norm - # # Failed to find a point that decreases the residual, so try a negative - # # step - # s = -1.0e-5 - # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) - # residual_func!(residual, x) - # residual_norm = distributed_norm(solver_type, residual, norm_params...) - # if residual_norm > previous_residual_norm - # # That didn't work either, so just take the full step and hope for - # # convergence later - # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) - # residual_func!(residual, x) - # residual_norm = distributed_norm(solver_type, residual, norm_params...) - # end - #end - if residual_norm > previous_residual_norm - # Line search didn't work, so just take the full step and hope for - # convergence later - parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) - residual_func!(residual, x) - residual_norm = distributed_norm(solver_type, residual, norm_params...) - end - end +# if residual_norm > previous_residual_norm +# # Do a line search between x and x+delta_x to try to find an update that does +# # decrease residual_norm +# s = 0.5 +# while s > 1.0e-2 +# parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) +# residual_func!(residual, x) +# residual_norm = distributed_norm(solver_type, residual, norm_params...) 
+# if residual_norm ≤ previous_residual_norm +# break +# end +# s *= 0.5 +# end +# println("line search s ", s) +# +# #if residual_norm > previous_residual_norm +# # # Failed to find a point that decreases the residual, so try a negative +# # # step +# # s = -1.0e-5 +# # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) +# # residual_func!(residual, x) +# # residual_norm = distributed_norm(solver_type, residual, norm_params...) +# # if residual_norm > previous_residual_norm +# # # That didn't work either, so just take the full step and hope for +# # # convergence later +# # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) +# # residual_func!(residual, x) +# # residual_norm = distributed_norm(solver_type, residual, norm_params...) +# # end +# #end +# if residual_norm > previous_residual_norm +# # Line search didn't work, so just take the full step and hope for +# # convergence later +# parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) +# residual_func!(residual, x) +# residual_norm = distributed_norm(solver_type, residual, norm_params...) +# end +# end parallel_map(solver_type, (w) -> w, x, w) previous_residual_norm = residual_norm + if recalculate_preconditioner !== nothing && counter % nl_solver_params.preconditioner_update_interval == 0 + # Have taken a large number of Newton iterations already - convergence must be + # slow, so try updating the preconditioner. + recalculate_preconditioner() + end + #println("Newton residual ", residual_norm, " ", linear_its, " $rtol $atol") if residual_norm < 0.1/rtol && close_counter < 0 && close_linear_counter < 0 diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 772cda8ba..481e3f725 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -1058,15 +1058,19 @@ end """ adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, error_norm_method, success, - nl_max_its_fraction, composition; - electron=false, local_max_dt::mk_float=Inf) + nl_max_its_fraction, nl_total_its_soft_limit, + nl_total_its_soft_limit_reduce_dt, + composition; electron=false, + local_max_dt::mk_float=Inf) Use the calculated `CFL_limits` and `error_norms` to update the timestep in `t_params`. """ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, error_norm_method, success, - nl_max_its_fraction, composition; - electron=false, local_max_dt::mk_float=Inf) + nl_max_its_fraction, nl_total_its_soft_limit, + nl_total_its_soft_limit_reduce_dt, + composition; electron=false, + local_max_dt::mk_float=Inf) # Get global minimum of CFL limits CFL_limit = Ref(0.0) this_limit_caused_by = nothing @@ -1325,7 +1329,19 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, end end - if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 + if nl_total_its_soft_limit_reduce_dt && t_params.previous_dt[] > 0.0 + # The last step took very many nonlinear iterations, so reduce the + # timestep slightly. + # If t_params.previous_dt[]==0.0, then the previous step failed so + # timestep will not be increasing, so do not need this check. 
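The new nl_total_its_soft_limit inputs give the adaptive timestep two responses to expensive nonlinear solves: past the reduce_dt trigger the step is actively shrunk (capped at previous_dt / max_increase_factor), while past the soft limit it is only prevented from growing. A scalar sketch of that policy (names and numbers are illustrative, not the code's API):

    function limit_dt(dt, previous_dt, max_increase_factor;
                      soft_limit_hit=false, reduce_dt=false)
        if reduce_dt && previous_dt > 0.0
            # Very many nonlinear iterations last step: actively shrink dt.
            return min(dt, previous_dt / max_increase_factor)
        elseif soft_limit_hit && previous_dt > 0.0
            # Moderately many iterations: do not let dt grow.
            return min(dt, previous_dt)
        end
        return dt
    end

    limit_dt(2.0e-5, 1.5e-5, 1.5; reduce_dt=true)   # ≈ 1.0e-5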
+ iteration_limited_dt = t_params.previous_dt[] / t_params.max_increase_factor + if t_params.dt[] > iteration_limited_dt + t_params.dt[] = iteration_limited_dt + @serial_region begin + this_limit_caused_by = 5 + end + end + elseif (nl_max_its_fraction > 0.5 || nl_total_its_soft_limit) && t_params.previous_dt[] > 0.0 # The last step took many nonlinear iterations, so do not allow the # timestep to increase. # If t_params.previous_dt[]==0.0, then the previous step failed so diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index d8515d55f..1941d212f 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -46,7 +46,10 @@ using ..charge_exchange: ion_charge_exchange_collisions_1V!, neutral_charge_exchange_collisions_1V!, ion_charge_exchange_collisions_3V!, neutral_charge_exchange_collisions_3V! -using ..electron_kinetic_equation: update_electron_pdf!, implicit_electron_advance! +using ..electron_kinetic_equation: update_electron_pdf!, implicit_electron_advance!, + electron_backward_euler!, + electron_kinetic_equation_euler_update!, + apply_electron_bc_and_constraints_no_r! using ..ionization: ion_ionization_collisions_1V!, neutral_ionization_collisions_1V!, ion_ionization_collisions_3V!, neutral_ionization_collisions_3V! using ..krook_collisions: krook_collisions! @@ -384,6 +387,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, # Not an IMEX scheme, so cannot have any implicit terms t_input["implicit_braginskii_conduction"] = false t_input["implicit_electron_advance"] = false + t_input["implicit_electron_time_evolving"] = false t_input["implicit_ion_advance"] = false t_input["implicit_vpa_advection"] = false t_input["implicit_electron_ppar"] = false @@ -394,6 +398,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, if composition.electron_physics ∉ (kinetic_electrons, kinetic_electrons_with_temperature_equation) t_input["implicit_electron_advance"] = false + t_input["implicit_electron_time_evolving"] = false t_input["implicit_electron_ppar"] = false end end @@ -425,6 +430,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end implicit_electron_ppar = false + implicit_electron_advance = false + implicit_electron_time_evolving = false electron_preconditioner_type = nothing decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"] increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"] @@ -436,6 +443,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, elseif electron === false debug_io = nothing implicit_electron_ppar = false + implicit_electron_advance = false + implicit_electron_time_evolving = false electron_preconditioner_type = nothing decrease_dt_iteration_threshold = -1 increase_dt_iteration_threshold = typemax(mk_int) @@ -448,6 +457,9 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, debug_io = nothing implicit_electron_ppar = (t_input["implicit_electron_ppar"] !== false) + implicit_electron_advance = (t_input["implicit_electron_advance"] !== false) + implicit_electron_time_evolving = (t_input["implicit_electron_time_evolving"] !== false) + electron_precon_types = Dict("lu" => :electron_lu, "adi" => :electron_adi) if implicit_electron_ppar if t_input["implicit_electron_ppar"] === true if block_size[] == 1 @@ -458,7 +470,6 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron_preconditioner_type = Val(:electron_adi) end else - 
electron_precon_types = Dict("lu" => :electron_lu, "adi" => :electron_adi) if t_input["implicit_electron_ppar"] ∈ keys(electron_precon_types) electron_preconditioner_type = Val(electron_precon_types[t_input["implicit_electron_ppar"]]) else @@ -469,6 +480,32 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, * "preconditioner to use - one of $precon_keys.") end end + elseif implicit_electron_advance + if t_input["implicit_electron_advance"] !== true + error("No options other than `true` supported yet for " + * "implicit_electron_advance.") + end + electron_preconditioner_type = Val(:electron_lu) + elseif implicit_electron_time_evolving + if t_input["implicit_electron_time_evolving"] === true + if block_size[] == 1 + # No need to parallelise, so un-split LU solver should be most efficient. + electron_preconditioner_type = Val(:electron_lu) + else + # Want to parallelise preconditioner, so use ADI method. + electron_preconditioner_type = Val(:electron_adi) + end + else + if t_input["implicit_electron_time_evolving"] ∈ keys(electron_precon_types) + electron_preconditioner_type = Val(electron_precon_types[t_input["implicit_electron_time_evolving"]]) + else + precon_keys = collect(keys(electron_precon_types)) + error("Unrecognised option implicit_electron_time_evolving=" + * "\"$(t_input["implicit_electron_time_evolving"])\" which " + * "should be either false/true or a string giving the type of " + * "preconditioner to use - one of $precon_keys.") + end + end else electron_preconditioner_type = Val(:none) end @@ -480,6 +517,15 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, max_pseudotime = Inf include_wall_bc_in_preconditioner = false electron_t_params = electron + + # Check maximum dt for electrons + if rk_coefs_implicit !== nothing + electron.dt[] = min(electron.dt[], electron.maximum_dt, + electron.cap_factor_ion_dt * dt[] * rk_coefs_implicit[1,1]) + else + electron.dt[] = min(electron.dt[], electron.maximum_dt) + end + electron.previous_dt[] = electron.dt[] end return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt, dt_before_output, dt_before_last_fail, mk_float(CFL_prefactor), @@ -498,11 +544,10 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, mk_float(t_input["last_fail_proximity_factor"]), mk_float(t_input["minimum_dt"]), mk_float(t_input["maximum_dt"]), electron !== nothing && t_input["implicit_braginskii_conduction"], - electron !== nothing && t_input["implicit_electron_advance"], + implicit_electron_advance, implicit_electron_time_evolving, electron !== nothing && t_input["implicit_ion_advance"], electron !== nothing && t_input["implicit_vpa_advection"], - electron !== nothing && implicit_electron_ppar, - electron_preconditioner_type, + implicit_electron_ppar, electron_preconditioner_type, mk_float(t_input["constraint_forcing_rate"]), decrease_dt_iteration_threshold, increase_dt_iteration_threshold, mk_float(cap_factor_ion_dt), mk_int(max_pseudotimesteps), @@ -713,7 +758,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0) nl_solver_electron_advance_params = - setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation), + setup_nonlinear_solve(t_params.implicit_electron_advance || t_params.implicit_electron_time_evolving || t_params.implicit_electron_ppar, input_dict, (z=z, vperp=vperp, vpa=vpa), 
(r,); @@ -742,9 +787,10 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same " * "time") end - if t_params.implicit_electron_advance && t_params.implicit_electron_ppar - error("Cannot use implicit_electron_advance and implicit_electron_ppar at the " - * "same time.") + if (t_params.implicit_electron_advance + t_params.implicit_electron_time_evolving + + t_params.implicit_electron_ppar) > 1 + error("Can only use one of implicit_electron_advance, " + * "implicit_electron_time_evolving, and implicit_electron_ppar.") end nl_solver_params = (electron_conduction=electron_conduction_nl_solve_parameters, electron_advance=nl_solver_electron_advance_params, @@ -763,9 +809,11 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # create an array of structs containing scratch arrays for the pdf and low-order moments # that may be evolved separately via fluid equations - scratch = setup_scratch_arrays(moments, pdf, t_params.n_rk_stages + 1) + scratch = setup_scratch_arrays(moments, pdf, t_params.n_rk_stages + 1, + t_params.implicit_electron_time_evolving) if t_params.rk_coefs_implicit !== nothing - scratch_implicit = setup_scratch_arrays(moments, pdf, t_params.n_rk_stages) + scratch_implicit = setup_scratch_arrays(moments, pdf, t_params.n_rk_stages, + t_params.implicit_electron_time_evolving) else scratch_implicit = nothing end @@ -1168,6 +1216,7 @@ function setup_advance_flags(moments, composition, t_params, collisions, advance_continuity = false advance_force_balance = false advance_energy = false + advance_electron_pdf = false advance_electron_energy = false advance_electron_conduction = false advance_neutral_z_advection = false @@ -1291,7 +1340,9 @@ function setup_advance_flags(moments, composition, t_params, collisions, # moment-kinetically then advance the electron energy equation if composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) - if !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) + if !(t_params.implicit_electron_advance || + t_params.implicit_electron_time_evolving || + t_params.implicit_electron_ppar) advance_electron_energy = true advance_electron_conduction = true end @@ -1336,8 +1387,8 @@ function setup_advance_flags(moments, composition, t_params, collisions, advance_external_source, advance_ion_numerical_dissipation, advance_neutral_numerical_dissipation, advance_sources, advance_continuity, advance_force_balance, advance_energy, - advance_electron_energy, advance_electron_conduction, - advance_neutral_external_source, + advance_electron_pdf, advance_electron_energy, + advance_electron_conduction, advance_neutral_external_source, advance_neutral_sources, advance_neutral_continuity, advance_neutral_force_balance, advance_neutral_energy, manufactured_solns_test, r_diffusion, vpa_diffusion, @@ -1376,6 +1427,7 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_continuity = false advance_force_balance = false advance_energy = false + advance_electron_pdf = false advance_electron_energy = false advance_electron_conduction = false advance_neutral_z_advection = false @@ -1450,10 +1502,14 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_electron_conduction = true end - if (t_params.implicit_electron_advance || t_params.implicit_electron_ppar) + if (t_params.implicit_electron_advance || 
t_params.implicit_electron_time_evolving || + t_params.implicit_electron_ppar) advance_electron_energy = true advance_electron_conduction = true end + if t_params.implicit_electron_time_evolving + advance_electron_pdf = true + end manufactured_solns_test = manufactured_solns_input.use_for_advance @@ -1468,11 +1524,12 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_external_source, advance_ion_numerical_dissipation, advance_neutral_numerical_dissipation, advance_sources, advance_continuity, advance_force_balance, advance_energy, - advance_electron_energy, advance_electron_conduction, - advance_neutral_external_source, advance_neutral_sources, - advance_neutral_continuity, advance_neutral_force_balance, - advance_neutral_energy, manufactured_solns_test, r_diffusion, - vpa_diffusion, vperp_diffusion, vz_diffusion) + advance_electron_pdf, advance_electron_energy, + advance_electron_conduction, advance_neutral_external_source, + advance_neutral_sources, advance_neutral_continuity, + advance_neutral_force_balance, advance_neutral_energy, + manufactured_solns_test, r_diffusion, vpa_diffusion, + vperp_diffusion, vz_diffusion) end function setup_dummy_and_buffer_arrays(nr, nz, nvpa, nvperp, nvz, nvr, nvzeta, @@ -1673,7 +1730,7 @@ end create an array of structs containing scratch arrays for the normalised pdf and low-order moments that may be evolved separately via fluid equations """ -function setup_scratch_arrays(moments, pdf, n) +function setup_scratch_arrays(moments, pdf, n, time_evolve_electrons) # will create n structs, each of which will contain one pdf, # density, parallel flow, parallel pressure, and perpendicular pressure array for ions # (possibly) the same for electrons, and the same for neutrals. The actual array will @@ -1682,6 +1739,12 @@ function setup_scratch_arrays(moments, pdf, n) scratch = Vector{scratch_pdf}(undef, n) pdf_dims = size(pdf.ion.norm) moment_dims = size(moments.ion.dens) + + if time_evolve_electrons + pdf_electron_dims = size(pdf.electron.norm) + else + pdf_electron_dims = (0,0,0,0) + end moment_electron_dims = size(moments.electron.dens) pdf_neutral_dims = size(pdf.neutral.norm) @@ -1697,6 +1760,7 @@ function setup_scratch_arrays(moments, pdf, n) pperp_array = allocate_shared_float(moment_dims...) temp_array = allocate_shared_float(moment_dims...) + pdf_electron_array = allocate_shared_float(pdf_electron_dims...) density_electron_array = allocate_shared_float(moment_electron_dims...) upar_electron_array = allocate_shared_float(moment_electron_dims...) ppar_electron_array = allocate_shared_float(moment_electron_dims...) 
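When the electron pdf is not being time-evolved, the scratch field is allocated with zero extent, so downstream code can use length(x) > 0 as the presence test instead of carrying a nothing/Union field. A minimal sketch of that pattern (plain Array in place of the shared-memory type):

    time_evolve_electrons = false
    pdf_electron_dims = time_evolve_electrons ? (8, 1, 16, 2) : (0, 0, 0, 0)
    pdf_electron = zeros(Float64, pdf_electron_dims...)

    if length(pdf_electron) > 0
        pdf_electron .= 1.0   # only touched when the field is actually evolved
    end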
@@ -1697,6 +1760,7 @@ function setup_scratch_arrays(moments, pdf, n)
        pperp_array = allocate_shared_float(moment_dims...)
        temp_array = allocate_shared_float(moment_dims...)

+       pdf_electron_array = allocate_shared_float(pdf_electron_dims...)
        density_electron_array = allocate_shared_float(moment_electron_dims...)
        upar_electron_array = allocate_shared_float(moment_electron_dims...)
        ppar_electron_array = allocate_shared_float(moment_electron_dims...)
@@ -1711,11 +1775,11 @@ function setup_scratch_arrays(moments, pdf, n)
        scratch[istage] = scratch_pdf(pdf_array, density_array, upar_array,
                                      ppar_array, pperp_array, temp_array,
-                                     density_electron_array, upar_electron_array,
-                                     ppar_electron_array, pperp_electron_array,
-                                     temp_electron_array, pdf_neutral_array,
-                                     density_neutral_array, uz_neutral_array,
-                                     pz_neutral_array)
+                                     pdf_electron_array, density_electron_array,
+                                     upar_electron_array, ppar_electron_array,
+                                     pperp_electron_array, temp_electron_array,
+                                     pdf_neutral_array, density_neutral_array,
+                                     uz_neutral_array, pz_neutral_array)
        @serial_region begin
            scratch[istage].pdf .= pdf.ion.norm
            scratch[istage].density .= moments.ion.dens
@@ -1723,6 +1787,9 @@ function setup_scratch_arrays(moments, pdf, n)
            scratch[istage].ppar .= moments.ion.ppar
            scratch[istage].pperp .= moments.ion.pperp

+           if time_evolve_electrons
+               scratch[istage].pdf_electron .= pdf.electron.norm
+           end
            scratch[istage].electron_density .= moments.electron.dens
            scratch[istage].electron_upar .= moments.electron.upar
            scratch[istage].electron_ppar .= moments.electron.ppar
@@ -1870,6 +1937,7 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_para
            # Stop cleanly if a file called 'stop' was created
            println("Found 'stopnow' file $(t_params.stopfile * "now"), aborting run")
            finish_now = true
+           t_params.dt_before_output[] = t_params.dt[]
        end

        if t_params.adaptive && !t_params.write_after_fixed_step_count
@@ -2415,6 +2483,19 @@ moments and moment derivatives
                                            moments, vpa)
            end
        end
+
+       if (composition.electron_physics ∈ (kinetic_electrons,
+                                            kinetic_electrons_with_temperature_equation)
+           && length(this_scratch.pdf_electron) > 0)
+
+           for ir ∈ 1:r.n
+               @views apply_electron_bc_and_constraints_no_r!(
+                          this_scratch.pdf_electron[:,:,:,ir], fields.phi[:,ir], moments,
+                          r, z, vperp, vpa, vperp_spectral, vpa_spectral,
+                          electron_vpa_advect, num_diss_params, composition, ir,
+                          nl_solver_params.electron_advance)
+           end
+       end
    end

    # update remaining velocity moments that are calculable from the evolved pdf
@@ -2475,7 +2556,7 @@ moments and moment derivatives
                electron_vpa_advect, scratch_dummy, t_params.electron, collisions,
                composition, external_source_settings, num_diss_params,
                nl_solver_params.electron_advance, max_electron_pdf_iterations,
-               max_electron_sim_time)
+               max_electron_sim_time, solution_method="artificial_time_derivative")
            success = kinetic_electron_success
        end
    end
@@ -2558,7 +2639,9 @@ end
                              external_source_settings, spectral_objects,
                              advect_objects, gyroavs, num_diss_params,
                              nl_solver_params, advance, scratch_dummy, r, z, vperp,
-                             vpa, vzeta, vr, vz, success, nl_max_its_fraction)
+                             vpa, vzeta, vr, vz, success, nl_max_its_fraction,
+                             nl_total_its_soft_limit,
+                             nl_total_its_soft_limit_reduce_dt)

Check the error estimate for the embedded RK method and adjust the timestep if
appropriate.
@@ -2569,7 +2652,8 @@ appropriate.
                        geometry, external_source_settings, spectral_objects,
                        advect_objects, gyroavs, num_diss_params, nl_solver_params,
                        advance, scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success,
-                       nl_max_its_fraction) = begin
+                       nl_max_its_fraction, nl_total_its_soft_limit,
+                       nl_total_its_soft_limit_reduce_dt) = begin

    #error_norm_method = "Linf"
    error_norm_method = "L2"
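Several of the hunks above update the electron pdf one radial position at a time, passing 3-D @views of the 4-D (vpa, vperp, z, r) array into per-ir routines. A self-contained sketch of that slicing pattern, with a hypothetical normalise_slice! standing in for apply_electron_bc_and_constraints_no_r! and the other per-ir calls:

    # Illustrative only: apply an in-place update to each radial slice of a
    # (vpa, vperp, z, r) array via views, mirroring the per-ir loops above.
    function normalise_slice!(f3d)   # hypothetical helper, not a moment_kinetics function
        f3d ./= sum(f3d)
        return nothing
    end

    pdf_electron = rand(4, 1, 8, 3)   # (nvpa, nvperp, nz, nr)
    for ir in 1:size(pdf_electron, 4)
        @views normalise_slice!(pdf_electron[:, :, :, ir])
    end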
@@ -2651,6 +2735,10 @@ appropriate.
            begin_s_r_z_region()
            rk_loworder_solution!(scratch, scratch_implicit, :ppar, t_params)
        end
+       if t_params.implicit_electron_time_evolving
+           begin_r_z_vperp_vpa_region()
+           rk_loworder_solution!(scratch, scratch_implicit, :pdf_electron, t_params)
+       end
        if composition.electron_physics ∈ (braginskii_fluid, kinetic_electrons,
                                           kinetic_electrons_with_temperature_equation)
            begin_r_z_region()
@@ -2682,6 +2770,7 @@ appropriate.
                           scratch[t_params.n_rk_stages+1].ppar,
                           scratch[t_params.n_rk_stages+1].pperp,
                           scratch[t_params.n_rk_stages+1].temp_z_s,
+                          scratch[2].pdf_electron,
                           scratch[t_params.n_rk_stages+1].electron_density,
                           scratch[t_params.n_rk_stages+1].electron_upar,
                           scratch[t_params.n_rk_stages+1].electron_ppar,
@@ -2878,7 +2967,8 @@ appropriate.
        adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
                                           total_points, error_norm_method, success,
                                           nl_max_its_fraction,
-                                          composition)
+                                          nl_total_its_soft_limit,
+                                          nl_total_its_soft_limit_reduce_dt, composition)

        if composition.electron_physics ∈ (kinetic_electrons,
                                           kinetic_electrons_with_temperature_equation)
@@ -3015,6 +3105,12 @@ end
        first_scratch.pperp[iz,ir,is] = moments.ion.pperp[iz,ir,is]
    end

+   if length(first_scratch.pdf_electron) > 0
+       begin_r_z_vperp_vpa_region()
+       @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+           first_scratch.pdf_electron[ivpa,ivperp,iz,ir] = pdf.electron.norm[ivpa,ivperp,iz,ir]
+       end
+   end
    begin_r_z_region()
    @loop_r_z ir iz begin
        first_scratch.electron_density[iz,ir] = moments.electron.dens[iz,ir]
@@ -3097,8 +3193,10 @@ end
            # The result of the implicit solve gives the state vector at 'istage'
            # which is used as input to the explicit part of the IMEX time step.
            old_scratch = scratch_implicit[istage]
-           update_electrons = !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar)
-           success = apply_all_bcs_constraints_update_moments!(
+           update_electrons = !(t_params.implicit_electron_advance ||
+                                t_params.implicit_electron_time_evolving ||
+                                t_params.implicit_electron_ppar)
+           bcs_constraints_success = apply_all_bcs_constraints_update_moments!(
                scratch_implicit[istage], pdf, moments, fields,
                boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp,
                z, r, spectral_objects, advect_objects, composition, collisions,
@@ -3106,6 +3204,9 @@ end
                t_params, nl_solver_params, advance, scratch_dummy, false,
                max_electron_pdf_iterations, max_electron_sim_time;
                update_electrons=update_electrons)
+           if bcs_constraints_success != ""
+               success = bcs_constraints_success
+           end
            if success != ""
                # Break out of the istage loop, as passing `success != ""` to the
                # adaptive timestep update function will signal a failed timestep, so
@@ -3143,9 +3244,11 @@ end
                                    || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1])
                                    || t_params.implicit_coefficient_is_zero[istage+1])
            update_electrons = (t_params.rk_coefs_implicit === nothing
-                               || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar))
+                               || !(t_params.implicit_electron_advance ||
+                                    t_params.implicit_electron_time_evolving ||
+                                    t_params.implicit_electron_ppar))
            diagnostic_moments = diagnostic_checks && istage == n_rk_stages
-           success = apply_all_bcs_constraints_update_moments!(
+           bcs_constraints_success = apply_all_bcs_constraints_update_moments!(
                scratch[istage+1], pdf, moments, fields, boundary_distributions,
                scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects,
                advect_objects, composition, collisions, geometry, gyroavs,
@@ -3153,6 +3256,9 @@ end
                advance, scratch_dummy, diagnostic_moments, max_electron_pdf_iterations,
                max_electron_sim_time; pdf_bc_constraints=apply_bc_constraints,
                update_electrons=update_electrons)
+           if bcs_constraints_success != ""
+               success = bcs_constraints_success
+           end
            if success != ""
                # Break out of the istage loop, as passing `success != ""` to the
                # adaptive timestep update function will signal a failed timestep, so
@@ -3163,7 +3269,9 @@ end

    if t_params.adaptive
        nl_max_its_fraction = 0.0
-       if t_params.implicit_electron_advance
+       nl_total_its_soft_limit = false
+       nl_total_its_soft_limit_reduce_dt = false
+       if t_params.implicit_electron_advance || t_params.implicit_electron_time_evolving
            params_to_check = (nl_solver_params.ion_advance,
                               nl_solver_params.vpa_advection,
                               nl_solver_params.electron_conduction,
@@ -3188,6 +3296,8 @@ end
                nl_max_its_fraction =
                    max(p.max_nonlinear_iterations_this_step[] / p.nonlinear_max_iterations,
                        nl_max_its_fraction)
+               nl_total_its_soft_limit = p.max_linear_iterations_this_step[] > p.total_its_soft_limit || nl_total_its_soft_limit
+               nl_total_its_soft_limit_reduce_dt = p.max_linear_iterations_this_step[] > 1.5 * p.total_its_soft_limit || nl_total_its_soft_limit_reduce_dt
            end
        end
        adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron,
@@ -3196,7 +3306,9 @@ end
                                  geometry, external_source_settings, spectral_objects,
                                  advect_objects, gyroavs, num_diss_params,
                                  nl_solver_params, advance, scratch_dummy, r, z, vperp,
-                                 vpa, vzeta, vr, vz, success, nl_max_its_fraction)
+                                 vpa, vzeta, vr, vz, success, nl_max_its_fraction,
+                                 nl_total_its_soft_limit,
+                                 nl_total_its_soft_limit_reduce_dt)
    elseif success != ""
        error("Implicit part of timestep failed")
    end
@@ -3212,6 +3324,9 @@ end
        t_params.electron.max_step_count_this_ion_step[] = 0
        t_params.electron.max_t_increment_this_ion_step[] = 0.0
    end
+   if t_params.implicit_electron_time_evolving
+       reset_nonlinear_per_stage_counters!(nl_solver_params.electron_advance)
+   end

    if t_params.previous_dt[] > 0.0
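The nl_total_its_soft_limit flags introduced above reduce the per-step linear-iteration counts to two booleans: exceeding the soft limit discourages further dt growth, while exceeding 1.5x the limit requests an actual dt reduction. A standalone sketch of that reduction; the example iteration counts and the 0.9 shrink factor are assumptions for illustration, not values taken from adaptive_timestep_update_t_params!:

    # Illustrative reduction of the soft-limit logic above; numbers are made up.
    function soft_limit_flags(max_linear_its_per_solver, total_its_soft_limit)
        soft_limit = any(n -> n > total_its_soft_limit, max_linear_its_per_solver)
        reduce_dt = any(n -> n > 1.5 * total_its_soft_limit, max_linear_its_per_solver)
        return soft_limit, reduce_dt
    end

    dt = 1.0e-5
    nl_total_its_soft_limit, nl_total_its_soft_limit_reduce_dt = soft_limit_flags([12, 48], 30)
    if nl_total_its_soft_limit_reduce_dt
        dt *= 0.9                # iteration counts far over the limit: force dt down
    elseif nl_total_its_soft_limit
        dt = min(dt, 1.0e-5)     # over the limit: hold dt rather than let it grow
    end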
@@ -3231,6 +3346,12 @@ end
    end
    # No need to synchronize here as we only change electron quantities and previous
    # region only changed ion quantities.
+   if length(final_scratch.pdf_electron) > 0
+       begin_r_z_vperp_vpa_region()
+       @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+           pdf.electron.norm[ivpa,ivperp,iz,ir] = final_scratch.pdf_electron[ivpa,ivperp,iz,ir]
+       end
+   end
    begin_r_z_region(no_synchronize=true)
    @loop_r_z ir iz begin
        moments.electron.dens[iz,ir] = final_scratch.electron_density[iz,ir]
@@ -3545,6 +3666,18 @@ with fvec_in an input and fvec_out the output
                                     z_spectral, composition,
                                     external_source_settings.neutral, num_diss_params)
    end
+
+   if advance.electron_pdf
+       electron_t_params = (dt=Ref(dt),)
+       for ir ∈ 1:r.n
+           @views electron_kinetic_equation_euler_update!(
+                      fvec_out.pdf_electron[:,:,:,ir], fvec_out.electron_ppar[:,ir],
+                      fvec_in.pdf_electron[:,:,:,ir], fvec_in.electron_ppar[:,ir],
+                      moments, z, vperp, vpa, z_spectral, vpa_spectral, z_advect,
+                      vpa_advect, scratch_dummy, collisions, composition,
+                      external_source_settings, num_diss_params, electron_t_params, ir)
+       end
+   end
    if advance.electron_energy
        electron_energy_equation!(fvec_out.electron_ppar, fvec_in.electron_ppar,
                                  fvec_in.density, fvec_in.electron_upar, fvec_in.density,
@@ -3584,16 +3717,33 @@ end
    electron_z_advect, electron_vpa_advect = advect_objects.electron_z_advect, advect_objects.electron_vpa_advect
    neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect

+   success = true
    if t_params.implicit_electron_advance
-       success = implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron,
-                                            moments, fields, collisions, composition,
-                                            geometry, external_source_settings,
-                                            num_diss_params, r, z, vperp, vpa,
-                                            r_spectral, z_spectral, vperp_spectral,
-                                            vpa_spectral, electron_z_advect,
-                                            electron_vpa_advect, gyroavs, scratch_dummy,
-                                            t_params.electron, t_params.dt[],
-                                            nl_solver_params.electron_advance)
+       electron_success = implicit_electron_advance!(fvec_out, fvec_in, pdf,
+                                                     scratch_electron, moments, fields,
+                                                     collisions, composition, geometry,
+                                                     external_source_settings,
+                                                     num_diss_params, r, z, vperp, vpa,
+                                                     r_spectral, z_spectral,
+                                                     vperp_spectral, vpa_spectral,
+                                                     electron_z_advect,
+                                                     electron_vpa_advect, gyroavs,
+                                                     scratch_dummy, t_params.electron,
+                                                     dt,
+                                                     nl_solver_params.electron_advance)
+
+       success = success && (electron_success == "")
+   elseif t_params.implicit_electron_time_evolving
+       t_params.electron.dt[] = dt
+       for ir ∈ 1:r.n
+           electron_success = electron_backward_euler!(
+               fvec_in, fvec_out, moments, fields.phi, collisions, composition, r,
+               z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+               vpa_advect, scratch_dummy, t_params.electron,
+               external_source_settings, num_diss_params,
+               nl_solver_params.electron_advance, ir; evolve_ppar=true)
+           success = success && electron_success
+       end
    elseif t_params.implicit_electron_ppar
        max_electron_pdf_iterations = t_params.electron.max_pseudotimesteps
        max_electron_sim_time = t_params.electron.max_pseudotime
@@ -3617,7 +3767,7 @@ end
            fvec_out_electron_ppar[iz,ir] = moments_electron_ppar[iz,ir]
        end

-       success = (electron_success == "")
+       success = success && (electron_success == "")
    elseif advance.electron_conduction
        success = implicit_braginskii_conduction!(fvec_out, fvec_in, moments, z, r, dt,
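In the implicit branch above, success now starts as true and is and-ed with the outcome of each solve (success = success && ...), so a single failed radial electron solve marks the whole implicit stage as failed instead of being overwritten by a later result. A stripped-down sketch of that accumulation, with solve_ir_sketch standing in for electron_backward_euler! and friends:

    # solve_ir_sketch is a stand-in for a per-ir implicit solve; returns true on convergence.
    solve_ir_sketch(ir) = ir != 2   # pretend the solve at ir = 2 fails

    function all_radial_solves_succeed(nr)
        success = true
        for ir in 1:nr
            ir_success = solve_ir_sketch(ir)
            # accumulate rather than overwrite, so one failure fails the whole stage
            success = success && ir_success
        end
        return success
    end

    @assert all_radial_solves_succeed(3) == false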
@@ -3627,23 +3777,26 @@ end
    end

    if nl_solver_params.ion_advance !== nothing
-       success = implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments,
-                                       advect_objects, vz, vr, vzeta, vpa, vperp,
-                                       gyrophase, z, r, t_params.t[], dt,
-                                       spectral_objects, composition, collisions,
-                                       geometry, scratch_dummy,
-                                       manufactured_source_list,
-                                       external_source_settings, num_diss_params,
-                                       gyroavs, nl_solver_params.ion_advance, advance,
-                                       fp_arrays, istage)
+       ion_success = implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments,
+                                           advect_objects, vz, vr, vzeta, vpa, vperp,
+                                           gyrophase, z, r, t_params.t[], dt,
+                                           spectral_objects, composition, collisions,
+                                           geometry, scratch_dummy,
+                                           manufactured_source_list,
+                                           external_source_settings, num_diss_params,
+                                           gyroavs, nl_solver_params.ion_advance,
+                                           advance, fp_arrays, istage)
+       success = success && ion_success
    elseif advance.vpa_advection
-       success = implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments,
-                                         z_advect, vpa_advect, vpa, vperp, z, r, dt,
-                                         t_params.t[], r_spectral, z_spectral,
-                                         vpa_spectral, composition, collisions,
-                                         external_source_settings.ion, geometry,
-                                         nl_solver_params.vpa_advection,
-                                         advance.vpa_diffusion, num_diss_params, gyroavs,
-                                         scratch_dummy)
+       ion_success = implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments,
+                                             z_advect, vpa_advect, vpa, vperp, z, r, dt,
+                                             t_params.t[], r_spectral, z_spectral,
+                                             vpa_spectral, composition, collisions,
+                                             external_source_settings.ion, geometry,
+                                             nl_solver_params.vpa_advection,
+                                             advance.vpa_diffusion, num_diss_params,
+                                             gyroavs, scratch_dummy)
+       success = success && ion_success
    end

    return success
@@ -3922,6 +4075,12 @@ function update_solution_vector!(new_evolved, old_evolved, moments, composition,
        new_evolved.upar[iz,ir,is] = old_evolved.upar[iz,ir,is]
        new_evolved.ppar[iz,ir,is] = old_evolved.ppar[iz,ir,is]
    end
+   if length(new_evolved.pdf_electron) > 0
+       begin_r_z_vperp_vpa_region()
+       @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+           new_evolved.pdf_electron[ivpa,ivperp,iz,ir] = old_evolved.pdf_electron[ivpa,ivperp,iz,ir]
+       end
+   end
    begin_r_z_region()
    @loop_r_z ir iz begin
        new_evolved.electron_density[iz,ir] = old_evolved.electron_density[iz,ir]
diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl
index 60ef83ae6..9db8db41d 100644
--- a/moment_kinetics/test/kinetic_electron_tests.jl
+++ b/moment_kinetics/test/kinetic_electron_tests.jl
@@ -117,6 +117,7 @@ kinetic_input["timestepping"] = OptionsDict("type" => "PareschiRusso2(2,2,2)",
 kinetic_input["electron_timestepping"] = OptionsDict("nstep" => 5000000,
                                                      "dt" => 2.0e-5,
+                                                     "maximum_dt" => Inf,
                                                      "nwrite" => 10000,
                                                      "nwrite_dfns" => 100000,
                                                      "decrease_dt_iteration_threshold" => 5000,
diff --git a/util/precompile_run.jl b/util/precompile_run.jl
index a1e3e58ad..d66c9e8e4 100644
--- a/util/precompile_run.jl
+++ b/util/precompile_run.jl
@@ -111,6 +111,7 @@ kinetic_electron_input = recursive_merge(cheb_input, OptionsDict("evolve_moments
                                       "vr" => OptionsDict("ngrid" => 1,
                                                           "nelement" => 1),
                                       "composition" => OptionsDict("electron_physics" => "kinetic_electrons"),
+                                      "timestepping" => OptionsDict("type" => "KennedyCarpenterARK324"),
                                       "electron_timestepping" => OptionsDict("nstep" => 1,
                                                                              "dt" => 2.0e-11,
                                                                              "initialization_residual_value" => 1.0e10,
diff --git a/util/precompile_run_kinetic-electrons.jl b/util/precompile_run_kinetic-electrons.jl
index fd3e54ca7..d1b67459e 100644
--- a/util/precompile_run_kinetic-electrons.jl
+++ b/util/precompile_run_kinetic-electrons.jl
@@ -48,7 +48,8 @@ input = OptionsDict("output" => OptionsDict("run_name" => "precompilation",
                                            "bc" => "zero",
                                            "L" => 8.0,
                                            "discretization" => "gausslegendre_pseudospectral"),
-                   "timestepping" => OptionsDict("nstep" => 1,
+                   "timestepping" => OptionsDict("type" => "KennedyCarpenterARK324",
+                                                 "nstep" => 1,
                                                  "dt" => 2.0e-11),
                    "electron_timestepping" => OptionsDict("nstep" => 1,
                                                           "dt" => 2.0e-11,