From a6bdda9a98750e72d89f5be35ee6150ed1529769 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 17 Oct 2024 22:52:47 +0100 Subject: [PATCH 01/15] Copy updated moments.electron.ppar into fvec_out - electron ppar evolves Previously the `electron_ppar` in the ion/neutral `scratch_pdf` struct was never updated when using the implicit electron solve (`implicit_electron_ppar = true`). --- moment_kinetics/src/time_advance.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index b9d85420d..57ac3167e 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3530,7 +3530,17 @@ end nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; ion_dt=dt) + + # Update `fvec_out.electron_ppar` with the new electron pressure + begin_r_z_region() + fvec_out_electron_ppar = fvec_out.electron_ppar + moments_electron_ppar = moments.electron.ppar + @loop_r_z ir iz begin + fvec_out_electron_ppar[iz,ir] = moments_electron_ppar[iz,ir] + end + success = (electron_success == "") + elseif advance.electron_conduction success = implicit_braginskii_conduction!(fvec_out, fvec_in, moments, z, r, dt, z_spectral, composition, collisions, From 692244f1ebc555fab00ff7c2c0ab0048b1dea0af Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 18 Oct 2024 09:18:08 +0100 Subject: [PATCH 02/15] Kinetic electron example in periodic z-domain Maybe useful for testing. 
--- ...tails-compressed-z4-PareschiRusso2222.toml | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 examples/kinetic-electrons/nonuniform_periodic_split3_kinetic-coarse-tails-compressed-z4-PareschiRusso2222.toml diff --git a/examples/kinetic-electrons/nonuniform_periodic_split3_kinetic-coarse-tails-compressed-z4-PareschiRusso2222.toml b/examples/kinetic-electrons/nonuniform_periodic_split3_kinetic-coarse-tails-compressed-z4-PareschiRusso2222.toml new file mode 100644 index 000000000..0a18a3aba --- /dev/null +++ b/examples/kinetic-electrons/nonuniform_periodic_split3_kinetic-coarse-tails-compressed-z4-PareschiRusso2222.toml @@ -0,0 +1,152 @@ +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[r] +ngrid = 1 +nelement = 1 + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +#nelement_local = 4 +bc = "periodic" +element_spacing_option = "compressed_4" + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[composition] +T_e = 0.2 +electron_physics = "kinetic_electrons" +n_ion_species = 1 +n_neutral_species = 1 + +[ion_species_1] +initial_temperature = 0.2 +initial_density = 1.0 + +[z_IC_ion_species_1] +initialization_option = "sinusoid" +density_amplitude = 0.0 #0.2 +temperature_amplitude = 0.3 +density_phase = 0.0 +upar_amplitude = 0.0 #0.1 +temperature_phase = 1.0 +upar_phase = 2.0 + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + 
+[neutral_species_1] +initial_temperature = 0.2 +initial_density = 1.0 + +[z_IC_neutral_species_1] +initialization_option = "sinusoid" +temperature_amplitude = 0.0 +density_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[krook_collisions] +use_krook = true + +[timestepping] +type = "PareschiRusso2(2,2,2)" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 100000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 2.0e-8 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 5.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 From 8f3c67510f33da6fbc14a80d0c62e83eb99ea5d0 
Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 19 Oct 2024 10:59:27 +0100 Subject: [PATCH 03/15] Restrict some status printouts to rank-0 --- moment_kinetics/src/electron_kinetic_equation.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 61380097b..7359576e2 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1029,7 +1029,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos end if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval -println("recalculating precon") +global_rank[] == 0 && println("recalculating precon") nl_solver_params.solves_since_precon_update[] = 0 nl_solver_params.precon_dt[] = t_params.dt[] @@ -1335,26 +1335,26 @@ println("recalculating precon") if t_params.dt[] < t_params.previous_dt[] # Had to decrease timestep on the first step to get convergence, # so start next ion timestep with the decreased value. - print("decreasing previous_dt due to failures ", t_params.previous_dt[]) + global_rank[] == 0 && print("decreasing previous_dt due to failures ", t_params.previous_dt[]) t_params.previous_dt[] = t_params.dt[] - println(" -> ", t_params.previous_dt[]) + global_rank[] == 0 && println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) elseif nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold # Step succeeded, but took a lot of iterations so decrease initial # step size. 
- print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) + global_rank[] == 0 && print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) t_params.previous_dt[] /= t_params.max_increase_factor - println(" -> ", t_params.previous_dt[]) + global_rank[] == 0 && println(" -> ", t_params.previous_dt[]) #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.previous_dt[] < t_params.cap_factor_ion_dt * ion_dt) # Only took a few iterations, so increase initial step size. - print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) + global_rank[] == 0 && print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) if ion_dt === nothing t_params.previous_dt[] *= t_params.max_increase_factor else t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) end - println(" -> ", t_params.previous_dt[]) + global_rank[] == 0 && println(" -> ", t_params.previous_dt[]) end end From 6254017c9b039d37b43b94982e5574ac474aa606 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 3 Nov 2024 14:09:58 +0000 Subject: [PATCH 04/15] Use OrderedDict or SortedDict everywhere It is important to have AbstractDicts with a deterministic order of entries for options, etc., so that when writing them to the output files with parallel I/O, the writes all happen in exactly the same order, which is necessary for consistency. Failing to do this can cause hangs when writing output because the HDF5 cache(s) are inconsistent on different ranks, which can lead to some ranks calling an MPI function (e.g. MPI.Bcast!()) when others do not. 
--- .github/workflows/examples.yml | 2 +- docs/src/developing.md | 54 +++++++++ moment_kinetics/src/coordinates.jl | 3 +- moment_kinetics/src/external_sources.jl | 34 ++++-- moment_kinetics/src/file_io.jl | 12 +- moment_kinetics/src/fokker_planck.jl | 5 +- moment_kinetics/src/geo.jl | 6 +- moment_kinetics/src/input_structs.jl | 69 +++++++---- moment_kinetics/src/krook_collisions.jl | 6 +- moment_kinetics/src/load_data.jl | 7 +- moment_kinetics/src/maxwell_diffusion.jl | 6 +- moment_kinetics/src/moment_kinetics.jl | 2 +- moment_kinetics/src/moment_kinetics_input.jl | 2 +- moment_kinetics/src/species_input.jl | 8 +- moment_kinetics/src/timer_utils.jl | 8 +- moment_kinetics/src/type_definitions.jl | 4 +- moment_kinetics/src/utils.jl | 15 ++- moment_kinetics/test/harrisonthompson.jl | 114 +++++++++--------- moment_kinetics/test/jacobian_matrix_tests.jl | 96 +++++++-------- 19 files changed, 277 insertions(+), 176 deletions(-) diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index e38a824e9..0eb0ffb71 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -23,4 +23,4 @@ jobs: touch Project.toml julia -O3 --project -e 'import Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.add("NCDatasets"); Pkg.precompile()' # Reduce nstep for each example to 10 to avoid the CI job taking too long - julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); 
electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' + julia -O3 --project -e 'using moment_kinetics; using moment_kinetics.type_definitions: OptionsDict; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", OptionsDict()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", OptionsDict()), "nelement_local", ""); pop!(get(input, "r", OptionsDict()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", OptionsDict()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", OptionsDict()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' diff --git a/docs/src/developing.md b/docs/src/developing.md index 4b57f1e15..241f4aceb 100644 --- a/docs/src/developing.md +++ b/docs/src/developing.md @@ -41,6 +41,18 @@ String and as a tree of HDF5 variables. use `nothing` as a default for some setting, that is fine, but must be done after the input is read, and not stored in the `input_dict`. +!!! 
warning "Parallel I/O consistency" + To ensure consistency between all MPI ranks in the order of reads and/or + writes when using Parallel I/O, all dictionary types used to store options + must be either `OrderedDict` or `SortedDict`, so that their order of + entries is deterministic (which is not the case for `Dict`, which instead + optimises for look-up speed). This should mostly be taken care of by using + `moment_kinetics`'s `OptionsDict` type (which is an alias for + `OrderedDict{String,Any}`). We also need to sort the input after it is read + by `TOML`, which is taken care of by + [`moment_kinetics.input_structs.convert_to_sorted_nested_OptionsDict`](@ref). + See also [Parallel I/O](@ref parallel_io_section). + ## Array types @@ -219,6 +231,48 @@ communicator is `comm_block[]`). See also notes on debugging the 'anyv' parallelisation: [Collision operator and 'anyv' region](@ref). +## [Parallel I/O](@id parallel_io_section) + +The code provides an option to use parallel I/O, which allows all output to be +written to a single output file even when using distributed-MPI parallelism - +this is the default option when the linked HDF5 library is compiled with +parallel-I/O support. + +There are a few things to be aware of to ensure parallel I/O works correctly: +* Some operations have to be called simultaneously on all the MPI ranks that + have the output file open. Roughly, these are any operations that change the + 'metadata' of the file, for example opening/closing files, creating + variables, extending dimensions of variables, changing attributes of + variables. Reading or writing the data from a variable does not have to be + done collectively - actually when we write data we ensure that every rank + that is writing writes a non-overlapping slice of the array to avoid + contention that could slow down the I/O (because one rank has to wait for + another) and to avoid slight inconsistencies because it is uncertain which + rank writes the data last. 
For more details see the [HDF5.jl + documentation](https://juliaio.github.io/HDF5.jl/stable/mpi/#Reading-and-writing-data-in-parallel) + and the [HDF5 + documentation](https://support.hdfgroup.org/archive/support/HDF5/doc/RM/CollectiveCalls.html). +* One important subtlety is that the `Dict` type does not guarantee a + deterministic order of entries. When you iterate over a `Dict`, you can get + the results in a different order at different times or on different MPI + ranks. If we iterated over a `Dict` to create variables to write to an output + file, or to read from a file, then different MPI ranks might (sometimes) get + the variables in a different order, causing errors. We therefore use either + `OrderedDict` or `SortedDict` types for anything that might be written to or + read from an HDF5 file. + +If the collective operations are not done perfectly consistently, the errors +can be extremely non-obvious. The inconsistent operations may appear to execute +correctly, for example because the same number of variables are created, and +the metadata may only actually be written from the rank-0 process, but the +inconsistency may cause errors later. [JTO, 3/11/2024: my best guess as to the +reason for this is that it puts HDF5's 'metadata cache' in inconsistent states +on different ranks, and this means that at some later time the ranks will cycle +some metadata out of the cache in different orders, and then some ranks will be +able to get the metadata from the cache, while others have to read it from the +file. The reading from the file requires some collective MPI call, which is +only called from some ranks and not others, causing the code to hang.] 
+ ## Package structure The structure of the packages in the `moment_kinetics` repo is set up so that diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 0f80b4992..bf1c45416 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -20,6 +20,7 @@ using ..input_structs using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info using MPI +using OrderedCollections: OrderedDict """ structure containing basic information related to coordinates @@ -204,7 +205,7 @@ function get_coordinate_input(input_dict, name; ignore_MPI=false) if input_dict === nothing boundary_parameters = nothing else - boundary_parameters_defaults = Dict{Symbol,Any}() + boundary_parameters_defaults = OrderedDict{Symbol,Any}() if name == "z" # parameter controlling the cutoff of the ion distribution function in the vpa # domain at the wall in z diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 6fbd35a96..2eeeb3ac1 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -31,6 +31,7 @@ using ..timer_utils using ..velocity_moments: get_density using MPI +using OrderedCollections: OrderedDict """ setup_external_sources!(input_dict, r, z) @@ -156,10 +157,14 @@ function setup_external_sources!(input_dict, r, z, electron_physics) * "density_midpoint_control, energy, alphas, alphas-with-losses, " * "beam, beam-with-losses") end - return ion_source_data(; Dict(Symbol(k)=>v for (k,v) ∈ input)..., r_amplitude, - z_amplitude=z_amplitude, PI_density_target=PI_density_target, - PI_controller_amplitude, controller_source_profile, - PI_density_target_ir, PI_density_target_iz, PI_density_target_rank) + return ion_source_data(; OrderedDict(Symbol(k)=>v for (k,v) ∈ input)..., + r_amplitude=r_amplitude, z_amplitude=z_amplitude, + PI_density_target=PI_density_target, + PI_controller_amplitude=PI_controller_amplitude, + 
controller_source_profile=controller_source_profile, + PI_density_target_ir=PI_density_target_ir, + PI_density_target_iz=PI_density_target_iz, + PI_density_target_rank=PI_density_target_rank) end function get_settings_neutrals(source_index, active_flag) @@ -313,10 +318,14 @@ function setup_external_sources!(input_dict, r, z, electron_physics) * "beam, beam-with-losses, recycling (for neutrals only)") end - return neutral_source_data(; Dict(Symbol(k)=>v for (k,v) ∈ input)..., r_amplitude, - z_amplitude=z_amplitude, PI_density_target=PI_density_target, - PI_controller_amplitude, controller_source_profile, - PI_density_target_ir, PI_density_target_iz, PI_density_target_rank) + return neutral_source_data(; OrderedDict(Symbol(k)=>v for (k,v) ∈ input)..., + r_amplitude=r_amplitude, z_amplitude=z_amplitude, + PI_density_target=PI_density_target, + PI_controller_amplitude=PI_controller_amplitude, + controller_source_profile=controller_source_profile, + PI_density_target_ir=PI_density_target_ir, + PI_density_target_iz=PI_density_target_iz, + PI_density_target_rank=PI_density_target_rank) end function get_settings_electrons(i, ion_settings) # Note most settings for the electron source are copied from the ion source, @@ -339,9 +348,12 @@ function setup_external_sources!(input_dict, r, z, electron_physics) end input["source_strength"] = ion_settings.source_strength end - return electron_source_data(input["source_strength"], input["source_T"], - ion_settings.active, ion_settings.r_amplitude, - ion_settings.z_amplitude, ion_settings.source_type) + return electron_source_data(source_strength=input["source_strength"], + source_T=input["source_T"], + active=ion_settings.active, + r_amplitude=ion_settings.r_amplitude, + z_amplitude=ion_settings.z_amplitude, + source_type=ion_settings.source_type) end # put all ion sources into ion_source_data struct vector diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 227d6eb66..169d5e495 100644 --- 
a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -2584,7 +2584,11 @@ function write_timing_data(io_moments, t_idx, dfns=false) new_timer_names = String[] function get_names_inner(this_timer, timer_names_subdict, prefix) names_subdict_keys = keys(timer_names_subdict) - for k ∈ keys(this_timer.inner_timers) + inner_timers_keys = collect(keys(this_timer.inner_timers)) + # Sort keys to ensure that the list created by this function has a + # deterministic order. + sort!(inner_timers_keys) + for k ∈ inner_timers_keys this_name = prefix == "" ? k : prefix * ";" * k if k ∉ names_subdict_keys push!(new_timer_names, this_name) @@ -2799,9 +2803,9 @@ function write_timing_data(io_moments, t_idx, dfns=false) # Collect the timing data onto the root process of each block if block_rank[] == 0 - times_data = Dict{mk_int,Vector{mk_int}}() - ncalls_data = Dict{mk_int,Vector{mk_int}}() - allocs_data = Dict{mk_int,Vector{mk_int}}() + times_data = SortedDict{mk_int,Vector{mk_int}}() + ncalls_data = SortedDict{mk_int,Vector{mk_int}}() + allocs_data = SortedDict{mk_int,Vector{mk_int}}() times_data[0], ncalls_data[0], allocs_data[0] = get_data_from_timers() for irank ∈ 1:block_size[]-1 this_global_rank = global_rank[] + irank diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl index fb5ac876b..001d90824 100644 --- a/moment_kinetics/src/fokker_planck.jl +++ b/moment_kinetics/src/fokker_planck.jl @@ -45,6 +45,7 @@ using FastGaussQuadrature using Dates using LinearAlgebra: lu, ldiv! 
using MPI +using OrderedCollections: OrderedDict using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float, allocate_shared_float using ..communication @@ -82,7 +83,7 @@ use_fokker_planck = true nuii = 1.0 frequency_option = "manual" """ -function setup_fkpl_collisions_input(toml_input::Dict) +function setup_fkpl_collisions_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) # get reference collision frequency (note factor of 1/2 due to definition choices) nuii_fkpl_default = 0.5*get_reference_collision_frequency_ii(reference_params) @@ -117,7 +118,7 @@ function setup_fkpl_collisions_input(toml_input::Dict) if !input_section["use_fokker_planck"] input_section["nuii"] = -1.0 end - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) #println(input) if input_section["slowing_down_test"] # calculate nu_alphae and vc3 (critical speed of slowing down) diff --git a/moment_kinetics/src/geo.jl b/moment_kinetics/src/geo.jl index 41be4d0db..933598a65 100644 --- a/moment_kinetics/src/geo.jl +++ b/moment_kinetics/src/geo.jl @@ -14,6 +14,8 @@ using ..array_allocation: allocate_float using ..type_definitions: mk_float, mk_int using ..reference_parameters: setup_reference_parameters +using OrderedCollections: OrderedDict + """ struct containing the geometric data necessary for non-trivial axisymmetric geometries, to be passed @@ -81,7 +83,7 @@ DeltaB = 0.0 option = "" """ -function setup_geometry_input(toml_input::Dict) +function setup_geometry_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) reference_rhostar = get_default_rhostar(reference_params) @@ -105,7 +107,7 @@ function setup_geometry_input(toml_input::Dict) # constant for testing nonzero dBdr when nr = 1 dBdr_constant = 0.0) - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) 
#println(input) return geometry_input(; input...) end diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index f427c3a83..edcdb9a8a 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -17,12 +17,15 @@ export io_input export pp_input export geometry_input export set_defaults_and_check_top_level!, set_defaults_and_check_section!, - check_sections!, options_to_TOML, Dict_to_NamedTuple + check_sections!, options_to_TOML, Dict_to_NamedTuple, + convert_to_sorted_nested_OptionsDict using ..communication -using ..type_definitions: mk_float, mk_int +using ..type_definitions: mk_float, mk_int, OptionsDict +using DataStructures: SortedDict using MPI +using OrderedCollections: OrderedDict using TOML """ @@ -730,7 +733,7 @@ import Base: get Utility method for converting a string to an Enum when getting from a Dict, based on the type of the default value """ -function get(d::Dict, key, default::Enum) +function get(d::OptionsDict, key, default::Enum) val_maybe_string = get(d, key, nothing) if val_maybe_string == nothing return default @@ -750,14 +753,12 @@ end Set the defaults for options in the top level of the input, and check that there are not any unexpected options (i.e. options that have no default). -Modifies the options[section_name]::Dict by adding defaults for any values that are not -already present. +Modifies the options[section_name]::OptionsDict by adding defaults for any values that +are not already present. Ignores any sections, as these will be checked separately. """ -function set_defaults_and_check_top_level!(options::AbstractDict; kwargs...) - DictType = typeof(options) - +function set_defaults_and_check_top_level!(options::OptionsDict; kwargs...) 
# Check for any unexpected values in the options - all options that are set should be # present in the kwargs of this function call options_keys_symbols = keys(kwargs) @@ -781,13 +782,12 @@ function set_defaults_and_check_top_level!(options::AbstractDict; kwargs...) return options end -function _get_section_and_check_option_names(options, section_name, section_keys) - - DictType = typeof(options) +function _get_section_and_check_option_names(options::OptionsDict, section_name, + section_keys) if !(section_name ∈ keys(options)) # If section is not present, create it - options[section_name] = DictType() + options[section_name] = OptionsDict() end if !isa(options[section_name], AbstractDict) @@ -839,11 +839,10 @@ const _section_check_store_name = "_section_check_store" Set the defaults for options in a section, and check that there are not any unexpected options (i.e. options that have no default). -Modifies the options[section_name]::Dict by adding defaults for any values that are not -already present. +Modifies the options[section_name]::OptionsDict by adding defaults for any values that +are not already present. """ -function set_defaults_and_check_section!(options::AbstractDict, section_name; - kwargs...) +function set_defaults_and_check_section!(options::OptionsDict, section_name; kwargs...) section_keys_symbols = keys(kwargs) section_keys = (String(k) for k ∈ section_keys_symbols) @@ -865,7 +864,7 @@ function set_defaults_and_check_section!(options::AbstractDict, section_name; end """ - set_defaults_and_check_section!(options::AbstractDict, struct_type::Type, + set_defaults_and_check_section!(options::OptionsDict, struct_type::Type, name::Union{String,Nothing}=nothing) Alternative form to be used when the options should be stored in a struct of type @@ -874,15 +873,15 @@ Alternative form to be used when the options should be stored in a struct of typ The returned instance of `struct_type` is immutable, so if you need to modify the settings - e.g. 
to apply some logic to set defaults depending on other settings/parameters - then you should use the 'standard' version of [`set_defaults_and_check_section!`](@ref) that -returns a `Dict` that can be modified, and then use that `Dict` to initialise the -`struct_type`. +returns an `OptionsDict` that can be modified, and then use that `OptionsDict` to +initialise the `struct_type`. The name of the section in the options that will be read defaults to the name of `struct_type`, but can be set using the `section_name` argument. Returns an instance of `struct_type`. """ -function set_defaults_and_check_section!(options::AbstractDict, struct_type::Type, +function set_defaults_and_check_section!(options::OptionsDict, struct_type::Type, section_name::Union{String,Nothing}=nothing) if section_name === nothing @@ -910,12 +909,12 @@ function set_defaults_and_check_section!(options::AbstractDict, struct_type::Typ end """ - check_sections!(options::AbstractDict) + check_sections!(options::OptionsDict) Check that there are no unexpected sections in `options`. The 'expected sections' are the ones that were defined with [`set_defaults_and_check_section!`](@ref). """ -function check_sections!(options::AbstractDict; check_no_top_level_options=true) +function check_sections!(options::OptionsDict; check_no_top_level_options=true) expected_section_names = pop!(options, _section_check_store_name) @@ -945,18 +944,38 @@ function check_sections!(options::AbstractDict; check_no_top_level_options=true) end """ -Convert a Dict whose keys are String or Symbol to a NamedTuple + convert_to_sorted_nested_OptionsDict(d::AbstractDict) + +To ensure consistency when writing options to an output file, the entries in the +dictionary containing the options must be in a deterministic order. As TOML reads +options into a nested `Dict`, the only way to guarantee this is to sort the options +before storing them in an `OptionsDict`.
`OptionsDict` is an alias for +`OrderedDict{String,Any}` so it will preserve the order of entries as long as they were +in a consistent order when it was created. +""" +function convert_to_sorted_nested_OptionsDict(d::AbstractDict) + sorted_d = SortedDict{String,Any}(d) + for (k,v) ∈ pairs(sorted_d) + if isa(v, AbstractDict) + sorted_d[k] = convert_to_sorted_nested_OptionsDict(v) + end + end + return OptionsDict(sorted_d) +end + +""" +Convert an OrderedDict whose keys are String or Symbol to a NamedTuple Useful as NamedTuple is immutable, so option values cannot be accidentally changed. """ -function Dict_to_NamedTuple(d) +function Dict_to_NamedTuple(d::OrderedDict) return NamedTuple(Symbol(k)=>v for (k,v) ∈ d) end """ options_to_toml(io::IO [=stdout], data::AbstractDict; sorted=false, by=identity) -Convert `moment_kinetics` 'options' (in the form of a `Dict`) to TOML format. +Convert `moment_kinetics` 'options' (in the form of an `AbstractDict`) to TOML format. This function is defined so that we can handle some extra types, for example `Enum`. diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index 6e37d6dfc..f0a99404d 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -15,6 +15,8 @@ using ..reference_parameters: get_reference_collision_frequency_ii, get_reference_collision_frequency_ei using ..reference_parameters: setup_reference_parameters +using OrderedCollections: OrderedDict + """ Function for reading Krook collision operator input parameters. Structure the namelist as follows. 
@@ -24,7 +26,7 @@ use_krook = true nuii0 = 1.0 frequency_option = "manual" """ -function setup_krook_collisions_input(toml_input::Dict) +function setup_krook_collisions_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) # get reference collision frequency nuii_krook_default = get_reference_collision_frequency_ii(reference_params) @@ -63,7 +65,7 @@ function setup_krook_collisions_input(toml_input::Dict) input_section["nuee0"] = -1.0 input_section["nuei0"] = -1.0 end - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) #println(input) return krook_collisions_input(; input...) end diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 53ceda028..e6b9576ce 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -39,6 +39,7 @@ using ..z_advection: update_speed_z! using Glob using HDF5 using MPI +using OrderedCollections: OrderedDict const timestep_diagnostic_variables = ("time_for_run", "step_counter", "dt", "failure_counter", "failure_caused_by", @@ -664,7 +665,7 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, vzeta, vr, vz, parallel_io) # Test whether any interpolation is needed - interpolation_needed = Dict( + interpolation_needed = OrderedDict( x.name => (restart_x !== nothing && (x.n != restart_x.n || !all(isapprox.(x.grid, restart_x.grid)))) @@ -1105,7 +1106,7 @@ function reload_electron_data!(pdf, moments, t_params, restart_prefix_iblock, ti vzeta, vr, vz, parallel_io) # Test whether any interpolation is needed - interpolation_needed = Dict( + interpolation_needed = OrderedDict( x.name => x.n != restart_x.n || !all(isapprox.(x.grid, restart_x.grid)) for (x, restart_x) ∈ ((z, restart_z), (r, restart_r), (vperp, restart_vperp), (vpa, restart_vpa))) @@ -3595,7 +3596,7 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin 
evolving_variables = Tuple(evolving_variables) end - timing_variable_names = Dict{mk_int, Vector{String}}() + timing_variable_names = OrderedDict{mk_int, Vector{String}}() for fid ∈ fids0 timing_group = get_group(fid, "timing_data") timing_rank_names = collect(k for k in keys(timing_group) if startswith(k, "rank")) diff --git a/moment_kinetics/src/maxwell_diffusion.jl b/moment_kinetics/src/maxwell_diffusion.jl index 73a36c44a..85d2d4639 100644 --- a/moment_kinetics/src/maxwell_diffusion.jl +++ b/moment_kinetics/src/maxwell_diffusion.jl @@ -21,6 +21,8 @@ using ..calculus: second_derivative! using ..timer_utils using ..reference_parameters: get_reference_collision_frequency_ii, setup_reference_parameters +using OrderedCollections: OrderedDict + """ Function for reading Maxwell diffusion operator input parameters. Structure the namelist as follows. @@ -30,7 +32,7 @@ use_maxwell_diffusion = true D_ii = 1.0 diffusion_coefficient_option = "manual" """ -function setup_mxwl_diff_collisions_input(toml_input::Dict) +function setup_mxwl_diff_collisions_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) # get reference diffusion coefficient, made up of collision frequency and # thermal speed for now. NOTE THAT THIS CONSTANT PRODUCES ERRORS. DO NOT USE @@ -63,7 +65,7 @@ function setup_mxwl_diff_collisions_input(toml_input::Dict) input_section["D_ii"] = -1.0 input_section["D_nn"] = -1.0 end - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) return mxwl_diff_collisions_input(; input...) 
end diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 1f2c86593..e400400a8 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -187,7 +187,7 @@ function run_moment_kinetics() end restart_time_index = options["restart-time-index"] if inputfile === nothing - this_input = Dict() + this_input = OptionsDict() else this_input = inputfile end diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 9f49aef52..18e78aeb6 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -31,7 +31,7 @@ using TOML Read input from a TOML file """ function read_input_file(input_filename::String) - input = TOML.parsefile(input_filename) + input = convert_to_sorted_nested_OptionsDict(TOML.parsefile(input_filename)) # Use input_filename (without the extension) as default for "run_name" if !("output" ∈ keys(input) && "run_name" in keys(input["output"])) diff --git a/moment_kinetics/src/species_input.jl b/moment_kinetics/src/species_input.jl index 59d7043fe..dc9067891 100644 --- a/moment_kinetics/src/species_input.jl +++ b/moment_kinetics/src/species_input.jl @@ -13,6 +13,8 @@ using ..input_structs: spatial_initial_condition_input, velocity_initial_conditi using ..input_structs: boltzmann_electron_response, boltzmann_electron_response_with_simple_sheath using ..reference_parameters: setup_reference_parameters +using OrderedCollections: OrderedDict + function get_species_input(toml_input) reference_params = setup_reference_parameters(toml_input) @@ -79,7 +81,7 @@ function get_species_input(toml_input) velocity_initial_condition_input, "vpa_IC_ion_species_$is") - spec_input = Dict(Symbol(k)=>v for (k,v) in spec_section) + spec_input = OrderedDict(Symbol(k)=>v for (k,v) in spec_section) ion_spec_params_list[is] = ion_species_parameters(; type="ion", z_IC=z_IC, r_IC=r_IC, vpa_IC=vpa_IC, 
spec_input...) @@ -106,14 +108,14 @@ function get_species_input(toml_input) velocity_initial_condition_input, "vz_IC_neutral_species_$isn") - spec_input = Dict(Symbol(k)=>v for (k,v) in spec_section) + spec_input = OrderedDict(Symbol(k)=>v for (k,v) in spec_section) neutral_spec_params_list[isn] = neutral_species_parameters(; type="neutral", z_IC=z_IC, r_IC=r_IC, vz_IC=vz_IC, spec_input...) end # construct composition struct - composition_input = Dict(Symbol(k)=>v for (k,v) in composition_section) + composition_input = OrderedDict(Symbol(k)=>v for (k,v) in composition_section) composition = species_composition(; n_species=nspec_tot, ion=ion_spec_params_list, neutral=neutral_spec_params_list, composition_input...) diff --git a/moment_kinetics/src/timer_utils.jl b/moment_kinetics/src/timer_utils.jl index 1fadff14e..951414707 100644 --- a/moment_kinetics/src/timer_utils.jl +++ b/moment_kinetics/src/timer_utils.jl @@ -37,14 +37,14 @@ const TimerNamesDict = SortedDict{String,SortedDict,Base.Order.ForwardOrdering} TimerNamesDict() = TimerNamesDict(Base.Order.ForwardOrdering()) """ -Nested Dict containting the names of all timers that have been created on each MPI rank -and added to the moments output file. +Nested SortedDict containing the names of all timers that have been created on each MPI +rank and added to the moments output file. """ const timer_names_per_rank_moments = SortedDict{mk_int,Tuple{TimerNamesDict,Ref{mk_int}}}() """ -Nested Dict containting the names of all timers that have been created on each MPI rank -and added to the dfns output file. +Nested SortedDict containing the names of all timers that have been created on each MPI +rank and added to the dfns output file.
""" const timer_names_per_rank_dfns = SortedDict{mk_int,Tuple{TimerNamesDict,Ref{mk_int}}}() diff --git a/moment_kinetics/src/type_definitions.jl b/moment_kinetics/src/type_definitions.jl index e53fea73d..b5b14fafc 100644 --- a/moment_kinetics/src/type_definitions.jl +++ b/moment_kinetics/src/type_definitions.jl @@ -6,6 +6,8 @@ export mk_float export mk_int export OptionsDict +using OrderedCollections: OrderedDict + """ """ const mk_float = Float64 @@ -16,6 +18,6 @@ const mk_int = Int64 """ """ -const OptionsDict = Dict{String,Any} +const OptionsDict = OrderedDict{String,Any} end diff --git a/moment_kinetics/src/utils.jl b/moment_kinetics/src/utils.jl index ce92c5d98..f38954a29 100644 --- a/moment_kinetics/src/utils.jl +++ b/moment_kinetics/src/utils.jl @@ -30,14 +30,14 @@ function __init__() end """ - get_unnormalized_parameters(input::Dict) + get_unnormalized_parameters(input::AbstractDict) get_unnormalized_parameters(input_filename::String) Get many parameters for the simulation setup given by `input` or in the file `input_filename`, in SI units and eV, returned as an OrderedDict. """ function get_unnormalized_parameters end -function get_unnormalized_parameters(input::Dict) +function get_unnormalized_parameters(input::AbstractDict) io_input, evolve_moments, t_params, z, z_spectral, r, r_spectral, vpa, vpa_spectral, vperp, vperp_spectral, gyrophase, gyrophase_spectral, vz, vz_spectral, vr, vr_spectral, vzeta, vzeta_spectral, composition, species, collisions, geometry, @@ -90,8 +90,8 @@ end """ print_unnormalized_parameters(input) -Print many parameters for the simulation setup given by `input` (a Dict of parameters or -a String giving a filename), in SI units and eV. +Print many parameters for the simulation setup given by `input` (an AbstractDict of +parameters or a String giving a filename), in SI units and eV. """ function print_unnormalized_parameters(args...; kwargs...) 
@@ -325,10 +325,9 @@ function recursive_merge(a, b) end """ -Dict merge function for named keyword arguments -for case when input Dict is a mixed Dict of Dicts -and non-Dict float/int/string entries, and the -keyword arguments are also a mix of Dicts and non-Dicts +Dict merge function for named keyword arguments for case when input AbstractDict is a +mixed AbstractDict of AbstractDicts and non-AbstractDict float/int/string entries, and +the keyword arguments are also a mix of AbstractDicts and non-AbstractDicts """ function merge_dict_with_kwargs!(dict_base; args...) for (k,v) in args diff --git a/moment_kinetics/test/harrisonthompson.jl b/moment_kinetics/test/harrisonthompson.jl index d0ec7f31b..64cc24e66 100644 --- a/moment_kinetics/test/harrisonthompson.jl +++ b/moment_kinetics/test/harrisonthompson.jl @@ -62,63 +62,63 @@ function findphi(z, R_ion) end # default inputs for tests -test_input_finite_difference = Dict("composition" => OptionsDict("n_ion_species" => 1, - "n_neutral_species" => 0, - "electron_physics" => "boltzmann_electron_response", - "T_e" => 1.0, - "T_wall" => 1.0), - "ion_species_1" => OptionsDict("initial_density" => 1.0, - "initial_temperature" => 1.0), - "z_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 0.0, - "density_phase" => 0.0, - "upar_amplitude" => 0.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), - "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 1.0, - "density_phase" => 0.0, - "upar_amplitude" => 0.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), - "output" => OptionsDict("run_name" => "finite_difference", - "parallel_io" => false), - "evolve_moments" => OptionsDict("density" => false, - "parallel_flow" => false, - "parallel_pressure" => false, - "moments_conservation" => false), - "reactions" => OptionsDict("charge_exchange_frequency" => 0.0, - 
"ionization_frequency" => 0.0), - "timestepping" => OptionsDict("nstep" => 9000, - "dt" => 0.0005, - "nwrite" => 9000, - "split_operators" => false), - "r" => OptionsDict("ngrid" => 1, - "nelement" => 1, - "bc" => "periodic", - "discretization" => "finite_difference"), - "z" => OptionsDict("ngrid" => 100, - "nelement" => 1, - "bc" => "wall", - "discretization" => "finite_difference"), - "vpa" => OptionsDict("ngrid" => 200, - "nelement" => 1, - "L" => 8.0, - "bc" => "zero", - "discretization" => "finite_difference"), - "vz" => OptionsDict("ngrid" => 200, - "nelement" => 1, - "L" => 8.0, - "bc" => "zero", - "discretization" => "finite_difference"), - "ion_source_1" => OptionsDict("active" => true, - "source_strength" => ionization_frequency, - "source_T" => 0.25, - "z_profile" => "constant", - "r_profile" => "constant"), - ) +test_input_finite_difference = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, + "n_neutral_species" => 0, + "electron_physics" => "boltzmann_electron_response", + "T_e" => 1.0, + "T_wall" => 1.0), + "ion_species_1" => OptionsDict("initial_density" => 1.0, + "initial_temperature" => 1.0), + "z_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 0.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "upar_phase" => 0.0, + "temperature_amplitude" => 0.0, + "temperature_phase" => 0.0), + "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "upar_phase" => 0.0, + "temperature_amplitude" => 0.0, + "temperature_phase" => 0.0), + "output" => OptionsDict("run_name" => "finite_difference", + "parallel_io" => false), + "evolve_moments" => OptionsDict("density" => false, + "parallel_flow" => false, + "parallel_pressure" => false, + "moments_conservation" => false), + "reactions" => OptionsDict("charge_exchange_frequency" => 0.0, + "ionization_frequency" => 0.0), + "timestepping" => 
OptionsDict("nstep" => 9000, + "dt" => 0.0005, + "nwrite" => 9000, + "split_operators" => false), + "r" => OptionsDict("ngrid" => 1, + "nelement" => 1, + "bc" => "periodic", + "discretization" => "finite_difference"), + "z" => OptionsDict("ngrid" => 100, + "nelement" => 1, + "bc" => "wall", + "discretization" => "finite_difference"), + "vpa" => OptionsDict("ngrid" => 200, + "nelement" => 1, + "L" => 8.0, + "bc" => "zero", + "discretization" => "finite_difference"), + "vz" => OptionsDict("ngrid" => 200, + "nelement" => 1, + "L" => 8.0, + "bc" => "zero", + "discretization" => "finite_difference"), + "ion_source_1" => OptionsDict("active" => true, + "source_strength" => ionization_frequency, + "source_T" => 0.25, + "z_profile" => "constant", + "r_profile" => "constant"), + ) test_input_chebyshev = recursive_merge(test_input_finite_difference, OptionsDict("output" => OptionsDict("run_name" => "chebyshev_pseudospectral"), diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 7e48cc80f..e61ae7d33 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -131,57 +131,57 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix" "discretization" => "gausslegendre_pseudospectral", "element_spacing_option" => "coarse_tails", ), - "timestepping" => Dict{String,Any}("type" => "KennedyCarpenterARK324", - "implicit_electron_advance" => false, - "implicit_electron_ppar" => true, - "implicit_ion_advance" => false, - "implicit_vpa_advection" => false, - "nstep" => 1, - "dt" => ion_dt, - "minimum_dt" => 1.0e-7, - "rtol" => 1.0e-4, - "max_increase_factor_near_last_fail" => 1.001, - "last_fail_proximity_factor" => 1.1, - "max_increase_factor" => 1.05, - "nwrite" => 10000, - "nwrite_dfns" => 10000, - "steady_state_residual" => true, - "converged_residual_value" => 1.0e-3, - ), - "electron_timestepping" => Dict{String,Any}("nstep" => 1, - "dt" => 
dt, - "maximum_dt" => 1.0, - "nwrite" => 10000, - "nwrite_dfns" => 100000, - "type" => "Fekete4(3)", - "rtol" => 1.0e-6, - "atol" => 1.0e-14, - "minimum_dt" => 1.0e-10, - "initialization_residual_value" => 2.5, - "converged_residual_value" => 1.0e-2, - "constraint_forcing_rate" => 2.321, - ), - "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100, - "rtol" => 1.0e-5, - "atol" => 1.0e-15, - "preconditioner_update_interval" => 1, + "timestepping" => OptionsDict("type" => "KennedyCarpenterARK324", + "implicit_electron_advance" => false, + "implicit_electron_ppar" => true, + "implicit_ion_advance" => false, + "implicit_vpa_advection" => false, + "nstep" => 1, + "dt" => ion_dt, + "minimum_dt" => 1.0e-7, + "rtol" => 1.0e-4, + "max_increase_factor_near_last_fail" => 1.001, + "last_fail_proximity_factor" => 1.1, + "max_increase_factor" => 1.05, + "nwrite" => 10000, + "nwrite_dfns" => 10000, + "steady_state_residual" => true, + "converged_residual_value" => 1.0e-3, + ), + "electron_timestepping" => OptionsDict("nstep" => 1, + "dt" => dt, + "maximum_dt" => 1.0, + "nwrite" => 10000, + "nwrite_dfns" => 100000, + "type" => "Fekete4(3)", + "rtol" => 1.0e-6, + "atol" => 1.0e-14, + "minimum_dt" => 1.0e-10, + "initialization_residual_value" => 2.5, + "converged_residual_value" => 1.0e-2, + "constraint_forcing_rate" => 2.321, ), - "ion_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 1.0e0, + "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100, + "rtol" => 1.0e-5, + "atol" => 1.0e-15, + "preconditioner_update_interval" => 1, + ), + "ion_numerical_dissipation" => OptionsDict("vpa_dissipation_coefficient" => 1.0e0, + "force_minimum_pdf_value" => 0.0, + ), + "electron_numerical_dissipation" => OptionsDict("vpa_dissipation_coefficient" => 2.0, "force_minimum_pdf_value" => 0.0, ), - "electron_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 2.0, - "force_minimum_pdf_value" => 0.0, - ), - 
"neutral_numerical_dissipation" => Dict{String,Any}("vz_dissipation_coefficient" => 1.0e-1, - "force_minimum_pdf_value" => 0.0, - ), - "ion_source_1" => Dict{String,Any}("active" => true, - "z_profile" => "gaussian", - "z_width" => 0.125, - "source_strength" => 0.1, - "source_T" => 2.0, - ), - "krook_collisions" => Dict{String,Any}("use_krook" => true), + "neutral_numerical_dissipation" => OptionsDict("vz_dissipation_coefficient" => 1.0e-1, + "force_minimum_pdf_value" => 0.0, + ), + "ion_source_1" => OptionsDict("active" => true, + "z_profile" => "gaussian", + "z_width" => 0.125, + "source_strength" => 0.1, + "source_T" => 2.0, + ), + "krook_collisions" => OptionsDict("use_krook" => true), ) function get_mk_state(test_input) From 286cc1d5c6dac116b84e5947a890c7a2a7df149c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 6 Nov 2024 16:23:39 +0000 Subject: [PATCH 05/15] Fix "Allow overwriting when writing a single value to output file" Not sure whether deleting and re-creating String variables does was actually causing a problem (suspect now that it was not), but anyway, requiring that Strings always have the same length when overwriting seems a bit nicer and may be slightly more efficient. --- moment_kinetics/src/file_io.jl | 2 ++ moment_kinetics/src/file_io_hdf5.jl | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 169d5e495..bffab4a01 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -711,6 +711,8 @@ Write a single variable to a file or group. If a description or units are passed attributes of the variable. If `overwrite=true` is passed, overwrite the variable if it already exists in the file. +Note that when overwriting a `String` variable, the new `String` must have exactly the +same length as the original `String`. """ function write_single_value! 
end diff --git a/moment_kinetics/src/file_io_hdf5.jl b/moment_kinetics/src/file_io_hdf5.jl index 8b9ff40e5..413c8f5a8 100644 --- a/moment_kinetics/src/file_io_hdf5.jl +++ b/moment_kinetics/src/file_io_hdf5.jl @@ -92,9 +92,6 @@ function write_single_value!(file_or_group::HDF5.H5DataStore, name, description=nothing, units=nothing, overwrite=false) where {T,N} if isa(data, Union{Number, AbstractString}) - if overwrite && name ∈ keys(file_or_group) - delete_object(file_or_group, name) - end # When we write a scalar, and parallel_io=true, we need to create the variable on # every process in `comm_inter_block[]` but we only want to actually write the # data from one process (we choose `global_rank[]==0`) to avoid corruption due to @@ -106,15 +103,17 @@ function write_single_value!(file_or_group::HDF5.H5DataStore, name, # length of the string, so cannot be easily created by hand. Note that a String of # the correct length must be passed from every process in `comm_inter_block[]`, # but only the contents of the string on `global_rank[]==0` are actually written. 
- io_var, var_hdf5_type = create_dataset(file_or_group, name, data) - if !parallel_io || global_rank[] == 0 - write_dataset(io_var, var_hdf5_type, data) + if !(overwrite && name ∈ keys(file_or_group)) + create_dataset(file_or_group, name, data) + if description !== nothing + add_attribute!(file_or_group[name], "description", description) + end + if units !== nothing + add_attribute!(file_or_group[name], "units", units) + end end - if description !== nothing - add_attribute!(file_or_group[name], "description", description) - end - if units !== nothing - add_attribute!(file_or_group[name], "units", units) + if !parallel_io || global_rank[] == 0 + write(file_or_group[name], data) end return nothing end From 7cc2373ab20bf65e80d1b5b930af2166ee10dc4e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 6 Nov 2024 15:54:01 +0000 Subject: [PATCH 06/15] Fix "Write timing data to output files" Convert "global_timer_string" to a fixed-length String (with a hard-coded length) before (over-)writing. This hopefully avoids HDF5 errors. Ensure that `global_timer_string` can only contain ASCII characters. Reset timers during cleanup rather than at the beginning of `run_moment_kinetics()`, which helps to keep the timers in a consistent state, e.g. for tests that do not use `run_moment_kinetics()` but are run after another run that created timers. 
--- moment_kinetics/src/file_io.jl | 41 ++++++++++++++++++-------- moment_kinetics/src/moment_kinetics.jl | 6 ++-- moment_kinetics/src/timer_utils.jl | 1 + 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index bffab4a01..8cdefccf1 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -2959,6 +2959,9 @@ function write_timing_data(io_moments, t_idx, dfns=false) end end + # Pick a fixed size for "global_timer_string" so that we can overwrite the variable + # without needing to resize it.
+ global_timer_string_size = 10000 # 100 characters x 100 lines seems like a reasonable maximum size. if global_rank[] == 0 || (block_rank[] == 0 && !parallel_io) if t_idx > 1 || t_idx == -1 if t_idx == -1 @@ -2966,6 +2969,17 @@ function write_timing_data(io_moments, t_idx, dfns=false) else if "moment_kinetics" ∈ keys(timer_names_per_rank[global_rank[]]) top_level = ("moment_kinetics", "time_advance! step", "ssp_rk!") + this_dict = timer_names_all_ranks + + # Check all the expected levels are present, otherwise just set + # top_level=nothing. + for n ∈ top_level + if n ∉ keys(this_dict) + top_level = nothing + break + end + this_dict = this_dict[n] + end else # If `time_advance!()` was called in a non-standard way (i.e. not by # `run_moment_kinetics()`), the actual timers might be different. In @@ -2977,10 +2991,15 @@ function write_timing_data(io_moments, t_idx, dfns=false) # was printed to the terminal, for a quick look. string_to_write = format_global_timer(; show_output=false, top_level=top_level) - string_size = Ref(length(string_to_write)) - if parallel_io - MPI.Bcast!(string_size, comm_inter_block[]; root=0) - end + + # Ensure `string_to_write` is no longer than `global_timer_string_size`. + string_to_write = string_to_write[1:min(length(string_to_write), global_timer_string_size)] + # Ensure `string_to_write` is at least as long as `global_timer_string_size`. + # Do this way instead of using `rpad()` because `rpad()` measures the length + # using `textwidth()` rather than a raw character count, whereas we want a + # fixed number of ASCII characters to write to the output file. 
+ string_to_write = string_to_write * ' '^(global_timer_string_size - length(string_to_write)) + write_single_value!(get_group(io_moments.fid, "timing_data"), "global_timer_string", string_to_write; parallel_io=parallel_io, @@ -2991,15 +3010,11 @@ function write_timing_data(io_moments, t_idx, dfns=false) if t_idx > 1 || t_idx == -1 # Although only global_rank[]==0 needs to write "global_timer_string" when we # are using parallel I/O, other ranks in `comm_inter_block[]` must also call - # `write_single_value!() so that the variable in the HDF5 file can be - # (re-)created. - # ???These other ranks do not actually write data though, so it does not - # matter what is passed to the `data` argument of `write_single_value!()` (as - # long as it is a scalar). - # ? Need to pass a string with the right length? - string_size = Ref(0) - MPI.Bcast!(string_size, comm_inter_block[]; root=0) - string_to_write = " " ^ string_size[] + # `write_single_value!() so that the variable in the HDF5 file can be created. + # These other ranks do not actually write data though, so it does not matter + # what is passed to the `data` argument of `write_single_value!()` (as + # long as it is a string with the right length). + string_to_write = " " ^ global_timer_string_size write_single_value!(get_group(io_moments.fid, "timing_data"), "global_timer_string", string_to_write; parallel_io=parallel_io, description="Formatted representation diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index e400400a8..3309ca8ed 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -125,9 +125,6 @@ function run_moment_kinetics(input_dict::OptionsDict; restart=false, restart_tim mk_state = nothing try - # Reset timers in case a previous run was timed - reset_mk_timers!() - @timeit global_timer "moment_kinetics" begin # set up all the structs, etc. 
needed for a run mk_state = setup_moment_kinetics(input_dict; restart=restart, @@ -404,6 +401,9 @@ function cleanup_moment_kinetics!(ascii_io, io_moments, io_dfns) end end + # Reset timers + reset_mk_timers!() + # clean up MPI objects finalize_comms!() diff --git a/moment_kinetics/src/timer_utils.jl b/moment_kinetics/src/timer_utils.jl index 951414707..eba756f2e 100644 --- a/moment_kinetics/src/timer_utils.jl +++ b/moment_kinetics/src/timer_utils.jl @@ -165,6 +165,7 @@ function format_global_timer(; show_output=false, threshold=1.0e-3, truncate_out # when we save the string to an HDF5 or NetCDF file. μ often appears because times # may be printed in microseconds. result = replace(result, "μ" => "u") + result = ascii(replace(result, !isascii=>' ')) end end From 6389e59ff3591c6f53910a2dde00eac035c9e9a6 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 8 Nov 2024 22:36:05 +0000 Subject: [PATCH 07/15] Fix formatting of global_timer_string's description Inconsistent formatting on different ranks makes the description string different lengths, which causes HDF5 errors. --- moment_kinetics/src/file_io.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 8cdefccf1..69122a815 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -2962,6 +2962,7 @@ function write_timing_data(io_moments, t_idx, dfns=false) # Pick a fixed size for "global_timer_string" so that we can overwrite the variable # without needing to resize it. global_timer_string_size = 10000 # 100 characters x 100 lines seems like a reasonable maximum size. 
+ global_timer_description = "Formatted representation of global_timer" if global_rank[] == 0 || (block_rank[] == 0 && !parallel_io) if t_idx > 1 || t_idx == -1 if t_idx == -1 @@ -3003,7 +3004,7 @@ function write_timing_data(io_moments, t_idx, dfns=false) write_single_value!(get_group(io_moments.fid, "timing_data"), "global_timer_string", string_to_write; parallel_io=parallel_io, - description="Formatted representation of global_timer", + description=global_timer_description, overwrite=true) end elseif block_rank[] == 0 @@ -3017,8 +3018,9 @@ function write_timing_data(io_moments, t_idx, dfns=false) string_to_write = " " ^ global_timer_string_size write_single_value!(get_group(io_moments.fid, "timing_data"), "global_timer_string", string_to_write; - parallel_io=parallel_io, description="Formatted representation - of global_timer", overwrite=true) + parallel_io=parallel_io, + description=global_timer_description, + overwrite=true) end end From c825a1b1d1240451399ff46cca83d7852ac92f37 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 11 Nov 2024 19:10:28 +0000 Subject: [PATCH 08/15] Pass around io_input on all ranks On ranks other than the root of each shared-memory block, set `io_moments` and `io_dfns` to `(io_input=io_input,)` so that the input (e.g. `parallel_io` setting) is available on all ranks. --- moment_kinetics/src/file_io.jl | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 69122a815..89e242bbc 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -494,8 +494,8 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe return ascii, io_moments, io_dfns end - # For other processes in the block, return (nothing, nothing, nothing) - return nothing, nothing, nothing + # For other processes in the block, return objects with just the input.
+ return nothing, (io_input=io_input,), (io_input=io_input,) end """ @@ -605,8 +605,8 @@ function setup_electron_io(io_input, vpa, vperp, z, r, composition, collisions, return file_info end - # For other processes in the block, return nothing - return nothing + # For other processes in the block, return an object with just the input. + return (io_input=io_input,) end """ @@ -2152,8 +2152,9 @@ function setup_moments_io(prefix, io_input, vz, vr, vzeta, vpa, vperp, r, z, return file_info end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("setup_moments_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2255,8 +2256,9 @@ function reopen_moments_io(file_info) getvar("nl_solver_diagnostics"), io_input) end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("reopen_moments_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2311,8 +2313,9 @@ function setup_dfns_io(prefix, io_input, boundary_distributions, r, z, vperp, vp return file_info end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("setup_dfns_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2427,8 +2430,9 @@ function reopen_dfns_io(file_info) getvar("f_neutral_start_last_timestep"), io_input, io_moments) end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group...
+ error("reopen_dfns_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2466,7 +2470,7 @@ file io_or_file_info_moments, t_idx, time_for_run, t_params, nl_solver_params, r, z, dfns=false) = begin - io_moments = nothing + io_moments = io_or_file_info_moments @serial_region begin # Only read/write from first process in each 'block' @@ -3434,7 +3438,7 @@ binary output file t_params, nl_solver_params, r, z, vperp, vpa, vzeta, vr, vz) = begin io_dfns = nothing - io_dfns_moments = nothing + io_dfns_moments = io_or_file_info_dfns closefile = true @serial_region begin # Only read/write from first process in each 'block' @@ -3641,7 +3645,7 @@ end close output files for electron initialization """ function finish_electron_io( - binary_initial_electron::Union{io_initial_electron_info,Tuple,Nothing,Bool}) + binary_initial_electron::Union{io_initial_electron_info,Tuple,Nothing,Bool,NamedTuple}) @serial_region begin # Only read/write from first process in each 'block' From ee6cdecb0e10b3aa90b10deb3e3721f3d3ad561b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 23 Oct 2024 11:11:14 +0100 Subject: [PATCH 09/15] Update the kinetic electron test with fixed electron_ppar update --- .../test/kinetic_electron_tests.jl | 148 +++++++++--------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index 723da1be2..6a41a2c2f 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -108,13 +108,13 @@ kinetic_input["timestepping"] = OptionsDict("type" => "PareschiRusso2(2,2,2)", ) kinetic_input["electron_timestepping"] = OptionsDict("nstep" => 5000000, - "dt" => 5.0e-6, + "dt" => 2.0e-5, "nwrite" => 10000, "nwrite_dfns" => 100000, "decrease_dt_iteration_threshold" => 5000, "increase_dt_iteration_threshold" => 0, "cap_factor_ion_dt" => 10.0, - "initialization_residual_value" => 1.0e10, + 
"initialization_residual_value" => 1.0e0, "converged_residual_value" => 1.0e-1, ) @@ -122,7 +122,7 @@ kinetic_input["nonlinear_solver"] = OptionsDict("nonlinear_max_iterations" => 10 "rtol" => 1.0e-8, "atol" => 1.0e-14, "linear_restart" => 5, - "preconditioner_update_interval" => 100, + "preconditioner_update_interval" => 1000, ) @@ -172,94 +172,94 @@ function run_test() # Regression test # Benchmark data generated in serial on Linux - expected_Ez = [-0.5990683230706185 -0.604849806235434; - -0.4944296396481284 -0.49739671491727844; - -0.30889032954504736 -0.30924318765687464; - -0.2064830747303776 -0.20682475071884582; - -0.21232457328748663 -0.21299072376949116; - -0.18233875912042674 -0.18256905463006085; - -0.16711429522309232 -0.1673112962636778; - -0.16920776495088916 -0.1693227707158167; - -0.1629417555658927 -0.16304933113558318; - -0.16619150334079993 -0.16629539618289285; - -0.15918194883360942 -0.1592799009526323; - -0.14034706409006803 -0.140437217833422; - -0.12602184032280567 -0.12610387949683538; - -0.10928716440800472 -0.10935785133612701; - -0.07053969674257217 -0.07058573063123225; - -0.0249577746169536 -0.024974174596810936; - -2.8327303308330514e-15 -1.441401377024236e-10; - 0.024957774616960776 0.02497417427570905; - 0.07053969674257636 0.07058572965952663; - 0.10928716440799909 0.10935785264749627; - 0.1260218403227975 0.12610388283669527; - 0.1403470640900294 0.1404372197714126; - 0.1591819488336015 0.15927992284761766; - 0.16619150334082114 0.1662953275454769; - 0.16294175556587748 0.1630489871826757; - 0.16920776495090983 0.1693233489685909; - 0.1671142952230893 0.16731075590341918; - 0.1823387591204167 0.1825740389953209; - 0.21232457328753865 0.21297925141919793; - 0.20648307473037922 0.20682690396901446; - 0.3088903295450278 0.30925854110074175; - 0.4944296396481271 0.49731601862961966; - 0.5990683230705801 0.6046564647413697] - expected_vthe = [27.08102229345079 27.08346736523219; - 27.087730258479823 27.089003820908527; - 
27.091898844901323 27.09181784480061; - 27.092455021687254 27.092742387764524; - 27.09350739287911 27.094148133125078; - 27.093817059011126 27.093889601910092; - 27.09443981315218 27.094610141036807; - 27.09484177005478 27.094996783801374; - 27.094985914811055 27.0950626278904; - 27.095122128675094 27.09525702879687; - 27.09536357532887 27.09548558966323; - 27.095582117080163 27.095716810823177; - 27.09568783962135 27.09578276803757; - 27.0957775472326 27.095878610625554; - 27.095909169276535 27.09600041573683; - 27.095978269355648 27.096074922150624; - 27.095988166679223 27.096080134292468; - 27.095978269713978 27.096074922508883; - 27.095909171602027 27.096000418062378; - 27.09577755035281 27.095878613746088; - 27.095687838236376 27.095782766652857; - 27.09558211622511 27.095716809968053; - 27.09536353456768 27.09548554890375; - 27.095122105596843 27.095257005693973; - 27.094986093051983 27.09506280663278; - 27.094841563692096 27.094996577040796; - 27.094439553087433 27.094609881510113; - 27.093813728418613 27.09388627063591; - 27.093489818175936 27.094130555874184; - 27.09246140309467 27.092748772044477; - 27.09185903467811 27.09177803239964; - 27.08774827015981 27.089021820036553; - 27.081240668889404 27.0836857414255] + expected_Ez = [-0.5990683230706185 -1.136483186157602; + -0.4944296396481284 -0.9873296990705788; + -0.30889032954504736 -0.6694380824928302; + -0.2064830747303776 -0.4471331690708596; + -0.21232457328748663 -0.423069171542538; + -0.18233875912042674 -0.3586467595624931; + -0.16711429522309232 -0.3018272987758344; + -0.16920776495088916 -0.27814384649305496; + -0.1629417555658927 -0.26124630661090814; + -0.16619150334079993 -0.2572789330163811; + -0.15918194883360942 -0.23720078037362732; + -0.14034706409006803 -0.20520396656341475; + -0.12602184032280567 -0.1827016549071128; + -0.10928716440800472 -0.15808919669899502; + -0.07053969674257217 -0.10137753767917096; + -0.0249577746169536 -0.0358411459260082; + -2.8327303308330514e-15 
-2.0803303361189427e-5; + 0.024957774616960776 0.03584490974053962; + 0.07053969674257636 0.1013692898656727; + 0.10928716440799909 0.15807862358546687; + 0.1260218403227975 0.18263049748179466; + 0.1403470640900294 0.20516566362571026; + 0.1591819488336015 0.23711236692241613; + 0.16619150334082114 0.257126146434857; + 0.16294175556587748 0.2609881259705107; + 0.16920776495090983 0.2778978154805798; + 0.1671142952230893 0.3015349192528757; + 0.1823387591204167 0.3585291689672981; + 0.21232457328753865 0.4231179549656996; + 0.20648307473037922 0.44816400221269476; + 0.3088903295450278 0.6716787105435247; + 0.4944296396481271 0.9861165590258743; + 0.5990683230705801 1.1300034111861956] + expected_vthe = [22.64555285302391 22.485481713141688; + 23.763411647653097 23.63281883616836; + 25.26907160117684 25.181703459470448; + 26.17920352818247 26.12461016686916; + 26.514772631426933 26.476018852279974; + 26.798783188585713 26.774387562937218; + 27.202255545479264 27.203662204308202; + 27.50424749120107 27.527732850637264; + 27.630498656270504 27.6642323848215; + 27.748483758260697 27.79134809261204; + 27.933760382468346 27.990808336620802; + 28.08611508251559 28.153978618442775; + 28.14959662643782 28.221734439130564; + 28.207730844115044 28.283677711828023; + 28.28567669896009 28.36634261525836; + 28.32728392065335 28.410489883644782; + 28.331064506972027 28.41437629072209; + 28.32729968986601 28.41050992096321; + 28.285678151542136 28.366352683865195; + 28.207765527709956 28.28373408727703; + 28.149604559462947 28.221771261090687; + 28.086248527111163 28.154158507899695; + 27.933979289064936 27.991103719847732; + 27.74906125092813 27.792046191405188; + 27.631210333523736 27.66508092926101; + 27.505479130159543 27.529115937508752; + 27.20422756527604 27.20578114592589; + 26.801712351383053 26.77740066591359; + 26.517644511297203 26.478915386575462; + 26.18176436913143 26.127099000267552; + 25.26635932097994 25.178676836919877; + 23.756593489029708 23.625697695979085; + 
22.64390166090378 22.48400980852866] if expected_Ez == nothing # Error: no expected input provided println("data tested would be: Ez=", Ez) @test false else - @test isapprox(Ez, expected_Ez, rtol=1.0e-7, atol=1.0e-9) + @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0e-6) end if expected_vthe == nothing # Error: no expected input provided println("data tested would be: vthe=", vthe) @test false else - @test isapprox(vthe, expected_vthe, rtol=2.0e-9, atol=0.0) + @test elementwise_isapprox(vthe, expected_vthe, rtol=1.0e-6, atol=0.0) end # Iteration counts are fairly inconsistent, but it's good to check that they at # least don't unexpectedly increase by an order of magnitude. # Expected iteration count is from a serial run on Linux. - expected_electron_advance_linear_iterations = 11394 - @test electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations - if !(electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations) + expected_electron_advance_linear_iterations = 49307 + @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations + if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations) println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") end end From c83c0036cf691b9110b6429b3e516799e107dc4c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 11 Nov 2024 19:53:25 +0000 Subject: [PATCH 10/15] Write timers in arrays of size global_size[] ...rather than in a separate group for each rank. This avoids creating a very large number of variables in the output file when running on many cores, which seems to help prevent parallel HDF5 errors. 
--- .../src/makie_post_processing.jl | 252 +++++---- moment_kinetics/src/file_io.jl | 518 ++++++------------ moment_kinetics/src/file_io_hdf5.jl | 6 +- moment_kinetics/src/load_data.jl | 17 +- moment_kinetics/src/timer_utils.jl | 16 +- 5 files changed, 310 insertions(+), 499 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index b881ddfa7..77dd8650c 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -8515,7 +8515,7 @@ function timing_data(run_info::Tuple; plot_prefix=nothing, threshold=nothing, ncalls_ax=ncalls_ax, allocs_ax=allocs_ax, irun=irun, figsize=figsize) end - if interactive_figs === nothing && string(Makie.current_backend()) == "GLMakie" + if interactive_figs !== nothing && string(Makie.current_backend()) == "GLMakie" # Can make interactive plots if isa(interactive_figs, Symbol) @@ -8658,147 +8658,159 @@ function timing_data(run_info; plot_prefix=nothing, threshold=nothing, end linestyles = linestyle=[:solid, :dash, :dot, :dashdot, :dashdotdot] - for irank ∈ 0:run_info.nrank-1 - all_global_timer_variables = run_info.timing_variable_names[irank] - time_advance_timer_variables = [v for v ∈ all_global_timer_variables if occursin("time_advance! 
step", v)] - time_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "time:")] - ncalls_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "ncalls:")] - allocs_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "allocs:")] - - this_rank_group = "timing_data/rank$irank" - - inspector_label_func = (self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])" - - function label_irank(ax, variable, color, unit_conversion=1) - if run_info.nrank > 1 - # Label curves with irank so we can tell which is which - index = ((irank + 1) % (length(variable) - 1)) + 1 - with_theme - text!(ax, run_info.time[index], - variable[index] * unit_conversion; - text="$irank", color=color) - end - end - - function check_include_exclude(variable_name) - explicitly_included = (include_patterns !== nothing && - any(occursin(p, variable_name) for p ∈ include_patterns)) - if exclude_patterns === nothing && include_patterns !== nothing - excluded = !explicitly_included - elseif exclude_patterns !== nothing - if !explicitly_included && - any(occursin(p, variable_name) for p ∈ exclude_patterns) - excluded = true - else - excluded = false - end + time_advance_timer_variables = [v for v ∈ run_info.timing_variable_names if occursin("time_advance! 
step", v)] + time_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "time:")] + ncalls_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "ncalls:")] + allocs_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "allocs:")] + + timing_group = "timing_data" + + function label_irank(ax, variable, irank, color, unit_conversion=1) + if run_info.nrank > 1 + # Label curves with irank so we can tell which is which + index = ((irank + 1) % (length(variable) - 1)) + 1 + with_theme + text!(ax, run_info.time[index], + variable[index] * unit_conversion; + text="$irank", color=color) + end + end + + function check_include_exclude(variable_name) + explicitly_included = (include_patterns !== nothing && + any(occursin(p, variable_name) for p ∈ include_patterns)) + if exclude_patterns === nothing && include_patterns !== nothing + excluded = !explicitly_included + elseif exclude_patterns !== nothing + if !explicitly_included && + any(occursin(p, variable_name) for p ∈ exclude_patterns) + excluded = true else excluded = false end - return excluded, explicitly_included + else + excluded = false end + return excluded, explicitly_included + end - # Plot the total time - time_unit_conversion = 1.0e-9 # ns to s - total_time_variable_name = "time:moment_kinetics;time_advance! step" - total_time = get_variable(run_info, total_time_variable_name * "_per_step", - group=this_rank_group) + # Plot the total time + time_unit_conversion = 1.0e-9 # ns to s + total_time_variable_name = "time:moment_kinetics;time_advance! step" + total_time = get_variable(run_info, total_time_variable_name * "_per_step", + group=timing_group) + for irank ∈ 0:run_info.nrank-1 label = "time_advance! 
step" - lines!(times_ax, run_info.time, total_time .* time_unit_conversion; color=:black, - linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(times_ax, total_time, :black, time_unit_conversion) - mean_total_time = mean(total_time) - for (variable_counter, variable_name) ∈ enumerate(time_variables) - if variable_name == total_time_variable_name - # Plotted this already - continue - end - excluded, explicitly_included = check_include_exclude(variable_name) - if excluded - continue - end - variable = get_variable(run_info, variable_name * "_per_step", - group=this_rank_group) - if !explicitly_included && mean(variable) < threshold * mean_total_time - # This variable takes a very small amount of time, so skip. - continue - end + irank_slice = total_time[irank+1,:] + lines!(times_ax, run_info.time, irank_slice .* time_unit_conversion; + color=:black, linestyle=linestyles[irun], label=label, + inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(times_ax, irank_slice, irank, :black, time_unit_conversion) + end + mean_total_time = mean(total_time) + for (variable_counter, variable_name) ∈ enumerate(time_variables) + if variable_name == total_time_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=timing_group) + if !explicitly_included && mean(variable) < threshold * mean_total_time + # This variable takes a very small amount of time, so skip. + continue + end + for irank ∈ 0:run_info.nrank-1 label = split(variable_name, "time_advance! 
step;")[2] - l = lines!(times_ax, run_info.time, variable .* time_unit_conversion; + irank_slice = variable[irank+1,:] + l = lines!(times_ax, run_info.time, irank_slice .* time_unit_conversion; color=Cycled(variable_counter), linestyle=linestyles[irun], - label=label, inspector_label=inspector_label_func) - label_irank(times_ax, variable, l.color, time_unit_conversion) + label=label, inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(times_ax, irank_slice, irank, l.color, time_unit_conversion) end + end - # Plot the number of calls - total_ncalls_variable_name = "ncalls:moment_kinetics;time_advance! step" - total_ncalls = get_variable(run_info, total_ncalls_variable_name * "_per_step", - group=this_rank_group) + # Plot the number of calls + total_ncalls_variable_name = "ncalls:moment_kinetics;time_advance! step" + total_ncalls = get_variable(run_info, total_ncalls_variable_name * "_per_step", + group=timing_group) + for irank ∈ 0:run_info.nrank-1 label = "time_advance! step" - lines!(ncalls_ax, run_info.time, total_ncalls; color=:black, + irank_slice = total_ncalls[irank+1,:] + lines!(ncalls_ax, run_info.time, irank_slice; color=:black, linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(ncalls_ax, total_ncalls, :black) - mean_total_ncalls = mean(total_ncalls) - for (variable_counter, variable_name) ∈ enumerate(ncalls_variables) - if variable_name == total_ncalls_variable_name - # Plotted this already - continue - end - excluded, explicitly_included = check_include_exclude(variable_name) - if excluded - continue - end - variable = get_variable(run_info, variable_name * "_per_step", - group=this_rank_group) - if !explicitly_included && mean(variable) < threshold * mean_total_ncalls - # This variable takes a very small number of calls, so skip. 
- continue - end + inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(ncalls_ax, irank_slice, irank, :black) + end + mean_total_ncalls = mean(total_ncalls) + for (variable_counter, variable_name) ∈ enumerate(ncalls_variables) + if variable_name == total_ncalls_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=timing_group) + if !explicitly_included && mean(variable) < threshold * mean_total_ncalls + # This variable takes a very small number of calls, so skip. + continue + end + for irank ∈ 0:run_info.nrank-1 label = split(variable_name, "time_advance! step;")[2] - l = lines!(ncalls_ax, run_info.time, variable; color=Cycled(variable_counter), - linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(ncalls_ax, variable, l.color) + irank_slice = variable[irank+1,:] + l = lines!(ncalls_ax, run_info.time, irank_slice; + color=Cycled(variable_counter), linestyle=linestyles[irun], + label=label, inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(ncalls_ax, irank_slice, irank, l.color) end + end - # Plot the total allocs - allocs_unit_conversion = 2^(-20) # bytes to MB - total_allocs_variable_name = "allocs:moment_kinetics;time_advance! step" - total_allocs = get_variable(run_info, total_allocs_variable_name * "_per_step", - group=this_rank_group) + # Plot the total allocs + allocs_unit_conversion = 2^(-20) # bytes to MB + total_allocs_variable_name = "allocs:moment_kinetics;time_advance! step" + total_allocs = get_variable(run_info, total_allocs_variable_name * "_per_step", + group=timing_group) + for irank ∈ 0:run_info.nrank-1 label = "time_advance! 
step" - lines!(allocs_ax, run_info.time, total_allocs .* allocs_unit_conversion; + irank_slice = total_allocs[irank+1,:] + lines!(allocs_ax, run_info.time, irank_slice .* allocs_unit_conversion; color=:black, linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(allocs_ax, total_allocs, :black, allocs_unit_conversion) - mean_total_allocs = mean(total_allocs) - for (variable_counter, variable_name) ∈ enumerate(allocs_variables) - if variable_name == total_allocs_variable_name - # Plotted this already - continue - end - excluded, explicitly_included = check_include_exclude(variable_name) - if excluded - continue - end - variable = get_variable(run_info, variable_name * "_per_step", - group=this_rank_group) - if !explicitly_included && mean(variable) < threshold * mean_total_allocs - # This variable represents a very small amount of allocs, so skip. - continue - end + inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(allocs_ax, irank_slice, irank, :black, allocs_unit_conversion) + end + mean_total_allocs = mean(total_allocs) + for (variable_counter, variable_name) ∈ enumerate(allocs_variables) + if variable_name == total_allocs_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=timing_group) + if !explicitly_included && mean(variable) < threshold * mean_total_allocs + # This variable represents a very small amount of allocs, so skip. + continue + end + for irank ∈ 0:run_info.nrank-1 label = split(variable_name, "time_advance! 
step;")[2] - l = lines!(allocs_ax, run_info.time, variable .* allocs_unit_conversion; + irank_slice = variable[irank+1,:] + l = lines!(allocs_ax, run_info.time, irank_slice .* allocs_unit_conversion; color=Cycled(variable_counter), linestyle=linestyles[irun], - label=label, inspector_label=inspector_label_func) - label_irank(allocs_ax, variable, l.color, - allocs_unit_conversion) + label=label, inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(allocs_ax, irank_slice, irank, l.color, allocs_unit_conversion) end end - if times_fig !== nothing && plot_prefix === nothing && + if interactive_figs !== nothing && times_fig !== nothing && plot_prefix === nothing && string(Makie.current_backend()) == "GLMakie" # Can make interactive plots diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 89e242bbc..5630aaf6d 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -15,7 +15,7 @@ using ..debugging using ..input_structs using ..looping using ..timer_utils -using ..timer_utils: timer_names_per_rank_moments, timer_names_per_rank_dfns, +using ..timer_utils: timer_names_all_ranks_moments, timer_names_all_ranks_dfns, TimerNamesDict, SortedDict using ..moment_kinetics_structs: scratch_pdf, em_fields_struct using ..type_definitions: mk_float, mk_int @@ -434,29 +434,6 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe external_source_settings, input_dict, restart_time_index, previous_runs_info, time_for_setup, t_params, nl_solver_params) - # A Dict to store the names of timers that have been created on each MPI rank - if block_rank[] == 0 - if io_input.parallel_io - # Need timer name for every rank, as variable creation and extension are - # collective operations. 
- for irank ∈ 0:global_size[]-1 - timer_names_per_rank_moments[irank] = (TimerNamesDict(), Ref(0)) - timer_names_per_rank_dfns[irank] = (TimerNamesDict(), Ref(0)) - end - else - # Timers only written from their own block, so only names from this block are - # needed. - for irank_in_block ∈ 0:block_size[]-1 - this_global_rank = global_rank[] + irank_in_block - timer_names_per_rank_moments[this_global_rank] = (TimerNamesDict(), Ref(0)) - timer_names_per_rank_dfns[this_global_rank] = (TimerNamesDict(), Ref(0)) - end - end - else - timer_names_per_rank_moments[global_rank[]] = (TimerNamesDict(), Ref(0)) - timer_names_per_rank_dfns[global_rank[]] = (TimerNamesDict(), Ref(0)) - end - begin_serial_region() @serial_region begin # Only read/write from first process in each 'block' @@ -1116,9 +1093,6 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, dynamic = create_io_group(fid, "dynamic_data", description="time evolving variables") timing = create_io_group(fid, "timing_data", description="timing data to check run-time performance") - for i ∈ 0:global_size[]-1 - create_io_group(timing, "rank$i", description="timing data for MPI rank $i") - end io_time = create_dynamic_variable!(dynamic, "time", mk_float; parallel_io=parallel_io, description="simulation time") @@ -2541,52 +2515,52 @@ function write_timing_data(io_moments, t_idx, dfns=false) # hard-coded list of every timer (which would make it inconvenient to add new timers # or debug timers). # - # As well, when using parallel I/O all processes (in the communicator that opens the - # output file) must create and extend variables, although it's OK for only one process - # to write a variable if the variable is a scalar. - # - # These two facts mean that every process in `comm_inter_block[]` needs to know the - # complete list of timers on every rank, so we have to gather this list. The list is - # stored in `timer_names_per_rank_moments` and `timer_names_per_rank_dfns`. 
We store - # two separate lists because moments and dfns might not be written at the same times, - # so some timers may be newly added in one but not the other, and so need to be - # tracked separately. + # It is most convenient for every process in `comm_world` to know the complete list + # of timers that exist on any rank, so we have to gather this list. The list is + # stored in `timer_names_all_ranks_moments` and `timer_names_all_ranks_dfns`. We + # store two separate lists because moments and dfns might not be written at the same + # times, so some timers may be newly added in one but not the other, and so need to + # be tracked separately. # - # In order to make sure that every process in `comm_inter_block[]` deals with the same - # variable at the same time, we store the variable names for each rank in a nested - # SortedDict (with the same nesting structure as the `global_timer` on each rank). The - # sort applied to the keys of a SortedDict means that we can be sure to iterate - # through the names in the same order on every process. The names for every rank are - # collected onto each process in `comm_inter_block[]`, but on the rest of the - # processes, only the names from that individual process are collected (as 'the rest' - # do not write data, so do not need information from other processes). + # In order to make sure that every process deals with the same variable at the same + # time, we store the variable names for each rank in a nested SortedDict (with the + # same nesting structure as the `global_timer` on each rank). The sort applied to + # the keys of a SortedDict means that we can be sure to iterate through the names in + # the same order on every process. # - # Only the process in the same shared-memory block as `irank` collects the timing data - # from `irank` and writes it to file. + # The timing data for every process in a shared-memory block is collected to the + # root process of the block to be written to file. 
# # In order to avoid communicating many strings at every output step, we first check - # how many timers there are on each rank, and compare that to the number in the - # collected list. The number of new variables is gathered, and if there are no new - # variables, no communication of variable names needs to be done. If communication is - # needed, only the names of the new variables need to be gathered. + # how many new timers there are on each rank, that are not in the global list of timers. + # The number of new variables is gathered, and if there are no new variables, no + # communication of variable names needs to be done. If communication is needed, only + # the names of the new variables need to be gathered. # # Once the lists of variable names have been updated, the timing data is gathered onto # the root process of each shared-memory block. The data is communicated as vectors of # integers, with the order of the entries being determined by the order of the nested # SortedDict. # - # Each entry in `timer_names_per_rank_moments` and `timer_names_per_rank_dfns` is a - # SortedDict, even if it does not yet contain any other entries, because sub-timers - # might be added at any point. + # Each entry in `timer_names_all_ranks_moments` and `timer_names_all_ranks_dfns` is + # a SortedDict, even if it does not yet contain any other entries, because + # sub-timers might be added at any point. if dfns - timer_names_per_rank = timer_names_per_rank_dfns + timer_names_all_ranks = timer_names_all_ranks_dfns else - timer_names_per_rank = timer_names_per_rank_moments + timer_names_all_ranks = timer_names_all_ranks_moments + end + + # Collect the names that have not been used on any rank before this call. + unique_new_names = String[] + + if block_rank[] == 0 + io_group = get_group(io_moments.fid, "timing_data") end # Find any new timer names on this process.
- function get_new_timer_names(timer_names_dict) + function get_new_timer_names() new_timer_names = String[] function get_names_inner(this_timer, timer_names_subdict, prefix) names_subdict_keys = keys(timer_names_subdict) @@ -2606,49 +2580,29 @@ function write_timing_data(io_moments, t_idx, dfns=false) end return nothing end - get_names_inner(global_timer, timer_names_dict, "") + get_names_inner(global_timer, timer_names_all_ranks, "") return new_timer_names end - # Calculate the total number of entries in a nested Dict. This is used to store the - # total size of each entry of `timer_names_per_rank_moments` and - # `timer_names_per_rank_dfns` so that we know the size of the arrays of data that need - # to be communicated. - function get_size_of_timer_dict(this_dict) - # One count for the timer represented by this_dict. - counter = 1 - - # Recursively iterate through all contained Dicts, and add them to the counter. - for sub_dict ∈ values(this_dict) - counter += get_size_of_timer_dict(sub_dict) - end - return counter - end - - # Add the new timer names in to `timer_names_per_rank_moments` or - # `timer_names_per_rank_dfns`. - function add_new_timer_names!(timer_names, new_timer_names, irank=nothing) + # Add the new timer names in to `timer_names_all_ranks_moments` or + # `timer_names_all_ranks_dfns`. Note that there may be duplicate names in + # new_timer_names, and these will be ignored. 
+ function add_new_timer_names!(new_timer_names) for n ∈ new_timer_names - if irank === nothing - this_rank, this_name = split(n, ":") - this_rank = parse(mk_int, this_rank) - this_dict, _ = timer_names_per_rank[this_rank] - else - this_name = n - this_dict, _ = timer_names_per_rank[irank] - end - split_name = split(this_name, ";") - for level ∈ split_name - if level ∉ keys(this_dict) - this_dict[level] = TimerNamesDict() + this_dict_all_ranks = timer_names_all_ranks + split_name = split(n, ";") + n_levels = length(split_name) + for (i, level) ∈ enumerate(split_name) + if level ∉ keys(this_dict_all_ranks) + this_dict_all_ranks[level] = TimerNamesDict() + if i == n_levels + # New variable that has not been used on any rank before. + push!(unique_new_names, n) + end end - this_dict = this_dict[level] + this_dict_all_ranks = this_dict_all_ranks[level] end end - for (timer_names_dict, timer_names_dict_size) ∈ values(timer_names_per_rank) - # Subtract 1 because we do not want to count the 'top level' as an entry. - timer_names_dict_size[] = get_size_of_timer_dict(timer_names_dict) - 1 - end return nothing end @@ -2656,13 +2610,14 @@ function write_timing_data(io_moments, t_idx, dfns=false) # simultaneously by all processes in `comm_inter_block[]`. 
function create_new_timer_io_variables!(new_timer_names, timer_group, parallel_io) for n ∈ new_timer_names - this_rank, this_name = split(n, ":") - rank_group = get_group(timer_group, "rank$this_rank") - create_dynamic_variable!(rank_group, "time:" * this_name, mk_int; + create_dynamic_variable!(io_group, "time:" * n, mk_int, + (name="rank", n=global_size[]); parallel_io=parallel_io) - create_dynamic_variable!(rank_group, "ncalls:" * this_name, mk_int; + create_dynamic_variable!(io_group, "ncalls:" * n, mk_int, + (name="rank", n=global_size[]); parallel_io=parallel_io) - create_dynamic_variable!(rank_group, "allocs:" * this_name, mk_int; + create_dynamic_variable!(io_group, "allocs:" * n, mk_int, + (name="rank", n=global_size[]); parallel_io=parallel_io) end return nothing @@ -2671,296 +2626,143 @@ function write_timing_data(io_moments, t_idx, dfns=false) # Once all the names are known, use this function to collect all the data from timers # on this process into arrays to be communicated. function get_data_from_timers() - sorted_timer_names = timer_names_per_rank[global_rank[]][1] times = mk_int[] ncalls = mk_int[] allocs = mk_int[] function walk_through_timers(names_dict, timer) - push!(times, timer.accumulated_data.time) - push!(ncalls, timer.accumulated_data.ncalls) - push!(allocs, timer.accumulated_data.allocs) + if timer === nothing + # Timer not found on this rank, so set to 0 + push!(times, 0.0) + push!(ncalls, 0.0) + push!(allocs, 0.0) + else + push!(times, timer.accumulated_data.time) + push!(ncalls, timer.accumulated_data.ncalls) + push!(allocs, timer.accumulated_data.allocs) + end # Note that here we have to get the order of entries from names_dict (which is # a SortedDict) to ensure that the order of each list is consistent between # all different processes. 
for (sub_name, sub_dict) ∈ pairs(names_dict) - walk_through_timers(sub_dict, timer[sub_name]) + if timer === nothing || sub_name ∉ keys(timer.inner_timers) + walk_through_timers(sub_dict, nothing) + else + walk_through_timers(sub_dict, timer[sub_name]) + end end end - for name ∈ keys(sorted_timer_names) - walk_through_timers(sorted_timer_names[name], global_timer[name]) + for name ∈ keys(timer_names_all_ranks) + walk_through_timers(timer_names_all_ranks[name], global_timer[name]) end return times, ncalls, allocs end - if block_rank[] == 0 - parallel_io = io_moments.io_input.parallel_io - end - - # First count how many new timer names there are on all the processes in this block - if block_rank[] == 0 - n_new_names = Vector{mk_int}(undef, block_size[]) - new_names_iblock = get_new_timer_names(timer_names_per_rank[global_rank[]][1]) - n_new_names[1] = length(new_names_iblock) - reqs = [MPI.Irecv!(@view(n_new_names[irank+1:irank+1]), comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - MPI.Waitall(reqs) - block_total_new_names = sum(n_new_names) + parallel_io = io_moments.io_input.parallel_io + if parallel_io + comm = comm_world + comm_size = global_size[] + comm_rank = global_rank[] else - new_names_iblock = get_new_timer_names(timer_names_per_rank[global_rank[]][1]) - add_new_timer_names!(timer_names_per_rank, new_names_iblock, global_rank[][1]) - n_new_names_iblock = Ref(length(new_names_iblock)) - MPI.Send(n_new_names_iblock, comm_block[]; dest=0) + comm = comm_block[] + comm_size = block_size[] + comm_rank = block_rank[] end - # Gather new names, from any processes in the block that have new names. 
- if block_rank[] == 0 - if block_total_new_names > 0 - new_names = SortedDict{mk_int,Vector{String}}() - if n_new_names[1] > 0 - new_names[0] = new_names_iblock - else - new_names[0] = String[] - end - for iblock ∈ 1:block_size[]-1 - if n_new_names[iblock+1] > 0 - names_length = Ref(0) - MPI.Recv!(names_length, comm_block[]; source=iblock) - names_char_vector = Vector{Char}(undef, names_length[]) - MPI.Recv!(names_char_vector, comm_block[]; source=iblock) - new_names[iblock] = split(string(names_char_vector...), "&") - else - new_names[iblock] = String[] - end - end - end - else - if n_new_names_iblock[] > 0 - names_string = join(new_names_iblock, "&") - names_char_vector = [names_string...] - names_length = Ref(length(names_char_vector)) - MPI.Send(names_length, comm_block[]; dest=0) - MPI.Send(names_char_vector, comm_block[]; dest=0) - end - end - - # Allgather new names onto all ranks in comm_inter_block[]. - # If parallel_io=false, no need to communicate between different blocks, as each block - # writes output independently. - if block_rank[] == 0 && parallel_io - # Get total number of new names - global_total_new_names = MPI.Allreduce(block_total_new_names, +, - comm_inter_block[]) - - if global_total_new_names > 0 - # Pack all new names for a block into a single string for communication. - this_block_string = "" - for irank ∈ 0:block_size[]-1 - this_global_rank = global_rank[] + irank - this_rank_string = string(("$this_global_rank:" * s * "&" for s ∈ new_names[irank])...) 
- this_block_string = join([this_block_string, this_rank_string]) - end - - # Get the sizes of the per-block strings that need to be gathered - string_sizes = Vector{mk_int}(undef, n_blocks[]) - string_sizes[iblock_index[]+1] = length(this_block_string) - string_buffer = MPI.UBuffer(string_sizes, 1) - MPI.Allgather!(string_buffer, comm_inter_block[]) + # First count how many new timer names there are on all processes + new_names_this_rank = get_new_timer_names() + n_new_names = Ref(length(new_names_this_rank)) + MPI.Allreduce!(n_new_names, +, comm) - # Gather the strings - gathered_char_vector = Vector{Char}(undef, sum(string_sizes)) - local_start_index = sum(string_sizes[1:iblock_index[]]) + 1 - local_end_index = local_start_index - 1 + string_sizes[iblock_index[]+1] - gathered_char_vector[local_start_index:local_end_index] .= [this_block_string...] - gathered_buffer = MPI.VBuffer(gathered_char_vector, string_sizes) - MPI.Allgatherv!(gathered_buffer, comm_inter_block[]) + # Allgather new names onto all ranks - if parallel_io=true this is all ranks in + # comm_world, if parallel_io=false it is all ranks in comm_block[] (as each block + # writes output independently). + if n_new_names[] > 0 + # Pack all new names into a single string for communication. + new_names_string = string((s * "&" for s ∈ new_names_this_rank)...) - # The string will end with a "&", so we need to slice off the final element, which - # will be an empty string.
- all_new_names = split(string(gathered_char_vector...), "&")[1:end-1] + # Get the sizes of the per-rank strings that need to be gathered + string_sizes = Vector{mk_int}(undef, comm_size) + string_sizes[comm_rank+1] = length(new_names_string) + string_buffer = MPI.UBuffer(string_sizes, 1) + MPI.Allgather!(string_buffer, comm) - # Add the new names to timer_names_per_rank - add_new_timer_names!(timer_names_per_rank, all_new_names) + # Gather the strings + gathered_char_vector = Vector{Char}(undef, sum(string_sizes)) + local_start_index = sum(string_sizes[1:comm_rank]) + 1 + local_end_index = local_start_index - 1 + string_sizes[comm_rank+1] + gathered_char_vector[local_start_index:local_end_index] .= [new_names_string...] + gathered_buffer = MPI.VBuffer(gathered_char_vector, string_sizes) + MPI.Allgatherv!(gathered_buffer, comm) - create_new_timer_io_variables!(all_new_names, - get_group(io_moments.fid, "timing_data"), - parallel_io) - end - elseif block_rank[] == 0 - # This is the `parallel_io=false` case - if block_total_new_names > 0 - # Put irank prefixes onto timer names - for irank ∈ 0:block_size[] - 1 - this_global_rank = global_rank[] + irank - names_without_prefix = new_names[irank] - new_names[irank] = ["$this_global_rank:" * s for s ∈ names_without_prefix] - end + # The string will end with a "&", so we need to slice off the final element, which + # will be an empty string. + all_new_names = split(string(gathered_char_vector...), "&")[1:end-1] - all_new_names = vcat((new_names[irank] for irank ∈ 0:block_size[]-1)...) 
+ # Add the new names to timer_names_all_ranks + add_new_timer_names!(all_new_names) - # Add the new names to timer_names_per_rank - add_new_timer_names!(timer_names_per_rank, all_new_names) - - create_new_timer_io_variables!(all_new_names, - get_group(io_moments.fid, "timing_data"), - parallel_io) + if block_rank[] == 0 + create_new_timer_io_variables!(unique_new_names, io_group, parallel_io) end end # Collect the timing data onto the root process of each block - if block_rank[] == 0 - times_data = SortedDict{mk_int,Vector{mk_int}}() - ncalls_data = SortedDict{mk_int,Vector{mk_int}}() - allocs_data = SortedDict{mk_int,Vector{mk_int}}() - times_data[0], ncalls_data[0], allocs_data[0] = get_data_from_timers() - for irank ∈ 1:block_size[]-1 - this_global_rank = global_rank[] + irank - times_data[irank] = zeros(mk_int, timer_names_per_rank[this_global_rank][2][]) - ncalls_data[irank] = zeros(mk_int, timer_names_per_rank[this_global_rank][2][]) - allocs_data[irank] = zeros(mk_int, timer_names_per_rank[this_global_rank][2][]) - end - times_reqs = MPI.Request[MPI.Irecv!(times_data[irank], comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - ncalls_reqs = MPI.Request[MPI.Irecv!(ncalls_data[irank], comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - allocs_reqs = MPI.Request[MPI.Irecv!(allocs_data[irank], comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - MPI.Waitall(MPI.Request[times_reqs..., ncalls_reqs..., allocs_reqs...]) - else - times_data, ncalls_data, allocs_data = get_data_from_timers() - times_req = MPI.Isend(times_data, comm_block[]; dest=0) - ncalls_req = MPI.Isend(ncalls_data, comm_block[]; dest=0) - allocs_req = MPI.Isend(allocs_data, comm_block[]; dest=0) - MPI.Waitall([times_req, ncalls_req, allocs_req]) - end + times_data, ncalls_data, allocs_data = get_data_from_timers() + n_timers = length(times_data) + gathered_times_data = MPI.Gather(times_data, comm_block[]; root=0) + gathered_ncalls_data = MPI.Gather(ncalls_data,
comm_block[]; root=0) + gathered_allocs_data = MPI.Gather(allocs_data, comm_block[]; root=0) if block_rank[] == 0 + gathered_times_data = reshape(gathered_times_data, n_timers, block_size[]) + gathered_ncalls_data = reshape(gathered_ncalls_data, n_timers, block_size[]) + gathered_allocs_data = reshape(gathered_allocs_data, n_timers, block_size[]) + # Write the timer variables - for irank ∈ keys(timer_names_per_rank) - this_rank_block = irank ÷ block_size[] - this_rank_rank = irank % block_size[] - this_rank_names, this_rank_nvars = timer_names_per_rank[irank] - - # Only the process in the same shared-memory block as the data came from - # actually has the data, so this process is the one that writes it. - write_from_this_rank = (this_rank_block == iblock_index[]) - - # We iterate through the variables in the same order as they were packed into - # the array in `get_data_from_timers()`, so `counter` gets the corresponding - # data from the flattened array that was communicated. - counter = 1 - io_group = get_group(io_moments.fid, "timing_data/rank$irank") - - # For timers on the root process of each shared memory block, we do not need - # to communicate so we can write the data directly from the timers. This - # should help ensure that at least for these processes the timer data has the - # right name in the output file since no packing/unpacking of flattened arrays - # is needed. This verification (since times can then be compared to other - # processes with the expectation that most of them are similar) justifies the - # partial code duplication in having this function be separate from - # `write_for_irank()`. - function write_for_block_root(names_dict, this_name, this_timer, - this_top_level_name=this_name) - io_time = io_group["time:" * this_name] - io_ncalls = io_group["ncalls:" * this_name] - io_allocs = io_group["allocs:" * this_name] - if t_idx < 0 - # The top-level timer (usually "moment_kinetics" was probably the - # first one created. 
It definitely exists, and should have been - # written at each timestep. - # If we got the length of `time:$this_name`, the variable might have - # the wrong length (e.g. if it has only just been created and has - # length 1). - this_t_idx = length(io_group["time:" * this_top_level_name]) - else - this_t_idx = t_idx - end - if write_from_this_rank - append_to_dynamic_var(io_time, this_timer.accumulated_data.time, - this_t_idx, parallel_io; - write_from_this_rank=true) - append_to_dynamic_var(io_ncalls, this_timer.accumulated_data.ncalls, - this_t_idx, parallel_io; - write_from_this_rank=true) - append_to_dynamic_var(io_allocs, this_timer.accumulated_data.allocs, - this_t_idx, parallel_io; - write_from_this_rank=true) - else - append_to_dynamic_var(io_time, nothing, this_t_idx, - parallel_io; write_from_this_rank=false) - append_to_dynamic_var(io_ncalls, nothing, this_t_idx, - parallel_io; write_from_this_rank=false) - append_to_dynamic_var(io_allocs, nothing, this_t_idx, - parallel_io; write_from_this_rank=false) - end - counter += 1 - for (sub_name, sub_dict) ∈ pairs(names_dict) - write_for_block_root(sub_dict, this_name * ";" * sub_name, - this_timer === nothing ? nothing : - this_timer[sub_name], this_top_level_name) - end - end - # Write data for non-root processes in the shared memory block, which must be - # unpacked from the arrays that were communicated. - function write_for_irank(names_dict, this_name, this_top_level_name=this_name) - io_time = io_group["time:" * this_name] - io_ncalls = io_group["ncalls:" * this_name] - io_allocs = io_group["allocs:" * this_name] - if t_idx < 0 - # The top-level timer (usually "moment_kinetics" was probably the - # first one created. It definitely exists, and should have been - # written at each timestep. - # If we got the length of `time:$this_name`, the variable might have - # the wrong length (e.g. if it has only just been created and has - # length 1). 
- this_t_idx = length(io_group["time:" * this_top_level_name]) - else - this_t_idx = t_idx - end - if write_from_this_rank - irank_in_block = irank % block_size[] - append_to_dynamic_var(io_time, times_data[irank_in_block][counter], - this_t_idx, parallel_io, - write_from_this_rank=true) - append_to_dynamic_var(io_ncalls, ncalls_data[irank_in_block][counter], - this_t_idx, parallel_io, - write_from_this_rank=true) - append_to_dynamic_var(io_allocs, allocs_data[irank_in_block][counter], - this_t_idx, parallel_io, - write_from_this_rank=true) - else - append_to_dynamic_var(io_time, nothing, this_t_idx, - parallel_io, write_from_this_rank=false) - append_to_dynamic_var(io_ncalls, nothing, this_t_idx, - parallel_io, write_from_this_rank=false) - append_to_dynamic_var(io_allocs, nothing, this_t_idx, - parallel_io, write_from_this_rank=false) - end - counter += 1 - for (sub_name, sub_dict) ∈ pairs(names_dict) - write_for_irank(sub_dict, this_name * ";" * sub_name, this_top_level_name) - end - end + # We iterate through the variables in the same order as they were packed into + # the array in `get_data_from_timers()`, so `counter` gets the corresponding + # data from the flattened array that was communicated. + counter = 1 - if this_rank_rank == 0 - # This was the data from the root process of a shared-memory block. - for top_level_name ∈ keys(this_rank_names) - write_for_block_root(this_rank_names[top_level_name], top_level_name, - write_from_this_rank ? global_timer[top_level_name] : nothing) - end - else - # This was data from a process that is not the root of a shared-memory - # block, so had to be packed, communicated, and unpacked. - for top_level_name ∈ keys(this_rank_names) - write_for_irank(this_rank_names[top_level_name], top_level_name) - end - end - if counter != this_rank_nvars[] + 1 - error("Got wrong number of timers. 
Wrote $(counter-1) but should have " - * "been $this_rank_nvars") + # Write data for all processes in the shared memory block, which must be + # unpacked from the arrays that were communicated. + if t_idx < 0 + # The top-level timer (usually "moment_kinetics") was probably the + # first one created. It definitely exists, and should have been + # written at each timestep. + # If we got the length of `time:$this_name`, the variable might have + # the wrong length (e.g. if it has only just been created and has + # length 1). + t_idx = length(io_group["time:" * first(keys(timer_names_all_ranks))]) + end + if parallel_io + timer_coord = (local_io_range=1:block_size[], + global_io_range=global_rank[]+1:global_rank[]+block_size[]) + else + timer_coord = (local_io_range=1:block_size[], + global_io_range=1:block_size[]) + end + function write_level(names_dict, this_name) + io_time = io_group["time:" * this_name] + io_ncalls = io_group["ncalls:" * this_name] + io_allocs = io_group["allocs:" * this_name] + @views append_to_dynamic_var(io_time, gathered_times_data[counter,:], t_idx, + parallel_io, timer_coord) + @views append_to_dynamic_var(io_ncalls, gathered_ncalls_data[counter,:], + t_idx, parallel_io, timer_coord) + @views append_to_dynamic_var(io_allocs, gathered_allocs_data[counter,:], + t_idx, parallel_io, timer_coord) + counter += 1 + for (sub_name, sub_dict) ∈ pairs(names_dict) + write_level(sub_dict, this_name * ";" * sub_name) end end + + for top_level_name ∈ keys(timer_names_all_ranks) + write_level(timer_names_all_ranks[top_level_name], top_level_name) + end end # Pick a fixed size for "global_timer_string" so that we can overwrite the variable @@ -2972,7 +2774,7 @@ function write_timing_data(io_moments, t_idx, dfns=false) if t_idx == -1 top_level = nothing else - if "moment_kinetics" ∈ keys(timer_names_per_rank[global_rank[]]) + if "moment_kinetics" ∈ keys(timer_names_all_ranks) top_level = ("moment_kinetics", "time_advance! 
step", "ssp_rk!") this_dict = timer_names_all_ranks @@ -3039,8 +2841,8 @@ file. Needs to be called after exiting from the `@timeit` block so that all time finalised properly. """ function write_final_timing_data_to_binary(io_or_file_info_moments, io_or_file_info_dfns) - io_moments = nothing - io_dfns_moments = nothing + io_moments = io_or_file_info_moments + io_dfns_moments = io_or_file_info_dfns @serial_region begin # Only read/write from first process in each 'block' @@ -3616,8 +3418,8 @@ end close all opened output files """ function finish_file_io(ascii_io::Union{ascii_ios,Nothing}, - binary_moments::Union{io_moments_info,Tuple,Nothing}, - binary_dfns::Union{io_dfns_info,Tuple,Nothing}) + binary_moments::Union{io_moments_info,Tuple,NamedTuple,Nothing}, + binary_dfns::Union{io_dfns_info,Tuple,NamedTuple,Nothing}) @serial_region begin # Only read/write from first process in each 'block' diff --git a/moment_kinetics/src/file_io_hdf5.jl b/moment_kinetics/src/file_io_hdf5.jl index 413c8f5a8..d647ab554 100644 --- a/moment_kinetics/src/file_io_hdf5.jl +++ b/moment_kinetics/src/file_io_hdf5.jl @@ -261,14 +261,14 @@ end function append_to_dynamic_var(io_var::HDF5.Dataset, data::Union{Nothing,Number,AbstractArray{T,N}}, t_idx, parallel_io::Bool, - coords::Union{coordinate,Integer}...; + coords::Union{coordinate,NamedTuple,Integer}...; only_root=false, write_from_this_rank=nothing) where {T,N} # Extend time dimension for this variable dims = size(io_var) dims_mod = (dims[1:end-1]..., t_idx) HDF5.set_extent_dims(io_var, dims_mod) - local_ranges = Tuple(isa(c, coordinate) ? c.local_io_range : 1:c for c ∈ coords) - global_ranges = Tuple(isa(c, coordinate) ? c.global_io_range : 1:c for c ∈ coords) + local_ranges = Tuple(isa(c, Integer) ? (1:c) : c.local_io_range for c ∈ coords) + global_ranges = Tuple(isa(c, Integer) ? 
(1:c) : c.global_io_range for c ∈ coords) if only_root && parallel_io && global_rank[] != 0 # Variable should only be written from root, and this process is not root for the diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index e6b9576ce..35e427319 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3596,16 +3596,13 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin evolving_variables = Tuple(evolving_variables) end - timing_variable_names = OrderedDict{mk_int, Vector{String}}() - for fid ∈ fids0 - timing_group = get_group(fid, "timing_data") - timing_rank_names = collect(k for k in keys(timing_group) if startswith(k, "rank")) - for group_name ∈ timing_rank_names - rank_group = get_group(timing_group, group_name) - irank = parse(mk_int, split(group_name, "rank")[2]) - timing_variable_names[irank] = collect(keys(rank_group)) - end - end + # Assume the timing variables are the same in every restart - this may not always be + # true, and might cause errors if some variables are missing for restarts after the + # first. + timing_group = get_group(fids0[1], "timing_data") + timing_variable_names = collect(k for k in keys(timing_group) + if startswith(k, "time:") || startswith(k, "ncalls:") || + startswith(k, "allocs:")) if parallel_io files = fids0 diff --git a/moment_kinetics/src/timer_utils.jl b/moment_kinetics/src/timer_utils.jl index eba756f2e..3ff8f02ff 100644 --- a/moment_kinetics/src/timer_utils.jl +++ b/moment_kinetics/src/timer_utils.jl @@ -37,16 +37,16 @@ const TimerNamesDict = SortedDict{String,SortedDict,Base.Order.ForwardOrdering} TimerNamesDict() = TimerNamesDict(Base.Order.ForwardOrdering()) """ -Nested SortedDict containting the names of all timers that have been created on each MPI -rank and added to the moments output file. +Nested SortedDict with the names of all timers that have been created on any MPI rank +and added to the moments output file. 
""" -const timer_names_per_rank_moments = SortedDict{mk_int,Tuple{TimerNamesDict,Ref{mk_int}}}() +const timer_names_all_ranks_moments = TimerNamesDict() """ -Nested SortedDict containting the names of all timers that have been created on each MPI -rank and added to the dfns output file. +Nested SortedDict with the names of all timers that have been created on any MPI rank +and added to the dfns output file. """ -const timer_names_per_rank_dfns = SortedDict{mk_int,Tuple{TimerNamesDict,Ref{mk_int}}}() +const timer_names_all_ranks_dfns = TimerNamesDict() """ format_global_timer(; show=true, truncate_output=true) @@ -176,8 +176,8 @@ Reset all global state of timers. """ function reset_mk_timers!() reset_timer!(global_timer) - empty!(timer_names_per_rank_moments) - empty!(timer_names_per_rank_dfns) + empty!(timer_names_all_ranks_moments) + empty!(timer_names_all_ranks_dfns) end end #timer_utils From d9a85008077d4b89bd4d49f82ffb2cde7944a911 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 12 Nov 2024 23:54:30 +0000 Subject: [PATCH 11/15] When using GLMakie, always make all interactive figures Something has made DataInspector work in all the figures when they are opened simultaneously - I guess either the change to making `inspector_label` be defined separately for each plot in the previous commit, or some update to `Makie`. 
--- .../src/makie_post_processing.jl | 115 ++++++++---------- 1 file changed, 48 insertions(+), 67 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 77dd8650c..0652d06c6 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -8429,7 +8429,7 @@ end """ timing_data(run_info; plot_prefix=nothing, threshold=nothing, include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, - figsize=nothing, interactive_figs=nothing, include_legend=true) + figsize=nothing, include_legend=true) Plot timings from different parts of the `moment_kinetics` code. Only timings from function calls during the time evolution loop are included, not from the setup, because we @@ -8463,11 +8463,11 @@ set in `this_input_dict`. When this function is called as part of the settings are read from the post processing input file (by default `post_processing_input.toml`). The function arguments take precedence, if they are given. -If you load GLMakie (by doing `using GLMakie`) before running this function, -`interactive_figs` can be passed one of, or a Vector of, `:times`, `:ncalls`, or `:allocs` -to display an interactive window showing that figure. Multiple values can be passed, but -the DataInspector (which displays information when you hover over the figure) will only -work for the first one in the list (this may be a GLMakie bug?). +If you load GLMakie by doing `using GLMakie` before running this function, but after +calling `using makie_post_processing` (because `CairoMakie` is loaded when the module is +loaded and would take over if you load `GLMakie` before `makie_post_processing`), the +figures will be displayed in interactive windows. When you hover over a line some useful +information will be displayed. 
Pass `include_legend=false` to remove legends from the figures. This is mostly useful for interactive figures where hovering over the lines can show what they are, so that the @@ -8475,8 +8475,7 @@ legend is not needed. """ function timing_data(run_info::Tuple; plot_prefix=nothing, threshold=nothing, include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, - this_input_dict=nothing, figsize=nothing, interactive_figs=nothing, - include_legend=true) + this_input_dict=nothing, figsize=nothing, include_legend=true) if this_input_dict !== nothing input = Dict_to_NamedTuple(this_input_dict["timing_data"]) @@ -8515,40 +8514,31 @@ function timing_data(run_info::Tuple; plot_prefix=nothing, threshold=nothing, ncalls_ax=ncalls_ax, allocs_ax=allocs_ax, irun=irun, figsize=figsize) end - if interactive_figs !== nothing && string(Makie.current_backend()) == "GLMakie" + if string(Makie.current_backend()) == "GLMakie" # Can make interactive plots - if isa(interactive_figs, Symbol) - interactive_figs = [interactive_figs] - end backend = Makie.current_backend() - for figtype ∈ interactive_figs - if figtype == :times - if include_legend - Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(times_fig) - display(backend.Screen(), times_fig) - elseif figtype == :ncalls - if include_legend - Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(ncalls_fig) - display(backend.Screen(), ncalls_fig) - elseif figtype == :allocs - if include_legend - Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(allocs_fig) - display(backend.Screen(), allocs_fig) - else - error("Got unrecognized entry $figtype in `interactive_figs`") - end + if include_legend + Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(times_fig) + display(backend.Screen(), times_fig) + + if include_legend + 
Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(ncalls_fig) + display(backend.Screen(), ncalls_fig) + + if include_legend + Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) end + DataInspector(allocs_fig) + display(backend.Screen(), allocs_fig) elseif plot_prefix !== nothing if include_legend Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=true, merge=true) @@ -8587,7 +8577,7 @@ end function timing_data(run_info; plot_prefix=nothing, threshold=nothing, include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, this_input_dict=nothing, times_ax=nothing, ncalls_ax=nothing, - allocs_ax=nothing, irun=1, figsize=nothing, interactive_figs=nothing, + allocs_ax=nothing, irun=1, figsize=nothing, include_legend=true) if this_input_dict !== nothing @@ -8810,42 +8800,33 @@ function timing_data(run_info; plot_prefix=nothing, threshold=nothing, end end - if interactive_figs !== nothing && times_fig !== nothing && plot_prefix === nothing && + if times_fig !== nothing && plot_prefix === nothing && string(Makie.current_backend()) == "GLMakie" # Can make interactive plots - if isa(interactive_figs, Symbol) - interactive_figs = [interactive_figs] - end backend = Makie.current_backend() - for figtype ∈ interactive_figs - if figtype == :times - if include_legend - Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(times_fig) - display(backend.Screen(), times_fig) - elseif figtype == :ncalls - if include_legend - Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(ncalls_fig) - display(backend.Screen(), ncalls_fig) - elseif figtype == :allocs - if include_legend - Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(allocs_fig) - display(backend.Screen(), allocs_fig) - else - error("Got unrecognized entry $figtype in 
`interactive_figs`") - end + if include_legend + Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(times_fig) + display(backend.Screen(), times_fig) + + if include_legend + Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(ncalls_fig) + display(backend.Screen(), ncalls_fig) + + if include_legend + Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) end + DataInspector(allocs_fig) + display(backend.Screen(), allocs_fig) else if times_fig !== nothing if include_legend From 9134374493210d7af5c89929fdad1aa8f7734eba Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 13 Nov 2024 09:38:28 +0000 Subject: [PATCH 12/15] Fix recursive_merge() to return same type as its first argument `mergewith()` always returns a `Dict`, ignoring the types of its arguments. We need `recursive_merge()` to return an `OrderedDict`, so re-implement `recursive_merge()` 'by hand' without using `mergewith()`. --- moment_kinetics/src/utils.jl | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/utils.jl b/moment_kinetics/src/utils.jl index f38954a29..74b8a3bbb 100644 --- a/moment_kinetics/src/utils.jl +++ b/moment_kinetics/src/utils.jl @@ -312,16 +312,20 @@ Merge two AbstractDicts `a` and `b`. 
Any elements that are AbstractDicts are als """ function recursive_merge end function recursive_merge(a::AbstractDict, b::AbstractDict) - return mergewith(recursive_merge, a, b) -end -function recursive_merge(a::AbstractDict, b) - error("Cannot merge a Dict with a non-Dict, got $a and $b") -end -function recursive_merge(a, b::AbstractDict) - error("Cannot merge a Dict with a non-Dict, got $a and $b") -end -function recursive_merge(a, b) - return b + result = deepcopy(a) + a_keys = collect(keys(a)) + for (k,v) ∈ pairs(b) + if k ∉ a_keys + result[k] = v + elseif isa(result[k], AbstractDict) && isa(v, AbstractDict) + result[k] = recursive_merge(result[k], v) + elseif isa(result[k], AbstractDict) || isa(v, AbstractDict) + error("Cannot merge a Dict with a non-Dict, got $(result[k]) and $v") + else + result[k] = v + end + end + return result end """ From d62bba13d521123a0eeb31c312dbdfc4865997ae Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 13 Nov 2024 13:11:37 +0000 Subject: [PATCH 13/15] Precompile for parallel tests Should help reduce memory usage and so avoid some errors. 
--- .github/workflows/parallel_test.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/parallel_test.yml b/.github/workflows/parallel_test.yml index 5441dcc82..129063d91 100644 --- a/.github/workflows/parallel_test.yml +++ b/.github/workflows/parallel_test.yml @@ -21,14 +21,15 @@ jobs: - uses: julia-actions/cache@v2 - run: | touch Project.toml - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences", "PackageCompiler"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["NCDatasets", "Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' + julia --project -O3 --check-bounds=no precompile.jl # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical cores - ./mpiexecjl -np 3 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 - ./mpiexecjl -np 4 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 - ./mpiexecjl -np 2 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 --long + ./mpiexecjl -np 3 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 + ./mpiexecjl -np 4 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 + ./mpiexecjl -np 2 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no 
moment_kinetics/test/runtests.jl --ci --debug 1 --long # Note: MPI.jl's default implementation is mpich, which has a similar option # `--with-device=ch3:sock`, but that needs to be set when compiling mpich. shell: bash @@ -46,12 +47,13 @@ jobs: - uses: julia-actions/cache@v2 - run: | touch Project.toml - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences", "PackageCompiler"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["NCDatasets", "Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' + julia --project -O3 --check-bounds=no precompile.jl # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical cores - ./mpiexecjl -np 4 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 + ./mpiexecjl -np 4 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 # Note: MPI.jl's default implementation is mpich, which has a similar option # `--with-device=ch3:sock`, but that needs to be set when compiling mpich. shell: bash From 1b3662a36745f6002c7875228f8e501dda08f1a3 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 13 Nov 2024 20:53:49 +0000 Subject: [PATCH 14/15] Increase timeout for parallel tests job Updated kinetic electron test takes a bit longer, so just increase the maximum allowed time for the job. 
--- .github/workflows/parallel_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/parallel_test.yml b/.github/workflows/parallel_test.yml index 5441dcc82..c767cc6b1 100644 --- a/.github/workflows/parallel_test.yml +++ b/.github/workflows/parallel_test.yml @@ -10,7 +10,7 @@ permissions: jobs: test-ubuntu: runs-on: ubuntu-latest - timeout-minutes: 150 + timeout-minutes: 180 steps: - uses: actions/checkout@v4 From 22c22040d1b6a794201b5982ab5a2632303854aa Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 14 Nov 2024 09:25:26 +0000 Subject: [PATCH 15/15] Reduce length of "run_name" in restart_interpolation_tests.jl Avoids filename-too-long errors on GitHub Actions servers. --- moment_kinetics/test/restart_interpolation_tests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/test/restart_interpolation_tests.jl b/moment_kinetics/test/restart_interpolation_tests.jl index 5d03e02e9..066a917e3 100644 --- a/moment_kinetics/test/restart_interpolation_tests.jl +++ b/moment_kinetics/test/restart_interpolation_tests.jl @@ -108,7 +108,7 @@ function run_test(test_input, base, message, rtol, atol; tol_3V, args...) name = name[1:60] end if parallel_io - name *= "parallel-io" + name *= "p-io" end # Provide some progress info