diff --git a/docs/src/developing.md b/docs/src/developing.md index d89f889982..387bec7488 100644 --- a/docs/src/developing.md +++ b/docs/src/developing.md @@ -82,7 +82,20 @@ $ h5dump -d /timing_data/global_timer_string my_output_file.moments.h5 ``` More detailed timing information is saved for each MPI rank into subgroups -`rank` of the `timing_data` group in the output file. +`rank` of the `timing_data` group in the output file. This information can +be plotted using [`makie_post_processing.timing_data`](@ref). The plots contain +many curves. Filtering out the ones you are not interested in (using the +`include_patterns`, `exclude_patterns`, and/or `ranks` arguments) can help, but +it still may be useful to have interactive plots which show the label and MPI +rank when you hover over a curve. For example +```julia +julia> using makie_post_processing, GLMakie +julia> ri = get_run_info("runs/my_example_run/") +julia> timing_data(ri; interactive_figs=:times); +``` +Here `using GLMakie` selects the `Makie` backend that provides interactive +plots, and the `interactive_figs` argument specifies that `timing_data()` +should make an interactive plot (in this case for the execution times). Lower level timing data, for example timing MPI and linear-algebra calls, can be enabled by activating 'debug timing'. This can be done by re-defining the diff --git a/makie_post_processing/makie_post_processing/Project.toml b/makie_post_processing/makie_post_processing/Project.toml index cb07db97fe..7ce1cc5ada 100644 --- a/makie_post_processing/makie_post_processing/Project.toml +++ b/makie_post_processing/makie_post_processing/Project.toml @@ -9,7 +9,9 @@ Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" moment_kinetics = "b5ff72cc-06fc-4161-ad14-dba1c22ed34e" diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 9acef44c42..74f51508cc 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -16,6 +16,7 @@ export animate_f_unnorm_vs_vpa, animate_f_unnorm_vs_vpa_z, get_1d_ax, get_2d_ax, irregular_heatmap, irregular_heatmap!, plot_f_unnorm_vs_vpa, plot_f_unnorm_vs_vpa_z, positive_or_nan, get_variable, positive_or_nan, put_legend_above, put_legend_below, put_legend_left, put_legend_right +export timing_data include("shared_utils.jl") @@ -53,9 +54,11 @@ using LsqFit using MPI using NaNMath using OrderedCollections +using StatsBase using TOML using CairoMakie +using Makie const default_input_file_name = "post_processing_input.toml" @@ -354,6 +357,8 @@ function makie_post_process(run_dir::Union{String,Tuple}, manufactured_solutions_analysis(run_info; plot_prefix=plot_prefix, nvperp=nvperp) manufactured_solutions_analysis_dfns(run_info_dfns; plot_prefix=plot_prefix) + timing_data(run_info; plot_prefix=plot_prefix, this_input_dict=input_dict) + for ri ∈ run_info close_run_info(ri) end @@ -814,6 +819,16 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, animate_steady_state_residual=false, ) + set_defaults_and_check_section!( + this_input_dict, "timing_data"; + plot=false, + threshold=1.0e-2, + include_patterns=String[], + exclude_patterns=String[], + ranks=mk_int[], + figsize=(600,800) + ) + # We allow top-level options in the post-processing input file check_sections!(this_input_dict; check_no_top_level_options=false) @@ -7595,14 +7610,19 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n run_info = (run_info,) end - if electron - println("Making electron timestep diagnostics plots") - else - println("Making timestep diagnostics plots") - end - input = Dict_to_NamedTuple(input_dict["timestep_diagnostics"]) + if input.plot || input.animate_CFL || input.plot_timestep_residual || + input.animate_timestep_residual || input.plot_timestep_error || + input.animate_timestep_error || input.plot_steady_state_residual || + input.animate_steady_state_residual + if electron + println("Making electron timestep diagnostics plots") + else + println("Making timestep diagnostics plots") + end + end + steps_fig = nothing dt_fig = nothing CFL_fig = nothing @@ -8406,6 +8426,404 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n return steps_fig, dt_fig, CFL_fig end +""" + timing_data(run_info; plot_prefix=nothing, threshold=nothing, + include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, + figsize=nothing, interactive_figs=nothing, include_legend=true) + +Plot timings from different parts of the `moment_kinetics` code. Only timings from +function calls during the time evolution loop are included, not from the setup, because we +plot versus time. + +To reduce clutter, timings whose total time (at the final time point) is less than +`threshold` times the overall run time will be excluded. By default, `threshold` is +`1.0e-3`. + +When there is more than one MPI rank present, the timings for each rank will be plotted +separately. The lines will be labelled with the MPI rank, with the position of the labels +moving along the lines one point at a time, to try to avoid overlapping many labels. If +the curves all overlap, this will look like one curve labelled by many MPI ranks. + +There are many timers, so it can be useful to filter them to see only the most relevant +ones. By default all timers will be plotted. If `include_patterns` is passed, and +`exclude_patterns` is not, then only the total time and any timers that match +`include_patterns` (matches checked using `occursin()`) will be included in the plots. If +`exclude_patterns` is passed, then any timers that match (matches checked using +`occursin()`) `exclude_patterns` will be omitted, unless they match `include_patterns` in +which case they will still be included. If `ranks` is passed, then only the MPI ranks with +indices found in `ranks` will be included. + +`figsize` can be passed to customize the size of the figures that plots are made on. This +can be useful because the legends may become very large when many timers are plotted, in +which case a larger figure might be needed. + +`threshold`, `exclude_patterns`, `include_patterns`, `ranks`, and `figsize` can also be +set in `this_input_dict`. When this function is called as part of +[`makie_post_process`](@ref), [`input_dict`](@ref) is passed as `this_input_dict` so that +the settings are read from the post processing input file (by default +`post_processing_input.toml`). The function arguments take precedence, if they are given. + +If you load GLMakie (by doing `using GLMakie`) before running this function, +`interactive_figs` can be passed one of, or a Vector of, `:times`, `:ncalls`, or `:allocs` +to display an interactive window showing that figure. Multiple values can be passed, but +the DataInspector (which displays information when you hover over the figure) will only +work for the first one in the list (this may be a GLMakie bug?). + +Pass `include_legend=false` to remove legends from the figures. This is mostly useful for +interactive figures where hovering over the lines can show what they are, so that the +legend is not needed. +""" +function timing_data(run_info::Tuple; plot_prefix=nothing, threshold=nothing, + include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, + this_input_dict=nothing, figsize=nothing, interactive_figs=nothing, + include_legend=true) + + if this_input_dict !== nothing + input = Dict_to_NamedTuple(this_input_dict["timing_data"]) + else + input = nothing + end + + if input !== nothing && !input.plot + return nothing + end + + println("Making timing data plots") + + if figsize === nothing + if input !== nothing + figsize = input.figsize + else + figsize = (600,800) + end + end + + times_fig, times_ax, times_legend_place = + get_1d_ax(; xlabel="time", ylabel="execution time per output step (s)", get_legend_place=:below, + size=figsize) + ncalls_fig, ncalls_ax, ncalls_legend_place = + get_1d_ax(; xlabel="time", ylabel="number of calls per output step", get_legend_place=:below, + size=figsize) + allocs_fig, allocs_ax, allocs_legend_place = + get_1d_ax(; xlabel="time", ylabel="allocations per output step (MB)", get_legend_place=:below, + size=figsize) + + for (irun,ri) ∈ enumerate(run_info) + timing_data(ri; plot_prefix=plot_prefix, threshold=threshold, + include_patterns=include_patterns, exclude_patterns=exclude_patterns, + ranks=ranks, this_input_dict=this_input_dict, times_ax=times_ax, + ncalls_ax=ncalls_ax, allocs_ax=allocs_ax, irun=irun, figsize=figsize) + end + + if plot_prefix !== nothing + put_legend_below(times_fig, times_ax; merge=true) + outfile = plot_prefix * "execution_times.pdf" + save(outfile, times_fig) + + put_legend_below(ncalls_fig, ncalls_ax; merge=true) + outfile = plot_prefix * "ncalls.pdf" + save(outfile, ncalls_fig) + + put_legend_below(allocs_fig, allocs_ax; merge=true) + outfile = plot_prefix * "allocations.pdf" + save(outfile, allocs_fig) + end + + if interactive_figs === nothing && string(Makie.current_backend()) == "GLMakie" + # Can make interactive plots + + if isa(interactive_figs, Symbol) + interactive_figs = [interactive_figs] + end + backend = Makie.current_backend() + + for figtype ∈ interactive_figs + if figtype == :times + DataInspector(times_fig) + display(backend.Screen(), times_fig) + elseif figtype == :ncalls + DataInspector(ncalls_fig) + display(backend.Screen(), ncalls_fig) + elseif figtype == :allocs + DataInspector(allocs_fig) + display(backend.Screen(), allocs_fig) + else + error("Got unrecognized entry $figtype in `interactive_figs`") + end + end + end + + return times_fig, ncalls_fig, allocs_fig +end + +function timing_data(run_info; plot_prefix=nothing, threshold=nothing, + include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, + this_input_dict=nothing, times_ax=nothing, ncalls_ax=nothing, + allocs_ax=nothing, irun=1, figsize=nothing, interactive_figs=nothing, + include_legend=true) + + if this_input_dict !== nothing + input = Dict_to_NamedTuple(this_input_dict["timing_data"]) + else + input = nothing + end + + if input !== nothing && !input.plot + return nothing + end + + if figsize === nothing + if input !== nothing + figsize = input.figsize + else + figsize = (600,800) + end + end + + if threshold === nothing + if input !== nothing + threshold = input.threshold + else + threshold = 1.0e-2 + end + end + if isa(include_patterns, AbstractString) + include_patterns = [include_patterns] + end + if isa(exclude_patterns, AbstractString) + exclude_patterns = [exclude_patterns] + end + if input !== nothing && include_patterns === nothing + include_patterns = input.include_patterns + if length(include_patterns) == 0 + include_patterns = nothing + end + end + if input !== nothing && exclude_patterns === nothing + exclude_patterns = input.exclude_patterns + if length(exclude_patterns) == 0 + exclude_patterns = nothing + end + end + if input !== nothing && ranks === nothing + ranks = input.ranks + end + + if times_ax === nothing + times_fig, times_ax, times_legend_place = + get_1d_ax(; xlabel="time", ylabel="execution time per output step (s)", + get_legend_place=:below, size=figsize) + else + times_fig = nothing + end + if ncalls_ax === nothing + ncalls_fig, ncalls_ax, ncalls_legend_place = + get_1d_ax(; xlabel="time", ylabel="number of calls per output step", get_legend_place=:below) + else + ncalls_fig = nothing + end + if allocs_ax === nothing + allocs_fig, allocs_ax, allocs_legend_place = + get_1d_ax(; xlabel="time", ylabel="allocations per output step (MB)", get_legend_place=:below) + else + allocs_fig = nothing + end + + linestyles = linestyle=[:solid, :dash, :dot, :dashdot, :dashdotdot] + for irank ∈ 0:run_info.nrank-1 + all_global_timer_variables = run_info.timing_variable_names[irank] + time_advance_timer_variables = [v for v ∈ all_global_timer_variables if occursin("time_advance! step", v)] + time_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "time:")] + ncalls_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "ncalls:")] + allocs_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "allocs:")] + + this_rank_group = "timing_data/rank$irank" + + inspector_label_func = (self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])" + + function label_irank(ax, variable, color, unit_conversion=1) + if run_info.nrank > 1 + # Label curves with irank so we can tell which is which + index = ((irank + 1) % (length(variable) - 1)) + 1 + with_theme + text!(ax, run_info.time[index], + variable[index] * unit_conversion; + text="$irank", color=color) + end + end + + function check_include_exclude(variable_name) + explicitly_included = (include_patterns !== nothing && + any(occursin(p, variable_name) for p ∈ include_patterns)) + if exclude_patterns === nothing && include_patterns !== nothing + excluded = !explicitly_included + elseif exclude_patterns !== nothing + if !explicitly_included && + any(occursin(p, variable_name) for p ∈ exclude_patterns) + excluded = true + else + excluded = false + end + else + excluded = false + end + return excluded, explicitly_included + end + + # Plot the total time + time_unit_conversion = 1.0e-9 # ns to s + total_time_variable_name = "time:moment_kinetics;time_advance! step" + total_time = get_variable(run_info, total_time_variable_name * "_per_step", + group=this_rank_group) + label = "time_advance! step" + lines!(times_ax, run_info.time, total_time .* time_unit_conversion; color=:black, + linestyle=linestyles[irun], label=label, + inspector_label=inspector_label_func) + label_irank(times_ax, total_time, :black, time_unit_conversion) + mean_total_time = mean(total_time) + for (variable_counter, variable_name) ∈ enumerate(time_variables) + if variable_name == total_time_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=this_rank_group) + if !explicitly_included && mean(variable) < threshold * mean_total_time + # This variable takes a very small amount of time, so skip. + continue + end + label = split(variable_name, "time_advance! step;")[2] + l = lines!(times_ax, run_info.time, variable .* time_unit_conversion; + color=Cycled(variable_counter), linestyle=linestyles[irun], + label=label, inspector_label=inspector_label_func) + label_irank(times_ax, variable, l.color, time_unit_conversion) + end + + # Plot the number of calls + total_ncalls_variable_name = "ncalls:moment_kinetics;time_advance! step" + total_ncalls = get_variable(run_info, total_ncalls_variable_name * "_per_step", + group=this_rank_group) + label = "time_advance! step" + lines!(ncalls_ax, run_info.time, total_ncalls; color=:black, + linestyle=linestyles[irun], label=label, + inspector_label=inspector_label_func) + label_irank(ncalls_ax, total_ncalls, :black) + mean_total_ncalls = mean(total_ncalls) + for (variable_counter, variable_name) ∈ enumerate(ncalls_variables) + if variable_name == total_ncalls_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=this_rank_group) + if !explicitly_included && mean(variable) < threshold * mean_total_ncalls + # This variable takes a very small number of calls, so skip. + continue + end + label = split(variable_name, "time_advance! step;")[2] + l = lines!(ncalls_ax, run_info.time, variable; color=Cycled(variable_counter), + linestyle=linestyles[irun], label=label, + inspector_label=inspector_label_func) + label_irank(ncalls_ax, variable, l.color) + end + + # Plot the total allocs + allocs_unit_conversion = 2^(-20) # bytes to MB + total_allocs_variable_name = "allocs:moment_kinetics;time_advance! step" + total_allocs = get_variable(run_info, total_allocs_variable_name * "_per_step", + group=this_rank_group) + label = "time_advance! step" + lines!(allocs_ax, run_info.time, total_allocs .* allocs_unit_conversion; + color=:black, linestyle=linestyles[irun], label=label, + inspector_label=inspector_label_func) + label_irank(allocs_ax, total_allocs, :black, allocs_unit_conversion) + mean_total_allocs = mean(total_allocs) + for (variable_counter, variable_name) ∈ enumerate(allocs_variables) + if variable_name == total_allocs_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=this_rank_group) + if !explicitly_included && mean(variable) < threshold * mean_total_allocs + # This variable represents a very small amount of allocs, so skip. + continue + end + label = split(variable_name, "time_advance! step;")[2] + l = lines!(allocs_ax, run_info.time, variable .* allocs_unit_conversion; + color=Cycled(variable_counter), linestyle=linestyles[irun], + label=label, inspector_label=inspector_label_func) + label_irank(allocs_ax, variable, l.color, + allocs_unit_conversion) + end + end + + if times_fig !== nothing + put_legend_below(times_fig, times_ax; merge=true) + if plot_prefix !== nothing + outfile = plot_prefix * "execution_times.pdf" + save(outfile, times_fig) + end + end + + if ncalls_fig !== nothing + put_legend_below(ncalls_fig, ncalls_ax; merge=true) + if plot_prefix !== nothing + outfile = plot_prefix * "ncalls.pdf" + save(outfile, ncalls_fig) + end + end + + if allocs_fig !== nothing + put_legend_below(allocs_fig, allocs_ax; merge=true) + if plot_prefix !== nothing + outfile = plot_prefix * "allocations.pdf" + save(outfile, allocs_fig) + end + end + + if times_fig !== nothing && plot_prefix === nothing && + string(Makie.current_backend()) == "GLMakie" + + # Can make interactive plots + + if isa(interactive_figs, Symbol) + interactive_figs = [interactive_figs] + end + backend = Makie.current_backend() + + for figtype ∈ interactive_figs + if figtype == :times + DataInspector(times_fig) + display(backend.Screen(), times_fig) + elseif figtype == :ncalls + DataInspector(ncalls_fig) + display(backend.Screen(), ncalls_fig) + elseif figtype == :allocs + DataInspector(allocs_fig) + display(backend.Screen(), allocs_fig) + else + error("Got unrecognized entry $figtype in `interactive_figs`") + end + end + end + + return times_fig, ncalls_fig, allocs_fig +end + + # Utility functions ################### # diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 8279942c7d..a73393c48d 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3595,6 +3595,17 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin evolving_variables = Tuple(evolving_variables) end + timing_variable_names = Dict{mk_int, Vector{String}}() + for fid ∈ fids0 + timing_group = get_group(fid, "timing_data") + timing_rank_names = collect(k for k in keys(timing_group) if startswith(k, "rank")) + for group_name ∈ timing_rank_names + rank_group = get_group(timing_group, group_name) + irank = parse(mk_int, split(group_name, "rank")[2]) + timing_variable_names[irank] = collect(keys(rank_group)) + end + end + if parallel_io files = fids0 else @@ -3628,7 +3639,7 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin vzeta_chunk_size=vzeta_chunk_size, vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size, variable_names=variable_names, evolving_variables=evolving_variables, - dfns=dfns) + timing_variable_names=timing_variable_names, dfns=dfns) return run_info end @@ -4174,7 +4185,8 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t # Get a 'per step' value from a saved 'cumulative' value. E.g. 'iterations per step' # from a saved 'cumulative total iterations' - function get_per_step_from_cumulative_variable(run_info, varname::String; kwargs...) + function get_per_step_from_cumulative_variable(run_info, varname::AbstractString; + kwargs...) variable = get_variable(run_info, varname; kwargs...) tdim = ndims(variable) for i ∈ size(variable, tdim):-1:2 @@ -5020,6 +5032,13 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t nl_linear_iterations = get_per_step_from_cumulative_variable( run_info, "$(prefix)_linear_iterations"; kwargs...) variable = nl_linear_iterations ./ nl_iterations + elseif endswith(variable_name, "_per_step") && variable_name ∉ run_info.variable_names + # If "_per_step" is appended to a variable name, assume it is a cumulative + # variable, and get the per-step version. + variable = + get_per_step_from_cumulative_variable(run_info, + split(variable_name, "_per_step")[1]; + kwargs...) else variable = postproc_load_variable(run_info, variable_name; kwargs...) end