Merge #2112

2112: Simplify configurations for performance jobs r=nefrathenrici a=nefrathenrici

Content
- Splits up `AtmosConfig` constructor, for a clearer flow and dispatch on `Dicts`, `Strings` (filepath) and `Nothing`. This allows for circumventing the CLI and adds another entry point in the config generation flow. This is technically a breaking change, and might need to be changed before merging.
- Add `perf/common.jl` with two new functions to easily generate configurations for performance jobs. `AtmosCoveragePerfConfig` takes in a config dictionary, overrides the default performance configuration, and passes it to `AtmosConfig`. `TargetJobConfig` takes in a job id, and constructs its corresponding `AtmosConfig`.
- Rework `flame.jl`, `jet_test_nfailures.jl`, and `benchmark.jl` to maintain continuity for the perf tests. Apply this change for the relevant buildkite jobs in `pipeline.yml`.
- Add tests for `AtmosCoveragePerfConfig`.

Co-authored-by: nefrathenrici <[email protected]>
CliMA · Oct 6, 2023 · d54a0df · d54a0df
2 parents bb4b1ed + da0bd8a
commit d54a0df
Show file tree

Hide file tree

Showing 32 changed files with 209 additions and 144 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -569,7 +569,7 @@ steps:
       - label: ":computer: checkbounds"
         command: >
           julia --color=yes --check-bounds=yes --project=perf perf/benchmark.jl
-          --config_file $PERF_CONFIG_PATH/checkbounds.yml
+          $PERF_CONFIG_PATH/checkbounds.yml
         artifact_paths: "checkbounds/*"
         agents:
           slurm_mem: 20GB
@@ -970,14 +970,14 @@ steps:
       - label: ":computer: Benchmark: perf target (default)"
         command: >
           julia --color=yes --project=perf perf/benchmark.jl
-          --config_file $PERF_CONFIG_PATH/bm_perf_target.yml
+          $PERF_CONFIG_PATH/bm_perf_target.yml
         agents:
           slurm_mem: 20GB
 
       - label: ":computer: Benchmark: perf target (Threaded)"
         command: >
           julia --color=yes --threads 8 --project=perf perf/benchmark.jl
-          --config_file $PERF_CONFIG_PATH/bm_perf_target_threaded.yml
+          $PERF_CONFIG_PATH/bm_perf_target_threaded.yml
         agents:
           slurm_mem: 20GB
           slurm_cpus_per_task: 8
@@ -986,47 +986,47 @@ steps:
       - label: ":fire: Flame graph: perf target (default)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file $PERF_CONFIG_PATH/flame/perf_target.yml
+          $PERF_CONFIG_PATH/flame_perf_target.yml
         artifact_paths: "flame_perf_target/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: perf target (with tracers)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file  $PERF_CONFIG_PATH/flame/perf_target_tracers.yml
+          $PERF_CONFIG_PATH/flame_perf_target_tracers.yml
         artifact_paths: "flame_perf_target_tracers/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: perf target (edmfx)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file $PERF_CONFIG_PATH/flame/perf_target_edmfx.yml
+          $PERF_CONFIG_PATH/flame_perf_target_edmfx.yml
         artifact_paths: "flame_perf_target_edmfx/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: perf target (diagnostic edmfx)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file  $PERF_CONFIG_PATH/flame/perf_target_diagnostic_edmfx.yml
+          $PERF_CONFIG_PATH/flame_perf_target_diagnostic_edmfx.yml
         artifact_paths: "flame_perf_target_diagnostic_edmfx/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: perf target (edmf)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file  $PERF_CONFIG_PATH/flame/perf_target_edmf.yml
+          $PERF_CONFIG_PATH/flame_perf_target_edmf.yml
         artifact_paths: "flame_perf_target_edmf/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: perf target (Threaded)"
         command: >
           julia --threads 8 --color=yes --project=perf perf/flame.jl
-          --config_file  $PERF_CONFIG_PATH/flame/perf_target_threaded.yml
+          $PERF_CONFIG_PATH/flame_perf_target_threaded.yml
         artifact_paths: "flame_perf_target_threaded/*"
         agents:
           slurm_cpus_per_task: 8
@@ -1035,23 +1035,23 @@ steps:
       - label: ":fire: Flame graph: perf target (Callbacks)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file  $PERF_CONFIG_PATH/flame/perf_target_callbacks.yml
+          $PERF_CONFIG_PATH/flame_perf_target_callbacks.yml
         artifact_paths: "flame_perf_target_callbacks/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: gravity wave"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file $PERF_CONFIG_PATH/flame/perf_gw.yml
+          $PERF_CONFIG_PATH/flame_perf_gw.yml
         artifact_paths: "flame_perf_gw/*"
         agents:
           slurm_mem: 20GB
 
       - label: ":fire: Flame graph: perf target (diagnostics)"
         command: >
           julia --color=yes --project=perf perf/flame.jl
-          --config_file $PERF_CONFIG_PATH/flame/diagnostics.yml
+          $PERF_CONFIG_PATH/flame_perf_diagnostics.yml
         artifact_paths: "flame_perf_diagnostics/*"
         agents:
           slurm_mem: 20GB
@@ -1060,15 +1060,14 @@ steps:
       - label: ":rocket: JET n-failures (inference)"
         command: >
           julia --color=yes --project=perf perf/jet_test_nfailures.jl
-          --config_file $PERF_CONFIG_PATH/jet_n_failures.yml
+          $PERF_CONFIG_PATH/jet_n_failures.yml
         agents:
           slurm_mem: 20GB
 
       # Latency
       - label: ":mag::rocket: Invalidations"
         command: >
           julia --color=yes --project=perf perf/invalidations.jl
-          --config_file $PERF_CONFIG_PATH/invalidations.yml
         artifact_paths: "invalidations/*"
         agents:
           slurm_mem: 20GB

diff --git a/config/default_configs/default_config.yml b/config/default_configs/default_config.yml
@@ -113,9 +113,6 @@ output_dir:
 device:
   help: "Device type to use [`auto` (default) `CPUSingleThreaded`, `CPUMultiThreaded`, `CUDADevice`]"
   value: "auto"
-target_job:
-  help: "An (optional) job to target for analyzing performance"
-  value: ~
 reference_job_id:
   help: |-
     Identifier of job to use as the "reference" solution in the quicklook plot; the current job's results get compared to the results of the quicklook job on the main branch (only used if `debugging_tc` is `true`)

diff --git a/config/perf_configs/checkbounds.yml b/config/perf_configs/checkbounds.yml
@@ -1 +1 @@
-job_id: "checkbounds"
+job_id: "checkbounds"
diff --git a/config/perf_configs/flame/diagnostics.yml b/config/perf_configs/flame/diagnostics.yml
diff --git a/config/perf_configs/flame/perf_gw.yml b/config/perf_configs/flame/perf_gw.yml
diff --git a/config/perf_configs/flame/perf_target_diagnostic_edmfx.yml b/config/perf_configs/flame/perf_target_diagnostic_edmfx.yml
diff --git a/config/perf_configs/flame/perf_target_edmf.yml b/config/perf_configs/flame/perf_target_edmf.yml
diff --git a/config/perf_configs/flame/perf_target_edmfx.yml b/config/perf_configs/flame/perf_target_edmfx.yml
diff --git a/config/perf_configs/flame/perf_target_threaded.yml b/config/perf_configs/flame/perf_target_threaded.yml
diff --git a/config/perf_configs/flame_perf_diagnostics.yml b/config/perf_configs/flame_perf_diagnostics.yml
@@ -0,0 +1,11 @@
+diagnostics:
+  - reduction_time: "average"
+    short_name: "ua"
+    period: "1secs"
+  - reduction_time: "max"
+    short_name: "va"
+    period: "1secs"
+  - reduction_time: "max"
+    short_name: "ta"
+    period: "1secs"
+job_id: "flame_perf_diagnostics"
diff --git a/config/perf_configs/flame_perf_gw.yml b/config/perf_configs/flame_perf_gw.yml
@@ -0,0 +1,9 @@
+dz_bottom: 300.0
+rayleigh_sponge: true
+toml:
+  - "toml/flame_perf_gw.toml"
+orographic_gravity_wave: "raw_topo"
+idealized_insolation: false
+non_orographic_gravity_wave: true
+job_id: "flame_perf_gw"
+z_max: 45000.0
diff --git a/config/perf_configs/flame/perf_target.yml → config/perf_configs/flame_perf_target.yml b/config/perf_configs/flame/perf_target.yml → config/perf_configs/flame_perf_target.yml
diff --git a/...f_configs/flame/perf_target_callbacks.yml → ...f_configs/flame_perf_target_callbacks.yml b/...f_configs/flame/perf_target_callbacks.yml → ...f_configs/flame_perf_target_callbacks.yml
@@ -1,5 +1,4 @@
 dt_save_to_disk: "1secs"
 dt_save_restart: "1secs"
 dt_rad: "1secs"
-dt_save_to_sol: "1secs"
 job_id: "flame_perf_target_callbacks"
diff --git a/config/perf_configs/flame_perf_target_diagnostic_edmfx.yml b/config/perf_configs/flame_perf_target_diagnostic_edmfx.yml
@@ -0,0 +1,9 @@
+dt_save_to_disk: "600secs"
+edmfx_entr_model: "ConstantTimescale"
+edmfx_nh_pressure: true
+edmfx_upwinding: "first_order"
+prognostic_tke: true
+edmfx_sgs_flux: true
+turbconv: "diagnostic_edmfx"
+job_id: "flame_perf_target_diagnostic_edmfx"
+edmfx_detr_model: "ConstantCoefficient"
diff --git a/config/perf_configs/flame_perf_target_edmf.yml b/config/perf_configs/flame_perf_target_edmf.yml
@@ -0,0 +1,9 @@
+rad: ~
+apply_limiter: false
+dt_save_to_disk: "10secs"
+precip_model: ~
+turbconv: "edmf"
+post_process: false
+FLOAT_TYPE: "Float64"
+job_id: "flame_perf_target_edmf"
+z_stretch: false
diff --git a/config/perf_configs/flame_perf_target_edmfx.yml b/config/perf_configs/flame_perf_target_edmfx.yml
@@ -0,0 +1,9 @@
+dt_save_to_disk: "30secs"
+reference_job_id: "sphere_baroclinic_wave_rhoe_equilmoist"
+ode_algo: "SSP33ShuOsher"
+initial_condition: "MoistBaroclinicWaveWithEDMF"
+edmfx_entr_model: "ConstantCoefficient"
+edmfx_detr_model: "ConstantCoefficient"
+edmfx_sgs_flux: true
+turbconv: "edmfx"
+job_id: "flame_perf_target_edmfx"
diff --git a/config/perf_configs/flame_perf_target_threaded.yml b/config/perf_configs/flame_perf_target_threaded.yml
@@ -0,0 +1 @@
+job_id: "flame_perf_target_threaded"
diff --git a/...erf_configs/flame/perf_target_tracers.yml → ...erf_configs/flame_perf_target_tracers.yml b/...erf_configs/flame/perf_target_tracers.yml → ...erf_configs/flame_perf_target_tracers.yml
diff --git a/config/perf_configs/jet_n_failures.yml b/config/perf_configs/jet_n_failures.yml
@@ -1,5 +1 @@
-target_job: "sphere_baroclinic_wave_rhoe_equilmoist_edmf"
-moist: "dry"
-precip_model: ~
-rad: ~
-job_id: "jet_n_failures"
+job_id: "jet_n_failures"
diff --git a/docs/src/repl_scripts.jl b/docs/src/repl_scripts.jl
@@ -16,7 +16,7 @@ function print_repl_script(config)
         end
     end
     ib *= """\n"""
-    ib *= """config = CA.AtmosConfig(; config_dict);\n"""
+    ib *= """config = CA.AtmosConfig(config_dict);\n"""
     ib *= """\n"""
     ib *= """include("examples/hybrid/driver.jl")\n"""
     println(ib)

diff --git a/perf/benchmark.jl b/perf/benchmark.jl
@@ -1,7 +1,14 @@
 import Random
 Random.seed!(1234)
 import ClimaAtmos as CA
-config = CA.AtmosCoveragePerfConfig()
+
+include("common.jl")
+
+length(ARGS) != 1 && error("Usage: benchmark.jl <config_file>")
+config_file = ARGS[1]
+config_dict = YAML.load_file(config_file)
+config = AtmosCoveragePerfConfig(config_dict)
+
 integrator = CA.get_integrator(config)
 
 (; parsed_args) = config

diff --git a/perf/benchmark_dump.jl b/perf/benchmark_dump.jl
@@ -1,13 +1,3 @@
-#=
-```
-julia --project=examples perf/benchmark_dump.jl
-```
-Or, interactively,
-```
-julia --project=examples
-include(joinpath("perf", "benchmark_dump.jl"));
-```
-=#
 import Random
 Random.seed!(1234)
 import ClimaAtmos as CA
@@ -17,16 +7,16 @@ using Plots
 using PrettyTables
 import YAML
 
+# Need to generate config_dict here to override `h_elem` in the loop below
 parsed_args = CA.parse_commandline(CA.argparse_settings())
 config_dict = YAML.load_file(parsed_args["config_file"])
 output_dir = joinpath(config_dict["job_id"])
 
 steptimes = []
-
 # Iterate through varying number of horizontal elements
 for h_elem in 8:8:40
     config_dict["h_elem"] = h_elem
-    config = CA.AtmosConfig(; config_dict = config_dict)
+    config = CA.AtmosConfig(config_dict)
     integrator = CA.get_integrator(config)
     Y₀ = deepcopy(integrator.u)
 

diff --git a/perf/common.jl b/perf/common.jl
@@ -0,0 +1,42 @@
+import YAML
+
+"""
+    AtmosCoveragePerfConfig()
+    AtmosCoveragePerfConfig(config_dict)
+
+Creates a model configuration that covers many physics components.
+The configuration precedence is as follows:
+    1. Configuration from the given config file/dict (highest precedence)
+    2. Default perf configuration (to increase coverage)
+    3. Default configuration (lowest precedence)
+"""
+function AtmosCoveragePerfConfig(config_dict = Dict())
+    perf_default_config = perf_config_dict()
+    config_dict = merge(perf_default_config, config_dict)
+    return CA.AtmosConfig(config_dict)
+end
+
+"""
+    TargetJobConfig(target_job)
+
+Creates a full model configuration from the given target job.
+"""
+TargetJobConfig(target_job) =
+    CA.AtmosConfig(CA.config_from_target_job(target_job))
+
+
+"""
+    perf_config_dict()
+
+Loads the default performance configuration from a file into a Dict.
+"""
+function perf_config_dict()
+    perf_defaults = joinpath(
+        dirname(@__FILE__),
+        "..",
+        "config",
+        "default_configs",
+        "default_perf.yml",
+    )
+    return YAML.load_file(perf_defaults)
+end
diff --git a/perf/flame.jl b/perf/flame.jl
@@ -1,7 +1,14 @@
 import Random
 Random.seed!(1234)
 import ClimaAtmos as CA
-config = CA.AtmosCoveragePerfConfig()
+
+include("common.jl")
+
+length(ARGS) != 1 && error("Usage: flame.jl <config_file>")
+config_file = ARGS[1]
+config_dict = YAML.load_file(config_file)
+config = AtmosCoveragePerfConfig(config_dict)
+job_id = config.parsed_args["job_id"]
 integrator = CA.get_integrator(config)
 
 # The callbacks flame graph is very expensive, so only do 2 steps.
@@ -11,11 +18,9 @@ import SciMLBase
 SciMLBase.step!(integrator) # compile first
 CA.call_all_callbacks!(integrator) # compile callbacks
 import Profile, ProfileCanvas
-(; output_dir, job_id) = integrator.p.simulation
 output_dir = job_id
 mkpath(output_dir)
 
-
 @info "collect profile"
 Profile.clear()
 prof = Profile.@profile SciMLBase.step!(integrator)
@@ -58,7 +63,7 @@ allocs_limit["flame_perf_target"] = 12864
 allocs_limit["flame_perf_target_tracers"] = 212496
 allocs_limit["flame_perf_target_edmfx"] = 304064
 allocs_limit["flame_perf_diagnostics"] = 3024344
-allocs_limit["flame_perf_target_diagnostic_edmfx"] = 862576
+allocs_limit["flame_perf_target_diagnostic_edmfx"] = 862960
 allocs_limit["flame_perf_target_edmf"] = 12459299664
 allocs_limit["flame_perf_target_threaded"] = 6175664
 allocs_limit["flame_perf_target_callbacks"] = 46413904