From 6e63e21be08126ac27d2b1267b20f974eb6aaba3 Mon Sep 17 00:00:00 2001 From: nefrathenrici Date: Tue, 1 Oct 2024 13:44:47 -0700 Subject: [PATCH 1/2] Fix output path for calibration e2e test --- calibration/test/e2e_test.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calibration/test/e2e_test.jl b/calibration/test/e2e_test.jl index fb16f98303..81ea2007ea 100644 --- a/calibration/test/e2e_test.jl +++ b/calibration/test/e2e_test.jl @@ -20,7 +20,7 @@ using Test const experiment_dir = joinpath(pkgdir(CA), "calibration", "test") const model_interface = joinpath(pkgdir(CA), "calibration", "model_interface.jl") -const output_dir = joinpath("output", "calibration_end_to_end_test") +const output_dir = "calibration_end_to_end_test" include(model_interface) # Observation map From c8cfa9e0f928ce8e1df8116ef9351234deaf9e5e Mon Sep 17 00:00:00 2001 From: nefrathenrici Date: Tue, 1 Oct 2024 15:02:23 -0700 Subject: [PATCH 2/2] Check that the cluster is not too busy to run calibration test --- calibration/test/e2e_test.jl | 52 ++++++++++++++++++++++--------- calibration/test/model_config.yml | 2 +- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/calibration/test/e2e_test.jl b/calibration/test/e2e_test.jl index 81ea2007ea..a2d4463e55 100644 --- a/calibration/test/e2e_test.jl +++ b/calibration/test/e2e_test.jl @@ -16,16 +16,46 @@ import EnsembleKalmanProcesses as EKP import Statistics: var, mean using Test +using Dates + +if !(@isdefined backend) + backend = CAL.get_backend() +end +# Check that the wait time for the last hour does not exceed 20 minutes. +# This test schedules many slurm jobs and will be prohibitively slow if the cluster is busy +if backend <: CAL.HPCBackend + wait_times = readchomp( + `sacct --allocations -u esmbuild --starttime now-1hour -o Submit,Start -n`, + ) + wait_times = split(wait_times, '\n', keepempty = false) + # Filter jobs that have not been submitted and started + filter!(x -> !(contains(x, "Unknown") || contains(x, "None")), wait_times) + + mean_wait_time_in_mins = + mapreduce(+, wait_times; init = 0) do line + t1_str, t2_str = split(line) + t1 = DateTime(t1_str, dateformat"yyyy-mm-ddTHH:MM:SS") + t2 = DateTime(t2_str, dateformat"yyyy-mm-ddTHH:MM:SS") + Dates.value(t2 - t1) / 1000 / 60 + end / length(wait_times) + + if mean_wait_time_in_mins > 20 + @warn """Average wait time for esmbuild is $(round(mean_wait_time_in_mins, digits=2)) minutes. \ + Cluster is too busy to run this test, exiting""" + exit() + end +end + # Paths and setup const experiment_dir = joinpath(pkgdir(CA), "calibration", "test") const model_interface = joinpath(pkgdir(CA), "calibration", "model_interface.jl") const output_dir = "calibration_end_to_end_test" include(model_interface) +ensemble_size = 10 # Observation map function CAL.observation_map(iteration) - ensemble_size = 10 single_member_dims = (1,) G_ensemble = Array{Float64}(undef, single_member_dims..., ensemble_size) @@ -66,7 +96,6 @@ astronomical_unit = 149_597_870_000 observations = JLD2.load_object(obs_path) noise = 0.1 * I n_iterations = 3 -ensemble_size = 10 prior = CAL.get_prior(joinpath(experiment_dir, "prior.toml")) experiment_config = CAL.ExperimentConfig(; n_iterations, @@ -88,20 +117,15 @@ function minimal_eki_test(eki) @test mean(last(params)) ≈ astronomical_unit rtol = 0.02 end -# Caltech HPC backend -if !(@isdefined backend) - backend = CAL.get_backend() -end @info "Running calibration E2E test" backend if backend <: CAL.HPCBackend - hpc_kwargs = - test_eki = CAL.calibrate( - backend, - experiment_config; - hpc_kwargs = CAL.kwargs(time = 15), - model_interface, - verbose = true, - ) + test_eki = CAL.calibrate( + backend, + experiment_config; + hpc_kwargs = CAL.kwargs(time = 15), + model_interface, + verbose = true, + ) else test_eki = CAL.calibrate(backend, experiment_config) end diff --git a/calibration/test/model_config.yml b/calibration/test/model_config.yml index 45272f6966..d36b49d516 100644 --- a/calibration/test/model_config.yml +++ b/calibration/test/model_config.yml @@ -4,7 +4,7 @@ config: column h_elem: 1 insolation: timevarying job_id: calibration_end_to_end_test -output_dir: output/calibration_end_to_end_test +output_dir: calibration_end_to_end_test output_default_diagnostics: false dt_rad: 6hours rad: clearsky