From 6d92728f5f606652bbd5a5a0457baad32f2194e0 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Wed, 25 Dec 2024 21:26:17 -0500 Subject: [PATCH] Reproducibility infrastructure fixes --- reproducibility_tests/ref_counter.jl | 5 +- .../reproducibility_tools.jl | 20 +++++--- .../reproducibility_utils.jl | 7 ++- test/unit_reproducibility_infra.jl | 46 +++++-------------- 4 files changed, 35 insertions(+), 43 deletions(-) diff --git a/reproducibility_tests/ref_counter.jl b/reproducibility_tests/ref_counter.jl index c3ecc92a7d..acc62d2d3c 100644 --- a/reproducibility_tests/ref_counter.jl +++ b/reproducibility_tests/ref_counter.jl @@ -1,4 +1,4 @@ -191 +192 # **README** # @@ -21,6 +21,9 @@ #= +192 +- Reproducibility infrastructure fixes. + 191 - Reproducibility infrastructure debugging. diff --git a/reproducibility_tests/reproducibility_tools.jl b/reproducibility_tests/reproducibility_tools.jl index 10739c34b7..c064e88e2f 100644 --- a/reproducibility_tests/reproducibility_tools.jl +++ b/reproducibility_tests/reproducibility_tools.jl @@ -27,8 +27,15 @@ function error_if_dissimilar_dicts(dicts, dict) end end -all_files_in_dir(dir) = - map((root, dirs, files) -> joinpath(root, file), walkdir(dir)) +function all_files_in_dir(dir) + all_files = String[] + for (root, dirs, files) in walkdir(dir) + for file in files + push!(all_files, joinpath(root, file)) + end + end + return all_files +end function no_comparison_error(dirs, non_existent_files) msg = "\n\n" @@ -252,11 +259,12 @@ function export_reproducibility_results( joinpath(@__DIR__, "ref_counter.jl"), ), skip::Bool = !haskey(ENV, "BUILDKITE_COMMIT"), + repro_folder = "reproducibility_bundle", ) - repro_folder = joinpath(computed_dir, "reproducibility_bundle") - data_file_computed = joinpath(repro_folder, computed_filename) + repro_dir = joinpath(computed_dir, repro_folder) + data_file_computed = joinpath(repro_dir, computed_filename) - mkpath(repro_folder) + mkpath(repro_dir) hdfwriter = InputOutput.HDF5Writer(data_file_computed, comms_ctx) InputOutput.write!(hdfwriter, field_vec, name) Base.close(hdfwriter) @@ -277,7 +285,7 @@ function export_reproducibility_results( for (computed_mse, dir) in zip(computed_mses, dirs) commit_hash = basename(dirname(dir)) computed_mse_file = - joinpath(repro_folder, "computed_mse_$commit_hash.json") + joinpath(repro_dir, "computed_mse_$commit_hash.json") open(computed_mse_file, "w") do io JSON.print(io, computed_mse) diff --git a/reproducibility_tests/reproducibility_utils.jl b/reproducibility_tests/reproducibility_utils.jl index c1065fc46c..122a162725 100644 --- a/reproducibility_tests/reproducibility_utils.jl +++ b/reproducibility_tests/reproducibility_utils.jl @@ -401,6 +401,7 @@ function move_data_to_save_dir(; ref_counter_PR = read_ref_counter(ref_counter_file_PR), skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", n_hash_characters = 7, + repro_folder = "reproducibility_bundle", ) buildkite_ci || return nothing @@ -419,15 +420,17 @@ function move_data_to_save_dir(; mkpath(dest_root) dest_dir = joinpath(dest_root, commit_sha) mkpath(dest_dir) + dest_repro = joinpath(dest_dir, repro_folder) + mkpath(dest_repro) # Always move reproducibility data, so that we # can compare against multiple references for src in dirs_src - dst = joinpath(dest_dir, basename(src)) + dst = joinpath(dest_repro, basename(src)) mv(src, dst; force = true) debug_reproducibility() && @info "Reproducibility: File $src moved to $dst" end - ref_counter_file_main = joinpath(dest_dir, "ref_counter.jl") + ref_counter_file_main = joinpath(dest_repro, "ref_counter.jl") mv(ref_counter_file_PR, ref_counter_file_main; force = true) else if debug_reproducibility() diff --git a/test/unit_reproducibility_infra.jl b/test/unit_reproducibility_infra.jl index 730338417b..3eee46222a 100644 --- a/test/unit_reproducibility_infra.jl +++ b/test/unit_reproducibility_infra.jl @@ -1199,6 +1199,8 @@ if pkgversion(ClimaCore) ≥ v"0.14.18" @test source_checksum(hash1) == source_checksum(computed_dir) @test source_checksum(hash2) == source_checksum(computed_dir) + repro_folder = "repro_bundle" + repro_dir = joinpath(save_dir, "hash_new", repro_folder) move_data_to_save_dir(; dest_root = save_dir, buildkite_ci = true, @@ -1212,25 +1214,12 @@ if pkgversion(ClimaCore) ≥ v"0.14.18" "ref_counter.jl", ), ref_counter_PR = 3, + repro_folder, skip = false, ) - @test isfile( - joinpath( - save_dir, - "hash_new", - "job_id_1", - "ref_prog_state.hdf5", - ), - ) - @test isfile( - joinpath( - save_dir, - "hash_new", - "job_id_2", - "ref_prog_state.hdf5", - ), - ) - @test isfile(joinpath(save_dir, "hash_new", "ref_counter.jl")) + @test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.hdf5")) + @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.hdf5")) + @test isfile(joinpath(repro_dir, "ref_counter.jl")) end end @@ -1283,6 +1272,8 @@ if pkgversion(ClimaCore) ≥ v"0.14.18" @test source_checksum(hash1) == source_checksum(computed_dir) @test source_checksum(hash2) == source_checksum(computed_dir) + repro_folder = "repro_bundle" + repro_dir = joinpath(save_dir, "hash_new", repro_folder) move_data_to_save_dir(; dest_root = save_dir, buildkite_ci = true, @@ -1295,26 +1286,13 @@ if pkgversion(ClimaCore) ≥ v"0.14.18" ref_counter_file_dir, "ref_counter.jl", ), + repro_folder, ref_counter_PR = 3, skip = false, ) - @test isfile( - joinpath( - save_dir, - "hash_new", - "job_id_1", - "ref_prog_state.hdf5", - ), - ) - @test isfile( - joinpath( - save_dir, - "hash_new", - "job_id_2", - "ref_prog_state.hdf5", - ), - ) - @test isfile(joinpath(save_dir, "hash_new", "ref_counter.jl")) + @test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.hdf5")) + @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.hdf5")) + @test isfile(joinpath(repro_dir, "ref_counter.jl")) end end end