Skip to content

Commit

Permalink
Merge pull request #3496 from CliMA/ck/repro_fixes
Browse files Browse the repository at this point in the history
Reproducibility infrastructure fixes
  • Loading branch information
charleskawczynski authored Dec 26, 2024
2 parents 29ec888 + 6d92728 commit e84ed15
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 43 deletions.
5 changes: 4 additions & 1 deletion reproducibility_tests/ref_counter.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
191
192

# **README**
#
Expand All @@ -21,6 +21,9 @@

#=
192
- Reproducibility infrastructure fixes.
191
- Reproducibility infrastructure debugging.
Expand Down
20 changes: 14 additions & 6 deletions reproducibility_tests/reproducibility_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,15 @@ function error_if_dissimilar_dicts(dicts, dict)
end
end

all_files_in_dir(dir) =
map((root, dirs, files) -> joinpath(root, file), walkdir(dir))
"""
    all_files_in_dir(dir)

Return a `Vector{String}` holding the path of every file encountered while
walking `dir` with `walkdir`, including files inside nested subdirectories.

Each entry is `joinpath(root, file)` for the `root` reported by `walkdir`.
Directories themselves are not listed; the result is empty when the walk
yields no files.
"""
function all_files_in_dir(dir)
    paths = String[]
    for (root, _, names) in walkdir(dir)
        # Join the current directory onto each of its file names in one go.
        append!(paths, joinpath.(root, names))
    end
    return paths
end

function no_comparison_error(dirs, non_existent_files)
msg = "\n\n"
Expand Down Expand Up @@ -252,11 +259,12 @@ function export_reproducibility_results(
joinpath(@__DIR__, "ref_counter.jl"),
),
skip::Bool = !haskey(ENV, "BUILDKITE_COMMIT"),
repro_folder = "reproducibility_bundle",
)
repro_folder = joinpath(computed_dir, "reproducibility_bundle")
data_file_computed = joinpath(repro_folder, computed_filename)
repro_dir = joinpath(computed_dir, repro_folder)
data_file_computed = joinpath(repro_dir, computed_filename)

mkpath(repro_folder)
mkpath(repro_dir)
hdfwriter = InputOutput.HDF5Writer(data_file_computed, comms_ctx)
InputOutput.write!(hdfwriter, field_vec, name)
Base.close(hdfwriter)
Expand All @@ -277,7 +285,7 @@ function export_reproducibility_results(
for (computed_mse, dir) in zip(computed_mses, dirs)
commit_hash = basename(dirname(dir))
computed_mse_file =
joinpath(repro_folder, "computed_mse_$commit_hash.json")
joinpath(repro_dir, "computed_mse_$commit_hash.json")

open(computed_mse_file, "w") do io
JSON.print(io, computed_mse)
Expand Down
7 changes: 5 additions & 2 deletions reproducibility_tests/reproducibility_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ function move_data_to_save_dir(;
ref_counter_PR = read_ref_counter(ref_counter_file_PR),
skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci",
n_hash_characters = 7,
repro_folder = "reproducibility_bundle",
)
buildkite_ci || return nothing

Expand All @@ -419,15 +420,17 @@ function move_data_to_save_dir(;
mkpath(dest_root)
dest_dir = joinpath(dest_root, commit_sha)
mkpath(dest_dir)
dest_repro = joinpath(dest_dir, repro_folder)
mkpath(dest_repro)
# Always move reproducibility data, so that we
# can compare against multiple references
for src in dirs_src
dst = joinpath(dest_dir, basename(src))
dst = joinpath(dest_repro, basename(src))
mv(src, dst; force = true)
debug_reproducibility() &&
@info "Reproducibility: File $src moved to $dst"
end
ref_counter_file_main = joinpath(dest_dir, "ref_counter.jl")
ref_counter_file_main = joinpath(dest_repro, "ref_counter.jl")
mv(ref_counter_file_PR, ref_counter_file_main; force = true)
else
if debug_reproducibility()
Expand Down
46 changes: 12 additions & 34 deletions test/unit_reproducibility_infra.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,8 @@ if pkgversion(ClimaCore) ≥ v"0.14.18"
@test source_checksum(hash1) == source_checksum(computed_dir)
@test source_checksum(hash2) == source_checksum(computed_dir)

repro_folder = "repro_bundle"
repro_dir = joinpath(save_dir, "hash_new", repro_folder)
move_data_to_save_dir(;
dest_root = save_dir,
buildkite_ci = true,
Expand All @@ -1212,25 +1214,12 @@ if pkgversion(ClimaCore) ≥ v"0.14.18"
"ref_counter.jl",
),
ref_counter_PR = 3,
repro_folder,
skip = false,
)
@test isfile(
joinpath(
save_dir,
"hash_new",
"job_id_1",
"ref_prog_state.hdf5",
),
)
@test isfile(
joinpath(
save_dir,
"hash_new",
"job_id_2",
"ref_prog_state.hdf5",
),
)
@test isfile(joinpath(save_dir, "hash_new", "ref_counter.jl"))
@test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.hdf5"))
@test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.hdf5"))
@test isfile(joinpath(repro_dir, "ref_counter.jl"))
end
end

Expand Down Expand Up @@ -1283,6 +1272,8 @@ if pkgversion(ClimaCore) ≥ v"0.14.18"
@test source_checksum(hash1) == source_checksum(computed_dir)
@test source_checksum(hash2) == source_checksum(computed_dir)

repro_folder = "repro_bundle"
repro_dir = joinpath(save_dir, "hash_new", repro_folder)
move_data_to_save_dir(;
dest_root = save_dir,
buildkite_ci = true,
Expand All @@ -1295,26 +1286,13 @@ if pkgversion(ClimaCore) ≥ v"0.14.18"
ref_counter_file_dir,
"ref_counter.jl",
),
repro_folder,
ref_counter_PR = 3,
skip = false,
)
@test isfile(
joinpath(
save_dir,
"hash_new",
"job_id_1",
"ref_prog_state.hdf5",
),
)
@test isfile(
joinpath(
save_dir,
"hash_new",
"job_id_2",
"ref_prog_state.hdf5",
),
)
@test isfile(joinpath(save_dir, "hash_new", "ref_counter.jl"))
@test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.hdf5"))
@test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.hdf5"))
@test isfile(joinpath(repro_dir, "ref_counter.jl"))
end
end
end

0 comments on commit e84ed15

Please sign in to comment.