From 4d555c3a11e6002f251c2395aff3c47f98b3a797 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Tue, 31 Dec 2024 10:59:21 -0500 Subject: [PATCH] More reproducibility fixes --- .../reproducibility_utils.jl | 146 +++++++++++++----- test/unit_reproducibility_infra.jl | 115 ++++++++++++++ 2 files changed, 220 insertions(+), 41 deletions(-) diff --git a/reproducibility_tests/reproducibility_utils.jl b/reproducibility_tests/reproducibility_utils.jl index 8db107c62a..20058ba8a9 100644 --- a/reproducibility_tests/reproducibility_utils.jl +++ b/reproducibility_tests/reproducibility_utils.jl @@ -405,19 +405,19 @@ rm_folder(path; strip_folder) = """ move_data_to_save_dir(; - dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci", - commit = get(ENV, "BUILDKITE_COMMIT", nothing), branch = get(ENV, "BUILDKITE_BRANCH", nothing), in_merge_queue = startswith(branch, "gh-readonly-queue/main/"), dirs_src, - strip_folder = Pair("output_active", ""), ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), ref_counter_PR = read_ref_counter(ref_counter_file_PR), skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), n_hash_characters = 7, repro_folder = "reproducibility_bundle", + strip_folder = "output_active", ) Moves data in the following way: @@ -444,19 +444,19 @@ Data movement will occur when this function is called: code in the latest comparable reference """ function move_data_to_save_dir(; - dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci", - commit = get(ENV, "BUILDKITE_COMMIT", nothing), branch = get(ENV, "BUILDKITE_BRANCH", nothing), in_merge_queue = startswith(branch, "gh-readonly-queue/main/"), dirs_src, - strip_folder = "output_active", 
ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), ref_counter_PR = read_ref_counter(ref_counter_file_PR), skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), n_hash_characters = 7, repro_folder = "reproducibility_bundle", + strip_folder = "output_active", ) buildkite_ci || return nothing @@ -471,46 +471,29 @@ function move_data_to_save_dir(; branch == "main" && source_has_changed(; n = 1, root_dir = dest_root, ref_counter_PR, skip) ) - commit_sha = commit[1:min(n_hash_characters, length(commit))] - mkpath(dest_root) - dest_dir = joinpath(dest_root, commit_sha) - mkpath(dest_dir) - dest_repro = joinpath(dest_dir, repro_folder) - mkpath(dest_repro) - # Always move reproducibility data, so that we - # can compare against multiple references - for src in dirs_src - dst = joinpath(dest_repro, basename(src)) - debug_reproducibility() && @info "Repro: moving $src to $dst" - mv(src, dst; force = true) - end - for dst in all_files_in_dir(dest_repro) - dst_new = rm_folder(dst; strip_folder) - if debug_reproducibility() - @show isfile(dst) - @show dst - @show dst_new - end - if dst ≠ dst_new - mkpath(dirname(dst_new)) - debug_reproducibility() && - @info "Repro: re-moving $dst to $dst_new" - mv(dst, dst_new; force = true) - end + (; files_src, files_dest) = save_dir_in_out_list(; + dirs_src, + dest_root, + commit, + n_hash_characters, + repro_folder, + strip_folder, + ) + for (src, dest) in zip(files_src, files_dest) + @assert isfile(src) + mkpath(dirname(dest)) + mv(src, dest; force = true) end + dest_repro = destination_directory(; + dest_root, + commit, + n_hash_characters, + repro_folder, + ) ref_counter_file_main = joinpath(dest_repro, "ref_counter.jl") debug_reproducibility() && @info "Repro: moving $ref_counter_file_PR to $ref_counter_file_main" mv(ref_counter_file_PR, ref_counter_file_main; force = true) - if 
debug_reproducibility() - println("####################### SRC") - for src in dirs_src - @info(string_all_files_in_dir(src)) - end - println("####################### DST") - @info(string_all_files_in_dir(dest_repro)) - println("#######################") - end else if debug_reproducibility() @warn "Repro: skipping data movement" @@ -526,6 +509,87 @@ function move_data_to_save_dir(; end end + +""" + save_dir_transform( + src; + job_id, + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", + strip_folder = "output_active", + ) + +Returns the output file, to be saved, given: + - `src` the source file + - `job_id` the job ID + - `dest_root` the destination root directory + - `commit` the commit hash + - `n_hash_characters` truncates the commit hash to given number of characters + - `repro_folder` reproducibility folder + - `strip_folder` folder to strip out in output path +""" +function save_dir_transform( + src; + job_id, + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", + strip_folder = "output_active", +) + dest_repro = destination_directory(; + dest_root, + commit, + n_hash_characters, + repro_folder, + ) + src_filename = basename(src) + dst = joinpath(dest_repro, job_id, src_filename) + return rm_folder(dst; strip_folder) +end + +""" + destination_directory() +""" +function destination_directory(; + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", +) + commit_sha = commit[1:min(n_hash_characters, length(commit))] + dest_dir = joinpath(dest_root, commit_sha) + return joinpath(dest_dir, repro_folder) +end + +""" + save_dir_in_out_list + +Returns two vectors of 
strings, containing input and output files, for moving +data from the computed to saved directories. + +```julia +(; files_src, files_dest) = save_dir_in_out_list(; dirs_src) +for (src, dest) in zip(files_src, files_dest) + mv(src, dest; force = true) +end +``` +""" +function save_dir_in_out_list(; dirs_src, kwargs...) + files_dest = String[] + files_src = String[] + for src_dir in dirs_src + job_id = basename(src_dir) + for src in all_files_in_dir(src_dir) + push!(files_src, src) + push!(files_dest, save_dir_transform(src; job_id, kwargs...)) + end + end + return (; files_src, files_dest) +end + parse_file(file) = eval(Meta.parse(join(readlines(file)))) # parse_file(file) = parse_file_json(file) # doesn't work for some reason parse_file_json(file) = diff --git a/test/unit_reproducibility_infra.jl b/test/unit_reproducibility_infra.jl index 145f98fac8..2411a4d299 100644 --- a/test/unit_reproducibility_infra.jl +++ b/test/unit_reproducibility_infra.jl @@ -683,6 +683,121 @@ end end end +@testset "Reproducibility infrastructure: save_dir_transform" begin + make_and_cd() do dir + job_id = "job_id" + commit = "commit_sha" + n_hash_characters = 10 + output = "output_active" + strip_folder = output + repro_folder = "rbundle" + src = "$job_id/$output/$repro_folder/prog_state.hdf5" + dst = "$commit/$repro_folder/$job_id/prog_state.hdf5" + @test save_dir_transform( + src; + dest_root = dir, + job_id, + commit, + n_hash_characters, + repro_folder, + strip_folder, + ) == joinpath(dir, dst) + + job_id = "job_id" + commit = "commit_sha" + n_hash_characters = 10 + output = "output_active" + strip_folder = output + repro_folder = "rbundle" + src = "$job_id/$output/prog_state.hdf5" + dst = "$commit/$repro_folder/$job_id/prog_state.hdf5" + @test save_dir_transform( + src; + dest_root = dir, + job_id, + commit, + n_hash_characters, + repro_folder, + strip_folder, + ) == joinpath(dir, dst) + end +end + +@testset "Reproducibility infrastructure: save_dir_in_out_list" begin + 
mktempdir2_cd_computed() do (save_dir, computed_dir) + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1, "repro_bundle") + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2, "repro_bundle") + + make_file_with_contents(computed_dir, "file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = + make_ref_file_counter(3, computed_dir, "repro_bundle") + job_id_1 = joinpath(computed_dir, "repro_bundle", "job_id_1") + job_id_2 = joinpath(computed_dir, "repro_bundle", "job_id_2") + + mkpath(joinpath(job_id_1, "output_active")) + file = joinpath(job_id_1, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + mkpath(joinpath(job_id_2, "output_active")) + file = joinpath(job_id_2, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == source_checksum(computed_dir) + + repro_folder = "repro_bundle" + (; files_src, files_dest) = save_dir_in_out_list(; + dirs_src = [job_id_1, job_id_2], + dest_root = save_dir, + commit = "commit_sha", + n_hash_characters = 10, + repro_folder, + strip_folder = "output_active", + ) + + @test files_src[1] == joinpath( + computed_dir, + "repro_bundle", + "job_id_1", + "output_active", + "ref_prog_state.dat", + ) + @test files_src[2] == joinpath( + computed_dir, + "repro_bundle", + "job_id_2", + "output_active", + "ref_prog_state.dat", + ) + @test files_dest[1] == joinpath( + save_dir, + "commit_sha", + "repro_bundle", + "job_id_1", + 
"ref_prog_state.dat", + ) + @test files_dest[2] == joinpath( + save_dir, + "commit_sha", + "repro_bundle", + "job_id_2", + "ref_prog_state.dat", + ) + + end +end + using ClimaComms using ClimaCore: Spaces, Fields, Grids, InputOutput using ClimaCore