Skip to content

Commit

Permalink
allocs -> max RSS
Browse files Browse the repository at this point in the history
  • Loading branch information
juliasloan25 committed Jun 6, 2024
1 parent 8737bd6 commit 8052dd2
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 30 deletions.
8 changes: 2 additions & 6 deletions .buildkite/benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,5 @@ temperature and sea ice
### Comparison Metrics
- Simulated years per day (SYPD): The number of years of simulation time we
can run in 1 day of walltime
- CPU simulation object allocations: The allocations in GB of the simulation
object, which contains everything needed to run the simulation.
In the atmosphere-only case, this is the `AtmosSimulation` object.
In the coupled case, this is the `CoupledSimulation` object, which includes
all of the component models, coupler fields, and auxiliary objects. More
information on this object can be found in the `Interfacer` docs.
- CPU maximum resident set size (max RSS): The peak amount of CPU memory held
resident by this process since it began. This is measured for both CPU and GPU runs.
6 changes: 3 additions & 3 deletions experiments/ClimaEarth/run_amip.jl
Original file line number Diff line number Diff line change
Expand Up @@ -822,9 +822,9 @@ if ClimaComms.iamroot(comms_ctx)
sypd_filename = joinpath(dir_paths.artifacts, "sypd.txt")
write(sypd_filename, "$sypd")

cpu_allocs_GB = Utilities.show_memory_usage(comms_ctx)
cpu_allocs_filename = joinpath(dir_paths.artifacts, "allocations_cpu.txt")
write(cpu_allocs_filename, cpu_allocs_GB)
cpu_max_rss_GB = Utilities.show_memory_usage(comms_ctx)
cpu_max_rss_filename = joinpath(dir_paths.artifacts, "max_rss_cpu.txt")
write(cpu_max_rss_filename, cpu_max_rss_GB)
end

#=
Expand Down
28 changes: 14 additions & 14 deletions experiments/ClimaEarth/user_io/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Our goal here is to output a table displaying some results from benchmark runs
in the coupler. We want to be able to compare between CPU and GPU runs, as well
as between coupled and atmos-only runs. The metrics we want to compare are
SYPD, allocations, and the maximum, median, and mean differences between the
CPU and GPU states.
SYPD, CPU memory usage (max RSS), and the maximum, median, and mean differences
between the CPU and GPU states.
The table should look something like this (note that the last 3 columns will be
added in a future PR):
Expand All @@ -12,11 +12,11 @@ added in a future PR):
------------------------------------------------------------------------------------
| | $job_id | $job_id | | | |
| Coupled run | $SYPD | $SYPD | $max_diff | $median_diff | $mean_diff |
| | $cpu_allocs | $cpu_allocs | | | |
| | $cpu_max_rss | $cpu_max_rss| | | |
------------------------------------------------------------------------------------
| | $job_id | $job_id | | | |
| Atmos-only | $SYPD | $SYPD | $max_diff | $median_diff | $mean_diff |
| | $cpu_allocs | $cpu_allocs | | | |
| | $cpu_max_rss | $cpu_max_rss| | | |
------------------------------------------------------------------------------------
=#
Expand Down Expand Up @@ -108,20 +108,20 @@ function get_run_info(parsed_args, run_type)
end

"""
get_sypd_allocs(artifacts_dir)
get_run_data(artifacts_dir)
Read in SYPD and allocations info from artifacts directories.
Read in run data from artifacts directories, currently SYPD and max RSS on the CPU.
"""
function get_sypd_allocs(artifacts_dir)
function get_run_data(artifacts_dir)
# NOTE(review): this span is a rendered diff without +/- markers, so the
# pre-change lines (`get_sypd_allocs`, `cpu_allocs*`, "allocations_cpu.txt")
# appear next to their post-change replacements (`get_run_data`,
# `cpu_max_rss*`, "max_rss_cpu.txt").
#
# Reads two text files from `artifacts_dir` and returns a 2-tuple:
# the SYPD value parsed as a Float64, and the memory entry as a raw String.
#
# NOTE(review): neither handle returned by `open` is closed in this function.
# `read(path, String)` or `open(path) do io ... end` would avoid leaving the
# files open — consider switching.
# Read in SYPD info
sypd_file = open(joinpath(artifacts_dir, "sypd.txt"), "r")
# Parse the file contents as a Float64 and round to 4 digits.
sypd = round(parse(Float64, read(sypd_file, String)), digits = 4)

# Read in allocations info
cpu_allocs_file = open(joinpath(artifacts_dir, "allocations_cpu.txt"), "r")
cpu_allocs = read(cpu_allocs_file, String)
# Read in max RSS info
# Kept as the raw file contents (a String); unlike SYPD it is not parsed.
cpu_max_rss_file = open(joinpath(artifacts_dir, "max_rss_cpu.txt"), "r")
cpu_max_rss = read(cpu_max_rss_file, String)

return (sypd, cpu_allocs)
return (sypd, cpu_max_rss)
end

"""
Expand All @@ -131,14 +131,14 @@ Append data for a given setup to the table data.
"""
function append_table_data(table_data, setup_id, cpu_job_id, gpu_job_id, cpu_artifacts_dir, gpu_artifacts_dir)
# Get SYPD and CPU max RSS info for both input runs
# NOTE(review): rendered diff — the `get_sypd_allocs` calls are the pre-change
# version of the `get_run_data` calls that follow them.
# `gpu_cpu_*` holds the CPU-side memory entry read from the GPU run's
# artifacts directory.
cpu_sypd, cpu_allocs = get_sypd_allocs(cpu_artifacts_dir)
gpu_sypd, gpu_cpu_allocs = get_sypd_allocs(gpu_artifacts_dir)
cpu_sypd, cpu_max_rss = get_run_data(cpu_artifacts_dir)
gpu_sypd, gpu_cpu_max_rss = get_run_data(gpu_artifacts_dir)

# Create rows containing data for these runs
# Each row has four columns: setup label (only on the SYPD row), metric label,
# value from the CPU run, value from the GPU run.
new_table_data = [
["" "job ID:" cpu_job_id gpu_job_id]
[setup_id "SYPD:" cpu_sypd gpu_sypd]
["" "CPU max RSS allocs:" cpu_allocs gpu_cpu_allocs]
["" "CPU max RSS:" cpu_max_rss gpu_cpu_max_rss]
]
# Stack the new rows underneath the rows already collected in `table_data`.
return vcat(table_data, new_table_data)
end
Expand Down
8 changes: 4 additions & 4 deletions src/Utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ CPU of this process since it began.
`comms_ctx`: the communication context being used to run the model
"""
function show_memory_usage(comms_ctx)
# NOTE(review): this span is a rendered diff without +/- markers, so the
# pre-change `cpu_allocs_GB` lines appear next to their post-change
# `cpu_max_rss_GB` replacements.
#
# The result stays "" unless `ClimaComms.iamroot(comms_ctx)` is true, so the
# function always returns a String.
cpu_allocs_GB = ""
cpu_max_rss_GB = ""
if ClimaComms.iamroot(comms_ctx)
# NOTE(review): `Sys.maxrss()` reports bytes, and dividing by 1e9 yields
# decimal gigabytes (GB), but the label appended here is " GiB" (2^30 bytes).
# Confirm the intended unit, then either divide by 2^30 or relabel as "GB".
cpu_allocs_GB = "CPU: " * string(round(Sys.maxrss() / 1e9, digits = 3)) * " GiB"
@info cpu_allocs_GB
cpu_max_rss_GB = string(round(Sys.maxrss() / 1e9, digits = 3)) * " GiB"
# Log the formatted value before handing it back to the caller.
@info cpu_max_rss_GB
end
return cpu_allocs_GB
return cpu_max_rss_GB
end

end # module
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ if ClimaComms.iamroot(comms_ctx)
sypd_filename = joinpath(output_dir, "sypd.txt")
write(sypd_filename, "$sypd")

cpu_allocs_GB = Utilities.show_memory_usage(comms_ctx)
cpu_allocs_filename = joinpath(output_dir, "allocations_cpu.txt")
write(cpu_allocs_filename, cpu_allocs_GB)
cpu_max_rss_GB = Utilities.show_memory_usage(comms_ctx)
cpu_max_rss_filename = joinpath(output_dir, "max_rss_cpu.txt")
write(cpu_max_rss_filename, cpu_max_rss_GB)
end

0 comments on commit 8052dd2

Please sign in to comment.