Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script to calculate algorithm duration distribution from graphml #25

Merged
merged 2 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions data/ATLAS/q449/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,14 @@ Standard reconstruction test job
```sh
ATHENA_CORE_NUMBER=8 Reco_tf.py --multithreaded --AMIConfig q449 --preExec "ConfigFlags.PerfMon.doFullMonMT=True" --postExec "from AthenaConfiguration.ComponentFactory import CompFactory;from GaudiHive.GaudiHiveConf import PrecedenceSvc; cfg.addService(CompFactory.PrecedenceSvc(DumpPrecedenceRules=True))"
```
# Algorithm execution duration

| Statistic | Value |
|-|-|
| min | 9.08e-07 s |
| median | 1.40e-04 s |
| mean | 9.01e-03 s |
| max | 2.15e+01 s |
| std | 1.77e-01 s |

![Algorithm execution duration](img/alg_exec_dist.png)
Binary file added data/ATLAS/q449/img/alg_exec_dist.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 11 additions & 0 deletions scripts/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
96 changes: 96 additions & 0 deletions scripts/gaudi_alg_exec_dist.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env julia

using CSV
using DataFrames
using Statistics
using Plots
using ArgParse
using Printf
using EzXML
using MetaGraphs

include(joinpath(@__DIR__, "../deps/GraphMLReader.jl/src/GraphMLReader.jl"))

"""
    parse_args(args)

Parse the script's command-line arguments `args` with ArgParse.

Two positional arguments are declared: the mandatory `input` and the optional
`output`. The result of `ArgParse.parse_args` for these settings is returned
unchanged.
"""
function parse_args(args)
    description = """
        Calculate distributions of Gaudi algorithm execution duration time
        from a timeline extracted with Gaudi TimelineSvc or data-flow graph
        """
    settings = ArgParseSettings(; description=description)

    @add_arg_table! settings begin
        "input"
            arg_type = String
            required = true
            help = "Input Gaudi timeline CSV file or data-flow graph GraphML file"

        "output"
            arg_type = String
            required = false
            help = "Output histogram file"
    end

    return ArgParse.parse_args(args, settings)
end

"""
    durations_from_csv(filename)

Load the CSV file `filename` into a `DataFrame` and return, for every row, the
difference between its `end` and `#start` columns divided by `1e9`.

The division presumably converts nanosecond timestamps to seconds; confirm
against the producer of the timeline file.
"""
function durations_from_csv(filename)
    timeline = CSV.read(filename, DataFrame)
    # The header written by the producer starts with '#'; give the column a plain name.
    rename!(timeline, "#start" => :start)
    elapsed = timeline.end .- timeline.start
    return elapsed ./ 1e9
end

"""
    durations_from_graphml(filename)

Load the graph stored under the key `"G"` in the GraphML file `filename` and
return the `:runtime_average_s` property of every vertex whose `:type` is
`"Algorithm"`. Vertices without that property are skipped.
"""
function durations_from_graphml(filename)
    graph = GraphMLReader.loadgraphml(filename, "G")
    algorithms = MetaGraphs.filter_vertices(graph, :type, "Algorithm")
    # Lazily drop algorithm vertices that carry no runtime information.
    timed = Iterators.filter(v -> has_prop(graph, v, :runtime_average_s), algorithms)
    return [get_prop(graph, v, :runtime_average_s) for v in timed]
end

"""
    main(args)

Entry point of the script.

Parses `args`, loads algorithm execution durations from the input file (a
`.csv` timeline or a `.graphml` data-flow graph, selected by file extension),
prints the number of entries together with the min / median / mean / max / std
of the durations, and, when an output file was given, saves a log-binned
histogram of the strictly positive durations to it.

# Throws
- `ArgumentError` if the input file extension is neither `.csv` nor `.graphml`.
- `ArgumentError` if no durations could be extracted from the input file.
"""
function main(args)
    parsed_args = parse_args(args)

    input_file = parsed_args["input"]
    ext = splitext(input_file)[2]
    # Fail early on unknown formats instead of reducing over an empty, untyped vector.
    durations = if ext == ".csv"
        durations_from_csv(input_file)
    elseif ext == ".graphml"
        durations_from_graphml(input_file)
    else
        throw(ArgumentError(
            "unsupported input file extension \"$ext\" (expected .csv or .graphml)"))
    end
    isempty(durations) &&
        throw(ArgumentError("no algorithm durations found in \"$input_file\""))

    n = length(durations)
    min_duration, max_duration = extrema(durations)
    println("Entries: $n")
    println("Algorithm execution duration:")
    @printf "\tmin:\t %.2e s\n" min_duration
    @printf "\tmedian:\t %.2e s\n" median(durations)
    @printf "\tmean:\t %.2e s\n" mean(durations)
    @printf "\tmax:\t %.2e s\n" max_duration
    @printf "\tstd:\t %.2e s\n" std(durations)

    output_file = parsed_args["output"]
    isnothing(output_file) && return nothing

    # Only strictly positive values can be placed on a logarithmic axis.
    plotted = durations
    if min_duration <= 0
        @warn "Skipping negative and zero durations"
        plotted = filter(x -> x > 0, durations)
        if isempty(plotted)
            @warn "No positive durations left, histogram not produced"
            return nothing
        end
        min_duration, max_duration = extrema(plotted)
        n = length(plotted)
    end
    num_bins = sqrt(n) |> ceil |> Int

    lo = log10(min_duration)
    hi = log10(max_duration)
    if lo == hi
        # All values are identical: widen the range to one decade so that the
        # bin edges do not collapse onto a single point.
        lo -= 0.5
        hi += 0.5
    end
    bin_edges = exp10.(range(lo, stop=hi, length=num_bins + 3))

    # Plot the filtered sample so the histogram matches the warning above.
    histogram(plotted; label="", bin=bin_edges, xscale=:log10, xlim=extrema(bin_edges),
        title="Algorithm execution duration", xlabel="Duration (s)", ylabel="Counts",
        xguidefonthalign=:right, yguidefontvalign=:top)
    savefig(output_file)
    @info "Histogram saved to $output_file"
    return nothing
end

# Run `main` only when this file is executed as a script, not when it is included.
if (@__FILE__) == abspath(PROGRAM_FILE)
    main(ARGS)
end