Skip to content

Commit

Permalink
Add script to calculate algorithm duration distribution from graphml (#…
Browse files Browse the repository at this point in the history
…25)

* add script for calculating algorithm execution distribution from graphml or gaudi timeline

* add algorithm execution time plot for ATLAS test job
  • Loading branch information
m-fila authored Jul 19, 2024
1 parent afcc5b3 commit 09ace36
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 0 deletions.
11 changes: 11 additions & 0 deletions data/ATLAS/q449/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,14 @@ Standard reconstruction test job
```sh
ATHENA_CORE_NUMBER=8 Reco_tf.py --multithreaded --AMIConfig q449 --preExec "ConfigFlags.PerfMon.doFullMonMT=True" --postExec "from AthenaConfiguration.ComponentFactory import CompFactory;from GaudiHive.GaudiHiveConf import PrecedenceSvc; cfg.addService(CompFactory.PrecedenceSvc(DumpPrecedenceRules=True))"
```
# Algorithm execution duration

| | |
|-|-|
| min | 9.08e-07 s |
| median | 1.40e-04 s |
| mean | 9.01e-03 s |
| max | 2.15e+01 s |
| std | 1.77e-01 s |

![Algorithm execution duration](img/alg_exec_dist.png)
Binary file added data/ATLAS/q449/img/alg_exec_dist.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 11 additions & 0 deletions scripts/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
96 changes: 96 additions & 0 deletions scripts/gaudi_alg_exec_dist.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env julia

using CSV
using DataFrames
using Statistics
using Plots
using ArgParse
using Printf
using EzXML
using MetaGraphs

include(joinpath(@__DIR__, "../deps/GraphMLReader.jl/src/GraphMLReader.jl"))

"""
    parse_args(args)

Parse the command-line arguments `args` for this script and return the result of
`ArgParse.parse_args`.

Two positional string arguments are declared: a required `input` and an optional
`output`.
"""
function parse_args(args)
    about = """
        Calculate distributions of Gaudi algorithm execution duration time
        from a timeline extracted with Gaudi TimelineSvc or data-flow graph
        """
    settings = ArgParseSettings(; description=about)

    @add_arg_table! settings begin
        "input"
            help = "Input Gaudi timeline CSV file or data-flow graph GraphML file"
            arg_type = String
            required = true

        "output"
            help = "Output histogram file"
            arg_type = String
            required = false
    end

    # Qualified call: this function shares its name with the one exported by ArgParse.
    return ArgParse.parse_args(args, settings)
end

"""
    durations_from_csv(filename)

Read the CSV file `filename` into a `DataFrame` and return one duration per row,
computed as `(end - start) / 1e9`.

The `#start` column is renamed to `start`, and the computed values are stored in a
new `duration` column of the data frame before being returned.
"""
function durations_from_csv(filename)
    timeline = CSV.read(filename, DataFrame)
    rename!(timeline, "#start" => :start)
    # NOTE(review): the 1e9 divisor presumably converts nanoseconds to seconds - confirm
    # against the timeline producer.
    timeline[!, :duration] = (timeline[!, :end] .- timeline[!, :start]) ./ 1e9
    return timeline[!, :duration]
end

"""
    durations_from_graphml(filename)

Load the graph `"G"` from the GraphML file `filename` and return the
`:runtime_average_s` property of every vertex whose `:type` is `"Algorithm"`.

Algorithm vertices that do not carry the `:runtime_average_s` property are skipped.
"""
function durations_from_graphml(filename)
    runtime_key = :runtime_average_s
    dataflow = GraphMLReader.loadgraphml(filename, "G")
    algorithms = MetaGraphs.filter_vertices(dataflow, :type, "Algorithm")
    # Keep only the algorithm vertices that actually have a recorded runtime.
    timed = Iterators.filter(v -> has_prop(dataflow, v, runtime_key), algorithms)
    return [get_prop(dataflow, v, runtime_key) for v in timed]
end

"""
    main(args)

Entry point of the script: load algorithm execution durations from the input file
named in `args`, print summary statistics, and optionally save a histogram.

The input is read with `durations_from_csv` when its extension is `.csv` and with
`durations_from_graphml` when it is `.graphml` (compared case-insensitively). The
histogram uses logarithmically spaced bins and is written only when an output file
was given on the command line.

# Throws
- `ArgumentError` if the input extension is not supported or no durations were found.
- `ErrorException` if a histogram was requested but no duration is positive.
"""
function main(args)
    parsed_args = parse_args(args)

    input_file = parsed_args["input"]
    ext = lowercase(splitext(input_file)[2])
    durations = if ext == ".csv"
        durations_from_csv(input_file)
    elseif ext == ".graphml"
        durations_from_graphml(input_file)
    else
        # Fail early with a clear message instead of reducing over an empty placeholder.
        throw(ArgumentError(
            "Unsupported input file extension \"$ext\" ($input_file); " *
            "expected .csv or .graphml",
        ))
    end

    # The reductions below are undefined for an empty collection.
    if isempty(durations)
        throw(ArgumentError("No algorithm durations found in $input_file"))
    end

    n = length(durations)
    min_duration = minimum(durations)
    max_duration = maximum(durations)
    println("Entries: $n")
    println("Algorithm execution duration:")
    @printf "\tmin:\t %.2e s\n" min_duration
    @printf "\tmedian:\t %.2e s\n" median(durations)
    @printf "\tmean:\t %.2e s\n" mean(durations)
    @printf "\tmax:\t %.2e s\n" max_duration
    @printf "\tstd:\t %.2e s\n" std(durations)

    output_file = parsed_args["output"]
    if isnothing(output_file)
        return nothing
    end

    # Non-positive values cannot be placed on a logarithmic axis, so the histogram
    # (data, bin count and bin range) is built from the positive durations only.
    plotted = durations
    if min_duration <= 0
        @warn "Skipping negative and zero durations"
        plotted = filter(x -> x > 0, durations)
        if isempty(plotted)
            error("No positive durations in $input_file; cannot build a log-scale histogram")
        end
        min_duration, max_duration = extrema(plotted)
    end

    # Square-root rule for the bin count.
    num_bins = ceil(Int, sqrt(length(plotted)))

    # When every plotted value is identical, all log-spaced edges (and the x limits)
    # would collapse to one point; widen the range so the edges strictly increase.
    if min_duration == max_duration
        min_duration /= 2
        max_duration *= 2
    end

    # num_bins + 3 edges, i.e. num_bins + 2 logarithmically spaced bins.
    bin_edges = exp10.(
        range(log10(min_duration); stop=log10(max_duration), length=num_bins + 3)
    )

    histogram(plotted; label="", bin=bin_edges, xscale=:log10, xlim=extrema(bin_edges),
        title="Algorithm execution duration", xlabel="Duration (s)", ylabel="Counts",
        xguidefonthalign=:right, yguidefontvalign=:top)
    savefig(output_file)
    @info "Histogram saved to $output_file"
    return nothing
end

# Run `main` only when this file is the program being executed, not when it is
# pulled in from another file via `include`.
if (@__FILE__) == abspath(PROGRAM_FILE)
    main(ARGS)
end

0 comments on commit 09ace36

Please sign in to comment.