Some initial benchmark results #37

odow · 2024-11-19T23:25:29Z

import DataFrames
import JSON
import Statistics
"""
    sgm(x::Vector{BigFloat}; sh::BigFloat)

Return the shifted geometric mean of `x` with shift `sh`, computed as
`exp(mean(log(max(1, xi + sh)))) - sh`.

Each shifted value is clamped below at `1`, so its logarithm is never negative.
The result is an unrounded `BigFloat`.

# Throws
- `ArgumentError`: if `x` is empty.
"""
function sgm(x::Vector{BigFloat}; sh::BigFloat)
    isempty(x) && throw(ArgumentError("sgm requires at least one value"))
    return exp(sum(log(max(1, xi + sh)) for xi in x) / length(x)) - sh
end

"""
    sgm(x; sh = 10.0)

Convenience method: convert every element of `x` and the shift `sh` to `BigFloat`,
compute the shifted geometric mean, and return it as a `Float64` rounded to two
decimal digits.

`x` may be any iterable of values accepted by the `BigFloat` constructor; `sh` may be
any value accepted by the `BigFloat` constructor (for example `10` or `10.0`).
"""
function sgm(x; sh = 10.0)
    # Always materialise a flat `Vector{BigFloat}` so the call below dispatches to the
    # `BigFloat` method. Broadcasting `BigFloat.(x)` keeps tuples, matrices and scalars
    # in their original container shape, which would re-enter this method forever.
    samples = vec(BigFloat[BigFloat(xi) for xi in x])
    # `BigFloat(sh)` (rather than `big(sh)`) so that an integer shift does not become a
    # `BigInt` and fail the `sh::BigFloat` keyword annotation.
    return round(Float64(sgm(samples; sh = BigFloat(sh))); digits = 2)
end

"""
    summary_stats(x)

Return the tuple `(mean, standard_error)` of `x`, where the standard error is the
sample standard deviation divided by the square root of the number of elements.
"""
function summary_stats(x)
    standard_error = Statistics.std(x) / sqrt(length(x))
    return (Statistics.mean(x), standard_error)
end
# JSON Lines file to analyse: every non-blank line is parsed as one JSON document.
output_filename = "output.jsonl"
lines = readlines(output_filename)
# Drop trailing blank lines. The emptiness check on `lines` itself keeps `lines[end]`
# from throwing a `BoundsError` when the file is empty or contains only blank lines.
while !isempty(lines) && isempty(lines[end])
    pop!(lines)
end
isempty(lines) && error("no benchmark records found in $(output_filename)")
df = DataFrames.DataFrame(JSON.parse.(lines))
# Lift the per-record solver options into their own columns so they can be grouped on.
df.random_seed = map(r -> r["random_seed"], df.options)
df.presolve = map(r -> r["presolve"], df.options)
# Category = the part of the file's base name (text after the last "/") that precedes
# the first underscore.
df.category = map(df.filename) do filename
    return split(split(filename, "/")[end], "_")[1]
end

"""
    compute_sgm(df, key, options)

Summarise column `key` of `df` by shifted geometric mean.

The rows are first grouped by `options` plus `:random_seed`; for each group the
shifted geometric mean (`sgm`) of `key` and the number of rows (`N`) are computed.
Those per-seed results are then grouped by `options` alone and reduced with
`summary_stats` (on `sgm`) and `extrema` (on `N`).

Returns a new table with the `options` columns plus `sgm_summary_stats` and
`N_extrema`, ordered by `options`. `df` itself is not modified.
"""
function compute_sgm(df, key, options)
    # Sort a copy: this keeps the output groups ordered by `options` without silently
    # reordering the caller's table (the function name carries no `!`).
    sorted = sort(df, options)
    per_seed = DataFrames.combine(
        DataFrames.groupby(sorted, vcat(options, :random_seed)),
        key => sgm => :sgm,
        key => length => :N,
    )
    return DataFrames.combine(
        DataFrames.groupby(per_seed, options),
        :sgm => summary_stats,
        :N => extrema,
    )
end

# Shifted-geometric-mean summary of the `julia_total_time` column: first broken down by
# category and presolve setting, then by presolve setting alone.
compute_sgm(df, :julia_total_time, [:category, :presolve])
compute_sgm(df, :julia_total_time, [:presolve])

# The same two breakdowns for the `primal_dual_integral` column.
compute_sgm(df, :primal_dual_integral, [:category, :presolve])
compute_sgm(df, :primal_dual_integral, [:presolve])

julia> compute_sgm(df, :julia_total_time, [:category, :presolve])
8×4 DataFrame
 Row │ category           presolve  sgm_summary_stats   N_extrema 
     │ SubStrin…          String    Tuple…              Tuple…    
─────┼────────────────────────────────────────────────────────────
   1 │ GenX               off       (3422.37, 46.22)    (10, 10)
   2 │ GenX               on        (2959.36, 177.535)  (9, 10)
   3 │ PowerModelsOTS     off       (5429.49, 1770.69)  (5, 5)
   4 │ PowerModelsOTS     on        (3527.66, 743.44)   (5, 5)
   5 │ Sienna             off       (835.15, 23.0)      (36, 36)
   6 │ Sienna             on        (856.79, 36.96)     (35, 36)
   7 │ TulipaEnergyModel  off       (321.885, 0.255)    (5, 5)
   8 │ TulipaEnergyModel  on        (329.99, 5.42)      (6, 6)

julia> compute_sgm(df, :julia_total_time, [:presolve])
2×3 DataFrame
 Row │ presolve  sgm_summary_stats  N_extrema 
     │ String    Tuple…             Tuple…    
─────┼────────────────────────────────────────
   1 │ off       (1164.47, 58.835)  (56, 56)
   2 │ on        (1083.18, 25.44)   (55, 57)

julia> compute_sgm(df, :primal_dual_integral, [:category, :presolve])
8×4 DataFrame
 Row │ category           presolve  sgm_summary_stats  N_extrema 
     │ SubStrin…          String    Tuple…             Tuple…    
─────┼───────────────────────────────────────────────────────────
   1 │ GenX               off       (828.25, 39.0)     (10, 10)
   2 │ GenX               on        (767.53, 118.77)   (9, 10)
   3 │ PowerModelsOTS     off       (49.87, 7.67)      (5, 5)
   4 │ PowerModelsOTS     on        (79.715, 2.105)    (5, 5)
   5 │ Sienna             off       (52.845, 0.405)    (36, 36)
   6 │ Sienna             on        (47.295, 0.595)    (35, 36)
   7 │ TulipaEnergyModel  off       (1.695, 0.075)     (5, 5)
   8 │ TulipaEnergyModel  on        (0.525, 0.015)     (6, 6)

julia> compute_sgm(df, :primal_dual_integral, [:presolve])
2×3 DataFrame
 Row │ presolve  sgm_summary_stats  N_extrema 
     │ String    Tuple…             Tuple…    
─────┼────────────────────────────────────────
   1 │ off       (75.46, 1.39)      (56, 56)
   2 │ on        (67.39, 3.73)      (55, 57)

I only did random_seed=1 and random_seed=2. I'll bump it up for the next run. Just working on validating the workflow.

There are a couple of failures that I need to fix, where the machine shut down because of the time limit without recording any data.

The text was updated successfully, but these errors were encountered:

joaquimg · 2024-11-20T00:44:31Z

I am confused about random_seed: nothing mentions it in benchmark/main.jl

odow · 2024-11-20T00:49:27Z

My WIP HPC script is basically this:

include(joinpath(dirname(@__DIR__), "src", "Maatai.jl"))

import .Maatai

# Local checkout of the benchmarks repository (`~` is expanded to the home directory).
root = expanduser("~/git/open-energy-modeling-benchmarks")
# Names of the entries in `<root>/instances` that end with ".mps.gz".
instances = filter(endswith(".mps.gz"), readdir(joinpath(root, "instances")))
# Study definition: one input dictionary per (instance file, seed, presolve) combination,
# i.e. 2 seeds x 2 presolve settings for every instance.
# NOTE(review): presumably Maatai substitutes the `${...}` placeholders in `script` from
# the keys of each input dictionary, and supplies `${OUTPUT_DIR}` itself (it is not one
# of the input keys) -- confirm against Maatai.
# NOTE(review): `--time_limit=7200` is presumably in seconds -- confirm in
# benchmark/main.jl.
study = Maatai.Study(;
    root = @__DIR__,
    name = "oem_benchmarks",
    inputs = Dict{String,Any}[
        Dict(
            "ROOT" => root,
            "FILENAME" => filename,
            "RANDOM_SEED" => seed,
            "PRESOLVE" => presolve
        )
        for filename in instances
        for seed in 1:2
        for presolve in ["on", "off"]
    ],
    script =
        raw"""
        julia                                               \
            --project=${ROOT}/benchmark                     \
            ${ROOT}/benchmark/main.jl                       \
            --instance=${FILENAME}                          \
            --output_filename=${OUTPUT_DIR}/output.jsonl    \
            --random_seed=${RANDOM_SEED}                    \
            --presolve=${PRESOLVE}                          \
            --time_limit=7200
        """,
)

# SLURM settings for the jobs: a single task on a single node.
# NOTE(review): `time` is presumably the per-job wall-clock limit (HH:MM:SS) -- confirm
# against Maatai.SLURM.
# `account` is a placeholder value here.
platform = Maatai.SLURM(
    time = "02:30:00",
    nodes = 1,
    ntasks_per_node = 1,
    account = "XXXXX",
    partition = "standard",
    qos = "standard",
)

# Hand the study and the platform description to Maatai.
# NOTE(review): presumably this generates (but may not submit) the job scripts -- confirm.
Maatai.plan(study, platform)

I hit the limit on how many jobs I can submit if I try more seeds, so that's the next task.

joaquimg · 2024-11-20T00:54:07Z

Got it, args are being forwarded to HiGHS

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Some initial benchmark results #37

Some initial benchmark results #37

odow commented Nov 19, 2024

joaquimg commented Nov 20, 2024

odow commented Nov 20, 2024

joaquimg commented Nov 20, 2024

Some initial benchmark results #37

Some initial benchmark results #37

Comments

odow commented Nov 19, 2024

joaquimg commented Nov 20, 2024

odow commented Nov 20, 2024

joaquimg commented Nov 20, 2024