From d9d24ec92b153a684c5f55b0113bae3c06a87441 Mon Sep 17 00:00:00 2001
From: "Mateusz P. Nowak" <112635238+mateuszpn@users.noreply.github.com>
Date: Mon, 30 Sep 2024 16:09:23 +0200
Subject: [PATCH] sycl-bench added & ascii bar chart (#2115)
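
Add the sycl-bench suite to the benchmarking scripts and replace the
mermaid charts in the generated markdown with a summary table and an
ASCII bar chart.

- benches/syclbench.py builds a pinned fork of sycl-bench
  (https://github.com/mateuszpn/sycl-bench.git) and wraps its single and
  multi benchmarks; results are read from the CSV file each benchmark
  writes via --output.
- Benchmark.run() now returns list[Result] instead of a single Result,
  so one binary can report several labelled measurements; main.py picks
  a median per label across iterations.
- output.py reports relative performance per benchmark, a geometric mean
  per benchmark group, and a '+'/'-' bar per row; the new --epsilon
  option (default 0.005) sets the threshold below which a change is
  treated as no change.

Adding another sycl-bench workload only needs a new SyclBenchmark
subclass plus an entry in the benchmarks list in main.py. A minimal
sketch (the "foo"/"Foo" names and the --size value are hypothetical):

    from benches.syclbench import SyclBenchmark

    class Foo(SyclBenchmark):
        def __init__(self, bench):
            # hypothetical names: "foo" must be a sycl-bench binary,
            # "Foo" the test name it reports in its CSV output
            super().__init__(bench, "foo", "Foo")

        def bin_args(self) -> list[str]:
            return ["--size=1024"]

    # then add Foo(sb) to the benchmarks list in main.py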
---
scripts/benchmarks/benches/base.py | 3 +-
scripts/benchmarks/benches/compute.py | 4 +-
scripts/benchmarks/benches/quicksilver.py | 2 +-
scripts/benchmarks/benches/syclbench.py | 370 ++++++++++++++++++++++
scripts/benchmarks/benches/velocity.py | 5 +-
scripts/benchmarks/main.py | 73 ++++-
scripts/benchmarks/output.py | 229 ++++++++-----
scripts/benchmarks/utils/utils.py | 3 -
8 files changed, 595 insertions(+), 94 deletions(-)
create mode 100644 scripts/benchmarks/benches/syclbench.py
diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py
index e4377c8b65..98f746c479 100644
--- a/scripts/benchmarks/benches/base.py
+++ b/scripts/benchmarks/benches/base.py
@@ -6,7 +6,6 @@
import os
import shutil
from pathlib import Path
-import subprocess # nosec B404
from .result import Result
from .options import options
from utils.utils import run
@@ -57,7 +56,7 @@ def lower_is_better(self):
def setup(self):
raise NotImplementedError()
- def run(self, env_vars) -> Result:
+ def run(self, env_vars) -> list[Result]:
raise NotImplementedError()
def teardown(self):
diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py
index cf164721a6..98be6c0df0 100644
--- a/scripts/benchmarks/benches/compute.py
+++ b/scripts/benchmarks/benches/compute.py
@@ -65,7 +65,7 @@ def setup(self):
self.bench.setup()
self.benchmark_bin = os.path.join(self.bench.bins, self.bench_name)
- def run(self, env_vars) -> Result:
+ def run(self, env_vars) -> list[Result]:
command = [
f"{self.benchmark_bin}",
f"--test={self.test}",
@@ -78,7 +78,7 @@ def run(self, env_vars) -> Result:
result = self.run_bench(command, env_vars)
(label, mean) = self.parse_output(result)
- return Result(label=label, value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
+ return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ]
def parse_output(self, output):
csv_file = io.StringIO(output)
diff --git a/scripts/benchmarks/benches/quicksilver.py b/scripts/benchmarks/benches/quicksilver.py
index 7e1f65ee1d..c864e6c368 100644
--- a/scripts/benchmarks/benches/quicksilver.py
+++ b/scripts/benchmarks/benches/quicksilver.py
@@ -15,7 +15,7 @@ def __init__(self, vb: VelocityBench):
super().__init__("QuickSilver", "qs", vb)
self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering")
- def run(self, env_vars) -> Result:
+ def run(self, env_vars) -> list[Result]:
# TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0':
return None
diff --git a/scripts/benchmarks/benches/syclbench.py b/scripts/benchmarks/benches/syclbench.py
new file mode 100644
index 0000000000..f52c68c2dd
--- /dev/null
+++ b/scripts/benchmarks/benches/syclbench.py
@@ -0,0 +1,370 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import csv
+import io
+from utils.utils import run, git_clone, create_build_path
+from .base import Benchmark
+from .result import Result
+from .options import options
+
+class SyclBench:
+ def __init__(self, directory):
+ self.directory = directory
+ self.built = False
+ self.setup()
+ return
+
+ def setup(self):
+ if self.built:
+ return
+
+ build_path = os.path.join(self.directory, 'sycl-bench-build')
+ create_build_path(build_path, '')
+
+ repo_path = git_clone(self.directory, "sycl-bench-repo", "https://github.com/mateuszpn/sycl-bench.git", "1e6ab2cfd004a72c5336c26945965017e06eab71")
+
+ configure_command = [
+ "cmake",
+ f"-B {build_path}",
+ f"-S {repo_path}",
+ f"-DCMAKE_BUILD_TYPE=Release",
+ f"-DCMAKE_CXX_COMPILER={options.sycl}/bin/clang++",
+ f"-DCMAKE_C_COMPILER={options.sycl}/bin/clang",
+ f"-DSYCL_IMPL=dpcpp"
+ ]
+
+ print(f"Run {configure_command}")
+ run(configure_command, add_sycl=True)
+
+ print(f"Run cmake --build {build_path}")
+ run(f"cmake --build {build_path} -j", add_sycl=True)
+
+ self.built = True
+ self.bins = build_path
+
+class SyclBenchmark(Benchmark):
+ def __init__(self, bench, name, test):
+ self.bench = bench
+ self.bench_name = name
+ self.test = test
+ super().__init__(bench.directory)
+
+ def bin_args(self) -> list[str]:
+ return []
+
+ def extra_env_vars(self) -> dict:
+ return {}
+
+ def unit(self):
+ return "ms"
+
+ def setup(self):
+ self.bench.setup()
+ self.benchmark_bin = os.path.join(self.bench.bins, self.bench_name)
+
+ def run(self, env_vars) -> list[Result]:
+ outputfile = f"{self.bench.directory}/{self.test}.csv"
+ command = [
+ f"{self.benchmark_bin}",
+ f"--warmup-run",
+ f"--num-runs=3",
+ f"--output={outputfile}"
+ ]
+ bin_dir = self.bench.bins
+
+ command += self.bin_args()
+ env_vars.update(self.extra_env_vars())
+
+ # no output to stdout, all in outputfile
+ self.run_bench(command, env_vars)
+
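+ # each non-comment row of the sycl-bench CSV describes one benchmark variant:
+ # row[0] is its name, row[12] the measured time in seconds (converted to ms below)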
+ with open(outputfile, 'r') as f:
+ reader = csv.reader(f)
+ res_list = []
+ for row in reader:
+ if not row[0].startswith('#'):
+ res_list.append(
+ Result(label=row[0],
+ value=float(row[12]) * 1000, # convert to ms
+ command=command,
+ env=env_vars,
+ stdout=row))
+
+ return res_list
+
+ def teardown(self):
+ return
+
+ def name(self):
+ return self.test
+
+class Arith(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "arith", "Arith_int32_512")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=16384",
+ ]
+
+class TwoDConvolution(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "2DConvolution", "2DConvolution")
+
+class Two_mm(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "2mm", "2mm")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=512",
+ ]
+
+class Three_mm(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "3mm", "3mm")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=512",
+ ]
+
+class Atax(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "atax", "Atax")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=8192",
+ ]
+
+class Atomic_reduction(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "atomic_reduction", "ReductionAtomic_fp64")
+
+class Bicg(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "bicg", "Bicg")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=20480",
+ ]
+
+class Correlation(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "correlation", "Correlation")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=2048",
+ ]
+
+class Covariance(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "covariance", "Covariance")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=2048",
+ ]
+
+class Gemm(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "gemm", "Gemm")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=8192",
+ ]
+
+class Gesumv(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "gesummv", "Gesummv")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=8192",
+ ]
+
+class Gramschmidt(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "gramschmidt", "Gramschmidt")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=512",
+ ]
+
+class KMeans(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "kmeans", "Kmeans")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=700000000",
+ ]
+
+class LinRegCoeff(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "lin_reg_coeff", "LinearRegressionCoeff")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=1638400000",
+ ]
+
+class LinRegError(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "lin_reg_error", "LinearRegression")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=640000",
+ ]
+
+class MatmulChain(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "matmulchain", "MatmulChain")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=2048",
+ ]
+
+class MolDyn(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "mol_dyn", "MolecularDynamics")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=8196",
+ ]
+
+class Mvt(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "mvt", "Mvt")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=32767",
+ ]
+
+class NBody(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "nbody", "NBody_")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=81920",
+ ]
+
+class Sf(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "sf", "sf_16")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=--size=100000000",
+ ]
+
+class Syr2k(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "syr2k", "Syr2k")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=6144",
+ ]
+
+class Syrk(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "syrk", "Syrk")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=4096",
+ ]
+
+# multi benchmarks
+class Blocked_transform(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "blocked_transform", "BlockedTransform_multi")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=16384",
+ f"--local=1024"
+ ]
+
+class DagTaskI(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "dag_task_throughput_independent", "IndependentDAGTaskThroughput_multi")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=32768",
+ ]
+
+class DagTaskS(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "dag_task_throughput_sequential", "DAGTaskThroughput_multi")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=327680",
+ ]
+
+class HostDevBandwidth(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "host_device_bandwidth", "HostDeviceBandwidth_multi")
+
+class LocalMem(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "local_mem", f"LocalMem_multi")
+
+ def bin_args(self) -> list[str]:
+ return [
+ f"--size=512",
+ ]
+
+class Pattern_L2(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "pattern_L2", "L2_multi")
+
+class Reduction(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "reduction", "Pattern_Reduction_multi")
+
+class ScalarProd(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "scalar_prod", "ScalarProduct_multi")
+
+class SegmentReduction(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "segmentedreduction", "Pattern_SegmentedReduction_multi")
+
+class UsmAccLatency(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "usm_accessors_latency", "USM_Latency_multi")
+
+class UsmAllocLatency(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "usm_allocation_latency", "USM_Allocation_latency_multi")
+
+class UsmInstrMix(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "usm_instr_mix", "USM_Instr_Mix_multi")
+
+class UsmPinnedOverhead(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "usm_pinned_overhead", "USM_Pinned_Overhead_multi")
+
+class VecAdd(SyclBenchmark):
+ def __init__(self, bench):
+ super().__init__(bench, "vec_add", "VectorAddition_multi")
+
diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py
index 06d2222ac4..9d79f78178 100644
--- a/scripts/benchmarks/benches/velocity.py
+++ b/scripts/benchmarks/benches/velocity.py
@@ -7,6 +7,7 @@
from .base import Benchmark
from .result import Result
from utils.utils import run, create_build_path
+from .options import options
import os
import re
@@ -51,7 +52,7 @@ def extra_env_vars(self) -> dict:
def parse_output(self, stdout: str) -> float:
raise NotImplementedError()
- def run(self, env_vars) -> Result:
+ def run(self, env_vars) -> list[Result]:
env_vars.update(self.extra_env_vars())
command = [
@@ -61,7 +62,7 @@ def run(self, env_vars) -> Result:
result = self.run_bench(command, env_vars)
- return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
+ return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ]
def teardown(self):
return
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index d2b7ef8cd0..546ed36164 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -14,6 +14,7 @@
from benches.quicksilver import QuickSilver
from benches.SobelFilter import SobelFilter
from benches.velocity import VelocityBench
+from benches.syclbench import *
from benches.options import options
from output import generate_markdown
import argparse
@@ -25,10 +26,12 @@
def main(directory, additional_env_vars, save_name, compare_names, filter):
prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)
- vb = VelocityBench(directory)
cb = ComputeBench(directory)
+ sb = SyclBench(directory)
+ vb = VelocityBench(directory)
benchmarks = [
+ # *** Compute benchmarks
SubmitKernelSYCL(cb, 0),
SubmitKernelSYCL(cb, 1),
SubmitKernelUR(cb, 0),
@@ -40,12 +43,53 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024),
ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024),
VectorSum(cb),
+
+ # *** Velocity benchmarks
Hashtable(vb),
Bitcracker(vb),
CudaSift(vb),
Easywave(vb),
QuickSilver(vb),
- SobelFilter(vb)
+ SobelFilter(vb),
+
+ # *** sycl-bench multi benchmarks
+ Blocked_transform(sb),
+ DagTaskI(sb),
+ DagTaskS(sb),
+ HostDevBandwidth(sb),
+ LocalMem(sb),
+ Pattern_L2(sb),
+ Reduction(sb),
+ ScalarProd(sb),
+ SegmentReduction(sb),
+ UsmAccLatency(sb),
+ UsmAllocLatency(sb),
+ UsmInstrMix(sb),
+ UsmPinnedOverhead(sb),
+ VecAdd(sb),
+
+ # *** sycl-bench single benchmarks
+ TwoDConvolution(sb),
+ Two_mm(sb),
+ Three_mm(sb),
+ Arith(sb),
+ Atax(sb),
+ Atomic_reduction(sb),
+ Bicg(sb),
+ Correlation(sb),
+ Covariance(sb),
+ Gemm(sb),
+ Gesumv(sb),
+ Gramschmidt(sb),
+ KMeans(sb),
+ LinRegCoeff(sb),
+ LinRegError(sb),
+ MatmulChain(sb),
+ MolDyn(sb),
+ Mvt(sb),
+ Sf(sb),
+ Syr2k(sb),
+ Syrk(sb),
]
if filter:
@@ -56,6 +100,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
print(f"setting up {benchmark.name()}... ", end='', flush=True)
benchmark.setup()
print("complete.")
+
except Exception as e:
if options.exit_on_failure:
raise e
@@ -71,22 +116,25 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
bench_results = benchmark.run(merged_env_vars)
if bench_results is not None:
- print(f"complete ({bench_results.value} {benchmark.unit()}).")
- iteration_results.append(bench_results)
+ for bench_result in bench_results:
+ print(f"complete ({bench_result.label}: {bench_result.value} {benchmark.unit()}).")
+ iteration_results.append(bench_result)
else:
print(f"did not finish.")
if len(iteration_results) == 0:
continue
- iteration_results.sort(key=lambda res: res.value)
- median_index = len(iteration_results) // 2
- median_result = iteration_results[median_index]
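+ # a single benchmark may now report several labelled results; keep a median per label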
+ for label in set([result.label for result in iteration_results]):
+ label_results = [result for result in iteration_results if result.label == label]
+ label_results.sort(key=lambda res: res.value)
+ median_index = len(label_results) // 2
+ median_result = label_results[median_index]
- median_result.unit = benchmark.unit()
- median_result.name = benchmark.name()
+ median_result.unit = benchmark.unit()
+ median_result.name = label
- results.append(median_result)
+ results.append(median_result)
except Exception as e:
if options.exit_on_failure:
raise e
@@ -101,6 +149,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
chart_data = {"This PR" : results}
for name in compare_names:
+ print(f"compare name: {name}")
compare_result = load_benchmark_results(directory, name)
if compare_result:
chart_data[name] = compare_result
@@ -113,7 +162,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
with open('benchmark_results.md', 'w') as file:
file.write(markdown_content)
- print("Markdown with benchmark results has been written to benchmark_results.md")
+ print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md")
def validate_and_parse_env_args(env_args):
env_vars = {}
@@ -137,6 +186,7 @@ def validate_and_parse_env_args(env_args):
parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=5)
parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)
+ parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=0.005)
parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true")
parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true")
@@ -148,6 +198,7 @@ def validate_and_parse_env_args(env_args):
options.sycl = args.sycl
options.iterations = args.iterations
options.timeout = args.timeout
+ options.epsilon = args.epsilon
options.ur_dir = args.ur_dir
options.ur_adapter_name = args.ur_adapter_name
options.exit_on_failure = args.exit_on_failure
diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output.py
index 1a61f9909c..eec8957fe7 100644
--- a/scripts/benchmarks/output.py
+++ b/scripts/benchmarks/output.py
@@ -3,73 +3,33 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-import collections
+import collections, re
from benches.base import Result
+from benches.options import options
import math
-# Function to generate the mermaid bar chart script
-def generate_mermaid_script(chart_data: dict[str, list[Result]]):
- benches = collections.defaultdict(list)
- for (_, data) in chart_data.items():
- for res in data:
- benches[res.name].append(res.label)
+class OutputLine:
+ def __init__(self, name):
+ self.label = name
+ self.diff = None
+ self.bars = None
+ self.row = ""
- mermaid_script = ""
+ def __str__(self):
+ return f"(Label:{self.label}, diff:{self.diff})"
- for (bname, labels) in benches.items():
- # remove duplicates
- labels = list(dict.fromkeys(labels))
- mermaid_script += f"""
-
-{bname}
-
-```mermaid
----
-config:
- gantt:
- rightPadding: 10
- leftPadding: 120
- sectionFontSize: 10
- numberSectionStyles: 2
----
-gantt
- title {bname}
- todayMarker off
- dateFormat X
- axisFormat %s
-"""
- for label in labels:
- nbars = 0
- print_label = label.replace(" ", "<br>")
- mermaid_script += f"""
- section {print_label}
-"""
- for (name, data) in chart_data.items():
- for res in data:
- if bname == res.name and label == res.label:
- nbars += 1
- mean = res.value
- crit = "crit," if name == "This PR" else ""
- mermaid_script += f"""
- {name} ({mean} {res.unit}) : {crit} 0, {int(mean)}
-"""
- padding = 4 - nbars
- if padding > 0:
- for _ in range(padding):
- mermaid_script += f"""
- - : 0, 0
-"""
- mermaid_script += f"""
-```
-
-
-"""
-
- return mermaid_script
+ def __repr__(self):
+ return self.__str__()
# Function to generate the markdown collapsible sections for each variant
def generate_markdown_details(results: list[Result]):
markdown_sections = []
+
+ markdown_sections.append(f"""
+<details>
+<summary>Benchmark details - environment, command, output...</summary>
+""")
+
for res in results:
env_vars_str = '\n'.join(f"{key}={value}" for key, value in res.env.items())
markdown_sections.append(f"""
@@ -85,13 +45,16 @@ def generate_markdown_details(results: list[Result]):
#### Output:
{res.stdout}
+
+""")
+ markdown_sections.append(f"""
""")
return "\n".join(markdown_sections)
-def generate_summary_table(chart_data: dict[str, list[Result]]):
- summary_table = "| Benchmark | " + " | ".join(chart_data.keys()) + " |\n"
- summary_table += "|---" * (len(chart_data) + 1) + "|\n"
+def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
+ summary_table = "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n"
+ summary_table += "|---" * (len(chart_data) + 4) + "|\n"
# Collect all benchmarks and their results
benchmark_results = collections.defaultdict(dict)
@@ -100,8 +63,18 @@ def generate_summary_table(chart_data: dict[str, list[Result]]):
benchmark_results[res.name][key] = res
# Generate the table rows
+ output_detailed_list = []
+
+
+ global_product = 1
+ mean_cnt = 0
+ improved = 0
+ regressed = 0
+ no_change = 0
+
for bname, results in benchmark_results.items():
- row = f"| {bname} |"
+ oln = OutputLine(bname)
+ oln.row = f"| {bname} |"
best_value = None
best_key = None
@@ -112,30 +85,140 @@ def generate_summary_table(chart_data: dict[str, list[Result]]):
best_key = key
# Generate the row with the best value highlighted
+ if options.verbose: print(f"Results: {results}")
for key in chart_data.keys():
if key in results:
- value = results[key].value
+ intv = results[key].value
if key == best_key:
- row += f" {value} |" # Highlight the best value
+ oln.row += f" {intv:3f} {results[key].unit} |" # Highlight the best value
+ else:
+ oln.row += f" {intv:.3f} {results[key].unit} |"
+ else:
+ oln.row += " - |"
+
+ if len(chart_data.keys()) == 2:
+ key0 = list(chart_data.keys())[0]
+ key1 = list(chart_data.keys())[1]
+ if (key0 in results) and (key1 in results):
+ v0 = results[key0].value
+ v1 = results[key1].value
+ diff = None
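+ # orient the ratio so that diff > 1 always means the first column ("This PR") performed better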
+ if v0 != 0 and results[key0].lower_is_better:
+ diff = v1/v0
+ elif v1 != 0 and not results[key0].lower_is_better:
+ diff = v0/v1
+
+ if diff != None:
+ oln.row += f"{(diff * 100):.2f}%"
+ oln.diff = diff
+
+ output_detailed_list.append(oln)
+
+
+ sorted_detailed_list = sorted(output_detailed_list, key=lambda x: (x.diff is not None, x.diff), reverse=True)
+
+ diff_values = [oln.diff for oln in sorted_detailed_list if oln.diff is not None]
+
+ if len(diff_values) > 0:
+ max_diff = max(max(diff_values) - 1, 1 - min(diff_values))
+
+ for oln in sorted_detailed_list:
+ if oln.diff != None:
+ oln.row += f" | {(oln.diff - 1)*100:.2f}%"
+ delta = oln.diff - 1
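+ # at most 10 bar characters, scaled against the largest relative change; '+' marks improvement, '-' regression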
+ oln.bars = round(10*(oln.diff - 1)/max_diff)
+ if oln.bars == 0 or abs(delta) < options.epsilon:
+ oln.row += " | . |"
+ elif oln.bars > 0:
+ oln.row += f" | {'+' * oln.bars} |"
+ else:
+ oln.row += f" | {'-' * (-oln.bars)} |"
+
+ mean_cnt += 1
+ if abs(delta) > options.epsilon:
+ if delta > 0:
+ improved+=1
+ else:
+ regressed+=1
else:
- row += f" {value} |"
+ no_change+=1
+
+ global_product *= oln.diff
else:
- row += " - |"
+ oln.row += " | |"
+
+ if options.verbose: print(oln.row)
+ summary_table += oln.row + "\n"
+ else:
+ for oln in sorted_detailed_list:
+ oln.row += " | |"
+ if options.verbose: print(oln.row)
+ summary_table += oln.row + "\n"
+
+
+ grouped_objects = collections.defaultdict(list)
+
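+ # group rows by label prefix (the text before the first '_' or whitespace)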
+ for oln in output_detailed_list:
+ s = oln.label
+ prefix = re.match(r'^[^_\s]+', s)[0]
+ grouped_objects[prefix].append(oln)
+
+ grouped_objects = dict(grouped_objects)
- summary_table += row + "\n"
+ if mean_cnt > 0:
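+ # geometric mean of the per-benchmark performance ratios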
+ global_mean = global_product ** (1/mean_cnt)
+ summary_line = f"Total {mean_cnt} benchmarks in mean. "
+ summary_line += "\n" + f"Geomean {global_mean*100:.3f}%. \nImproved {improved} Regressed {regressed} (threshold {options.epsilon*100:.2f}%)"
+ else:
+ summary_line = f"No diffs to calculate performance change"
+
+ if options.verbose: print(summary_line)
+
+
+ summary_table = "\n## Performance change in benchmark groups\n"
+
+ for name, outgroup in grouped_objects.items():
+ outgroup_s = sorted(outgroup, key=lambda x: (x.diff is not None, x.diff), reverse=True)
+ product = 1.0
+ n = len(outgroup_s)
+ r = 0
+ for oln in outgroup_s:
+ if oln.diff != None:
+ product *= oln.diff
+ r += 1
+ if r > 0:
+ summary_table += f"""
+
+ Relative perf in group {name} ({n}): {math.pow(product, 1/r)*100:.3f}%
+
+"""
+ else:
+ summary_table += f"""
+
+ Relative perf in group {name} ({n}): cannot calculate
+
+"""
+ summary_table += "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n"
+ summary_table += "|---" * (len(chart_data) + 4) + "|\n"
+
+ for oln in outgroup_s:
+ summary_table += f"{oln.row}\n"
+
+ summary_table += f"""
+
+
+"""
- return summary_table
+ return summary_line, summary_table
def generate_markdown(chart_data: dict[str, list[Result]]):
- mermaid_script = generate_mermaid_script(chart_data)
- summary_table = generate_summary_table(chart_data)
+ (summary_line, summary_table) = generate_summary_table_and_chart(chart_data)
return f"""
# Summary
-<ins>result</ins> is better\n
+{summary_line}\n
+(<ins>result</ins> is better)\n
{summary_table}
-# Charts
-{mermaid_script}
# Details
{generate_markdown_details(chart_data["This PR"])}
"""
diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py
index 5c7beb95d0..49f39709ec 100644
--- a/scripts/benchmarks/utils/utils.py
+++ b/scripts/benchmarks/utils/utils.py
@@ -8,9 +8,6 @@
import shutil
import subprocess # nosec B404
from pathlib import Path
-from collections import defaultdict
-import csv
-import io
from benches.result import Result
from benches.options import options