From d9d24ec92b153a684c5f55b0113bae3c06a87441 Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" <112635238+mateuszpn@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:09:23 +0200 Subject: [PATCH] sycl-bench added & ascii bar chart (#2115) --- scripts/benchmarks/benches/base.py | 3 +- scripts/benchmarks/benches/compute.py | 4 +- scripts/benchmarks/benches/quicksilver.py | 2 +- scripts/benchmarks/benches/syclbench.py | 370 ++++++++++++++++++++++ scripts/benchmarks/benches/velocity.py | 5 +- scripts/benchmarks/main.py | 73 ++++- scripts/benchmarks/output.py | 229 ++++++++----- scripts/benchmarks/utils/utils.py | 3 - 8 files changed, 595 insertions(+), 94 deletions(-) create mode 100644 scripts/benchmarks/benches/syclbench.py diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py index e4377c8b65..98f746c479 100644 --- a/scripts/benchmarks/benches/base.py +++ b/scripts/benchmarks/benches/base.py @@ -6,7 +6,6 @@ import os import shutil from pathlib import Path -import subprocess # nosec B404 from .result import Result from .options import options from utils.utils import run @@ -57,7 +56,7 @@ def lower_is_better(self): def setup(self): raise NotImplementedError() - def run(self, env_vars) -> Result: + def run(self, env_vars) -> list[Result]: raise NotImplementedError() def teardown(self): diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py index cf164721a6..98be6c0df0 100644 --- a/scripts/benchmarks/benches/compute.py +++ b/scripts/benchmarks/benches/compute.py @@ -65,7 +65,7 @@ def setup(self): self.bench.setup() self.benchmark_bin = os.path.join(self.bench.bins, self.bench_name) - def run(self, env_vars) -> Result: + def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", f"--test={self.test}", @@ -78,7 +78,7 @@ def run(self, env_vars) -> Result: result = self.run_bench(command, env_vars) (label, mean) = self.parse_output(result) - return Result(label=label, value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) + return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ] def parse_output(self, output): csv_file = io.StringIO(output) diff --git a/scripts/benchmarks/benches/quicksilver.py b/scripts/benchmarks/benches/quicksilver.py index 7e1f65ee1d..c864e6c368 100644 --- a/scripts/benchmarks/benches/quicksilver.py +++ b/scripts/benchmarks/benches/quicksilver.py @@ -15,7 +15,7 @@ def __init__(self, vb: VelocityBench): super().__init__("QuickSilver", "qs", vb) self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering") - def run(self, env_vars) -> Result: + def run(self, env_vars) -> list[Result]: # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0 if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0': return None diff --git a/scripts/benchmarks/benches/syclbench.py b/scripts/benchmarks/benches/syclbench.py new file mode 100644 index 0000000000..f52c68c2dd --- /dev/null +++ b/scripts/benchmarks/benches/syclbench.py @@ -0,0 +1,370 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import csv +import io +from utils.utils import run, git_clone, create_build_path +from .base import Benchmark +from .result import Result +from .options import options + +class SyclBench: + def __init__(self, directory): + self.directory = directory + self.built = False + self.setup() + return + + def setup(self): + if self.built: + return + + build_path = os.path.join(self.directory, 'sycl-bench-build') + create_build_path(build_path, '') + + repo_path = git_clone(self.directory, "sycl-bench-repo", "https://github.com/mateuszpn/sycl-bench.git", "1e6ab2cfd004a72c5336c26945965017e06eab71") + + configure_command = [ + "cmake", + f"-B {build_path}", + f"-S {repo_path}", + f"-DCMAKE_BUILD_TYPE=Release", + f"-DCMAKE_CXX_COMPILER={options.sycl}/bin/clang++", + f"-DCMAKE_C_COMPILER={options.sycl}/bin/clang", + f"-DSYCL_IMPL=dpcpp" + ] + + print(f"Run {configure_command}") + run(configure_command, add_sycl=True) + + print(f"Run cmake --build {build_path}") + run(f"cmake --build {build_path} -j", add_sycl=True) + + self.built = True + self.bins = build_path + +class SyclBenchmark(Benchmark): + def __init__(self, bench, name, test): + self.bench = bench + self.bench_name = name + self.test = test + super().__init__(bench.directory) + + def bin_args(self) -> list[str]: + return [] + + def extra_env_vars(self) -> dict: + return {} + + def unit(self): + return "ms" + + def setup(self): + self.bench.setup() + self.benchmark_bin = os.path.join(self.bench.bins, self.bench_name) + + def run(self, env_vars) -> list[Result]: + outputfile = f"{self.bench.directory}/{self.test}.csv" + command = [ + f"{self.benchmark_bin}", + f"--warmup-run", + f"--num-runs=3", + f"--output={outputfile}" + ] + bin_dir = self.bench.bins + + command += self.bin_args() + env_vars.update(self.extra_env_vars()) + + # no output to stdout, all in outputfile + self.run_bench(command, env_vars) + + with open(outputfile, 'r') as f: + reader = csv.reader(f) + res_list = [] + for row in reader: + if not row[0].startswith('#'): + res_list.append( + Result(label=row[0], + value=float(row[12]) * 1000, # convert to ms + command=command, + env=env_vars, + stdout=row)) + + return res_list + + def teardown(self): + return + + def name(self): + return self.test + +class Arith(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "arith", "Arith_int32_512") + + def bin_args(self) -> list[str]: + return [ + f"--size=16384", + ] + +class TwoDConvolution(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "2DConvolution", "2DConvolution") + +class Two_mm(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "2mm", "2mm") + + def bin_args(self) -> list[str]: + return [ + f"--size=512", + ] + +class Three_mm(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "3mm", "3mm") + + def bin_args(self) -> list[str]: + return [ + f"--size=512", + ] + +class Atax(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "atax", "Atax") + + def bin_args(self) -> list[str]: + return [ + f"--size=8192", + ] + +class Atomic_reduction(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "atomic_reduction", "ReductionAtomic_fp64") + +class Bicg(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "bicg", "Bicg") + + def bin_args(self) -> list[str]: + return [ + f"--size=20480", + ] + +class Correlation(SyclBenchmark): + def __init__(self, bench): + 
super().__init__(bench, "correlation", "Correlation") + + def bin_args(self) -> list[str]: + return [ + f"--size=2048", + ] + +class Covariance(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "covariance", "Covariance") + + def bin_args(self) -> list[str]: + return [ + f"--size=2048", + ] + +class Gemm(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "gemm", "Gemm") + + def bin_args(self) -> list[str]: + return [ + f"--size=8192", + ] + +class Gesumv(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "gesummv", "Gesummv") + + def bin_args(self) -> list[str]: + return [ + f"--size=8192", + ] + +class Gramschmidt(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "gramschmidt", "Gramschmidt") + + def bin_args(self) -> list[str]: + return [ + f"--size=512", + ] + +class KMeans(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "kmeans", "Kmeans") + + def bin_args(self) -> list[str]: + return [ + f"--size=700000000", + ] + +class LinRegCoeff(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "lin_reg_coeff", "LinearRegressionCoeff") + + def bin_args(self) -> list[str]: + return [ + f"--size=1638400000", + ] + +class LinRegError(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "lin_reg_error", "LinearRegression") + + def bin_args(self) -> list[str]: + return [ + f"--size=640000", + ] + +class MatmulChain(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "matmulchain", "MatmulChain") + + def bin_args(self) -> list[str]: + return [ + f"--size=2048", + ] + +class MolDyn(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "mol_dyn", "MolecularDynamics") + + def bin_args(self) -> list[str]: + return [ + f"--size=8196", + ] + +class Mvt(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "mvt", "Mvt") + + def bin_args(self) -> list[str]: + return [ + f"--size=32767", + ] + +class NBody(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "nbody", "NBody_") + + def bin_args(self) -> list[str]: + return [ + f"--size=81920", + ] + +class Sf(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "sf", "sf_16") + + def bin_args(self) -> list[str]: + return [ + f"--size=--size=100000000", + ] + +class Syr2k(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "syr2k", "Syr2k") + + def bin_args(self) -> list[str]: + return [ + f"--size=6144", + ] + +class Syrk(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "syrk", "Syrk") + + def bin_args(self) -> list[str]: + return [ + f"--size=4096", + ] + +# multi benchmarks +class Blocked_transform(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "blocked_transform", "BlockedTransform_multi") + + def bin_args(self) -> list[str]: + return [ + f"--size=16384", + f"--local=1024" + ] + +class DagTaskI(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "dag_task_throughput_independent", "IndependentDAGTaskThroughput_multi") + + def bin_args(self) -> list[str]: + return [ + f"--size=32768", + ] + +class DagTaskS(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "dag_task_throughput_sequential", "DAGTaskThroughput_multi") + + def bin_args(self) -> list[str]: + return [ + f"--size=327680", + ] + +class HostDevBandwidth(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "host_device_bandwidth", 
"HostDeviceBandwidth_multi") + +class LocalMem(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "local_mem", f"LocalMem_multi") + + def bin_args(self) -> list[str]: + return [ + f"--size=512", + ] + +class Pattern_L2(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "pattern_L2", "L2_multi") + +class Reduction(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "reduction", "Pattern_Reduction_multi") + +class ScalarProd(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "scalar_prod", "ScalarProduct_multi") + +class SegmentReduction(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "segmentedreduction", "Pattern_SegmentedReduction_multi") + +class UsmAccLatency(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "usm_accessors_latency", "USM_Latency_multi") + +class UsmAllocLatency(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "usm_allocation_latency", "USM_Allocation_latency_multi") + +class UsmInstrMix(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "usm_instr_mix", "USM_Instr_Mix_multi") + +class UsmPinnedOverhead(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "usm_pinned_overhead", "USM_Pinned_Overhead_multi") + +class VecAdd(SyclBenchmark): + def __init__(self, bench): + super().__init__(bench, "vec_add", "VectorAddition_multi") + diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py index 06d2222ac4..9d79f78178 100644 --- a/scripts/benchmarks/benches/velocity.py +++ b/scripts/benchmarks/benches/velocity.py @@ -7,6 +7,7 @@ from .base import Benchmark from .result import Result from utils.utils import run, create_build_path +from .options import options import os import re @@ -51,7 +52,7 @@ def extra_env_vars(self) -> dict: def parse_output(self, stdout: str) -> float: raise NotImplementedError() - def run(self, env_vars) -> Result: + def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) command = [ @@ -61,7 +62,7 @@ def run(self, env_vars) -> Result: result = self.run_bench(command, env_vars) - return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) + return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ] def teardown(self): return diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py index d2b7ef8cd0..546ed36164 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -14,6 +14,7 @@ from benches.quicksilver import QuickSilver from benches.SobelFilter import SobelFilter from benches.velocity import VelocityBench +from benches.syclbench import * from benches.options import options from output import generate_markdown import argparse @@ -25,10 +26,12 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) - vb = VelocityBench(directory) cb = ComputeBench(directory) + sb = SyclBench(directory) + vb = VelocityBench(directory) benchmarks = [ + # *** Compute benchmarks SubmitKernelSYCL(cb, 0), SubmitKernelSYCL(cb, 1), SubmitKernelUR(cb, 0), @@ -40,12 +43,53 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024), ExecImmediateCopyQueue(cb, 1, 1, 
'Device', 'Host', 1024), VectorSum(cb), + + # *** Velocity benchmarks Hashtable(vb), Bitcracker(vb), CudaSift(vb), Easywave(vb), QuickSilver(vb), - SobelFilter(vb) + SobelFilter(vb), + + # *** sycl-bench multi benchmarks + Blocked_transform(sb), + DagTaskI(sb), + DagTaskS(sb), + HostDevBandwidth(sb), + LocalMem(sb), + Pattern_L2(sb), + Reduction(sb), + ScalarProd(sb), + SegmentReduction(sb), + UsmAccLatency(sb), + UsmAllocLatency(sb), + UsmInstrMix(sb), + UsmPinnedOverhead(sb), + VecAdd(sb), + + # *** sycl-bench single benchmarks + TwoDConvolution(sb), + Two_mm(sb), + Three_mm(sb), + Arith(sb), + Atax(sb), + Atomic_reduction(sb), + Bicg(sb), + Correlation(sb), + Covariance(sb), + Gemm(sb), + Gesumv(sb), + Gramschmidt(sb), + KMeans(sb), + LinRegCoeff(sb), + LinRegError(sb), + MatmulChain(sb), + MolDyn(sb), + Mvt(sb), + Sf(sb), + Syr2k(sb), + Syrk(sb), ] if filter: @@ -56,6 +100,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"setting up {benchmark.name()}... ", end='', flush=True) benchmark.setup() print("complete.") + except Exception as e: if options.exit_on_failure: raise e @@ -71,22 +116,25 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True) bench_results = benchmark.run(merged_env_vars) if bench_results is not None: - print(f"complete ({bench_results.value} {benchmark.unit()}).") - iteration_results.append(bench_results) + for bench_result in bench_results: + print(f"complete ({bench_result.label}: {bench_result.value} {benchmark.unit()}).") + iteration_results.append(bench_result) else: print(f"did not finish.") if len(iteration_results) == 0: continue - iteration_results.sort(key=lambda res: res.value) - median_index = len(iteration_results) // 2 - median_result = iteration_results[median_index] + for label in set([result.label for result in iteration_results]): + label_results = [result for result in iteration_results if result.label == label] + label_results.sort(key=lambda res: res.value) + median_index = len(label_results) // 2 + median_result = label_results[median_index] - median_result.unit = benchmark.unit() - median_result.name = benchmark.name() + median_result.unit = benchmark.unit() + median_result.name = label - results.append(median_result) + results.append(median_result) except Exception as e: if options.exit_on_failure: raise e @@ -101,6 +149,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): chart_data = {"This PR" : results} for name in compare_names: + print(f"compare name: {name}") compare_result = load_benchmark_results(directory, name) if compare_result: chart_data[name] = compare_result @@ -113,7 +162,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): with open('benchmark_results.md', 'w') as file: file.write(markdown_content) - print("Markdown with benchmark results has been written to benchmark_results.md") + print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") def validate_and_parse_env_args(env_args): env_vars = {} @@ -137,6 +186,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=5) parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600) parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', 
default=None) + parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=0.005) parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true") parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true") @@ -148,6 +198,7 @@ def validate_and_parse_env_args(env_args): options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout + options.epsilon = args.epsilon options.ur_dir = args.ur_dir options.ur_adapter_name = args.ur_adapter_name options.exit_on_failure = args.exit_on_failure diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output.py index 1a61f9909c..eec8957fe7 100644 --- a/scripts/benchmarks/output.py +++ b/scripts/benchmarks/output.py @@ -3,73 +3,33 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import collections +import collections, re from benches.base import Result +from benches.options import options import math -# Function to generate the mermaid bar chart script -def generate_mermaid_script(chart_data: dict[str, list[Result]]): - benches = collections.defaultdict(list) - for (_, data) in chart_data.items(): - for res in data: - benches[res.name].append(res.label) +class OutputLine: + def __init__(self, name): + self.label = name + self.diff = None + self.bars = None + self.row = "" - mermaid_script = "" + def __str__(self): + return f"(Label:{self.label}, diff:{self.diff})" - for (bname, labels) in benches.items(): - # remove duplicates - labels = list(dict.fromkeys(labels)) - mermaid_script += f""" -
-{bname} - -```mermaid ---- -config: - gantt: - rightPadding: 10 - leftPadding: 120 - sectionFontSize: 10 - numberSectionStyles: 2 ---- -gantt - title {bname} - todayMarker off - dateFormat X - axisFormat %s -""" - for label in labels: - nbars = 0 - print_label = label.replace(" ", "
") - mermaid_script += f""" - section {print_label} -""" - for (name, data) in chart_data.items(): - for res in data: - if bname == res.name and label == res.label: - nbars += 1 - mean = res.value - crit = "crit," if name == "This PR" else "" - mermaid_script += f""" - {name} ({mean} {res.unit}) : {crit} 0, {int(mean)} -""" - padding = 4 - nbars - if padding > 0: - for _ in range(padding): - mermaid_script += f""" - - : 0, 0 -""" - mermaid_script += f""" -``` - -
-""" - - return mermaid_script + def __repr__(self): + return self.__str__() # Function to generate the markdown collapsible sections for each variant def generate_markdown_details(results: list[Result]): markdown_sections = [] + + markdown_sections.append(f""" +
+Benchmark details - environment, command, output... +""") + for res in results: env_vars_str = '\n'.join(f"{key}={value}" for key, value in res.env.items()) markdown_sections.append(f""" @@ -85,13 +45,16 @@ def generate_markdown_details(results: list[Result]): #### Output: {res.stdout} +
+""") + markdown_sections.append(f""" """) return "\n".join(markdown_sections) -def generate_summary_table(chart_data: dict[str, list[Result]]): - summary_table = "| Benchmark | " + " | ".join(chart_data.keys()) + " |\n" - summary_table += "|---" * (len(chart_data) + 1) + "|\n" +def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]): + summary_table = "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n" + summary_table += "|---" * (len(chart_data) + 4) + "|\n" # Collect all benchmarks and their results benchmark_results = collections.defaultdict(dict) @@ -100,8 +63,18 @@ def generate_summary_table(chart_data: dict[str, list[Result]]): benchmark_results[res.name][key] = res # Generate the table rows + output_detailed_list = [] + + + global_product = 1 + mean_cnt = 0 + improved = 0 + regressed = 0 + no_change = 0 + for bname, results in benchmark_results.items(): - row = f"| {bname} |" + oln = OutputLine(bname) + oln.row = f"| {bname} |" best_value = None best_key = None @@ -112,30 +85,140 @@ def generate_summary_table(chart_data: dict[str, list[Result]]): best_key = key # Generate the row with the best value highlighted + if options.verbose: print(f"Results: {results}") for key in chart_data.keys(): if key in results: - value = results[key].value + intv = results[key].value if key == best_key: - row += f" {value} |" # Highlight the best value + oln.row += f" {intv:3f} {results[key].unit} |" # Highlight the best value + else: + oln.row += f" {intv:.3f} {results[key].unit} |" + else: + oln.row += " - |" + + if len(chart_data.keys()) == 2: + key0 = list(chart_data.keys())[0] + key1 = list(chart_data.keys())[1] + if (key0 in results) and (key1 in results): + v0 = results[key0].value + v1 = results[key1].value + diff = None + if v0 != 0 and results[key0].lower_is_better: + diff = v1/v0 + elif v1 != 0 and not results[key0].lower_is_better: + diff = v0/v1 + + if diff != None: + oln.row += f"{(diff * 100):.2f}%" + oln.diff = diff + + output_detailed_list.append(oln) + + + sorted_detailed_list = sorted(output_detailed_list, key=lambda x: (x.diff is not None, x.diff), reverse=True) + + diff_values = [oln.diff for oln in sorted_detailed_list if oln.diff is not None] + + if len(diff_values) > 0: + max_diff = max(max(diff_values) - 1, 1 - min(diff_values)) + + for oln in sorted_detailed_list: + if oln.diff != None: + oln.row += f" | {(oln.diff - 1)*100:.2f}%" + delta = oln.diff - 1 + oln.bars = round(10*(oln.diff - 1)/max_diff) + if oln.bars == 0 or abs(delta) < options.epsilon: + oln.row += " | . |" + elif oln.bars > 0: + oln.row += f" | {'+' * oln.bars} |" + else: + oln.row += f" | {'-' * (-oln.bars)} |" + + mean_cnt += 1 + if abs(delta) > options.epsilon: + if delta > 0: + improved+=1 + else: + regressed+=1 else: - row += f" {value} |" + no_change+=1 + + global_product *= oln.diff else: - row += " - |" + oln.row += " | |" + + if options.verbose: print(oln.row) + summary_table += oln.row + "\n" + else: + for oln in sorted_detailed_list: + oln.row += " | |" + if options.verbose: print(oln.row) + summary_table += oln.row + "\n" + + + grouped_objects = collections.defaultdict(list) + + for oln in output_detailed_list: + s = oln.label + prefix = re.match(r'^[^_\s]+', s)[0] + grouped_objects[prefix].append(oln) + + grouped_objects = dict(grouped_objects) - summary_table += row + "\n" + if mean_cnt > 0: + global_mean = global_product ** (1/mean_cnt) + summary_line = f"Total {mean_cnt} benchmarks in mean. 
" + summary_line += "\n" + f"Geomean {global_mean*100:.3f}%. \nImproved {improved} Regressed {regressed} (threshold {options.epsilon*100:.2f}%)" + else: + summary_line = f"No diffs to calculate performance change" + + if options.verbose: print(summary_line) + + + summary_table = "\n## Performance change in benchmark groups\n" + + for name, outgroup in grouped_objects.items(): + outgroup_s = sorted(outgroup, key=lambda x: (x.diff is not None, x.diff), reverse=True) + product = 1.0 + n = len(outgroup_s) + r = 0 + for oln in outgroup_s: + if oln.diff != None: + product *= oln.diff + r += 1 + if r > 0: + summary_table += f""" +
+ Relative perf in group {name} ({n}): {math.pow(product, 1/r)*100:.3f}% + +""" + else: + summary_table += f""" +
+ Relative perf in group {name} ({n}): cannot calculate + +""" + summary_table += "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n" + summary_table += "|---" * (len(chart_data) + 4) + "|\n" + + for oln in outgroup_s: + summary_table += f"{oln.row}\n" + + summary_table += f""" +
+ +""" - return summary_table + return summary_line, summary_table def generate_markdown(chart_data: dict[str, list[Result]]): - mermaid_script = generate_mermaid_script(chart_data) - summary_table = generate_summary_table(chart_data) + (summary_line, summary_table) = generate_summary_table_and_chart(chart_data) return f""" # Summary -result is better\n +{summary_line}\n +(result is better)\n {summary_table} -# Charts -{mermaid_script} # Details {generate_markdown_details(chart_data["This PR"])} """ diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py index 5c7beb95d0..49f39709ec 100644 --- a/scripts/benchmarks/utils/utils.py +++ b/scripts/benchmarks/utils/utils.py @@ -8,9 +8,6 @@ import shutil import subprocess # nosec B404 from pathlib import Path -from collections import defaultdict -import csv -import io from benches.result import Result from benches.options import options
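
Below is a minimal, self-contained sketch of the per-label median selection that the main.py changes above perform once run() returns a list of labelled Results per iteration; the function name median_per_label and the data shapes are illustrative assumptions, not identifiers from the repository.

```python
# Illustrative sketch of the per-label median selection now done in main.py
# when a benchmark (e.g. a sycl-bench binary) yields several labelled results
# per iteration. Names and shapes here are hypothetical.
from collections import defaultdict

def median_per_label(iteration_results: list[tuple[str, float]]) -> dict[str, float]:
    by_label: dict[str, list[float]] = defaultdict(list)
    for label, value in iteration_results:
        by_label[label].append(value)

    medians = {}
    for label, values in by_label.items():
        values.sort()
        # main.py picks the middle element after sorting (the upper median
        # for an even number of iterations) rather than averaging the two
        # central values.
        medians[label] = values[len(values) // 2]
    return medians

if __name__ == "__main__":
    runs = [("Gemm", 12.1), ("Gemm", 11.8), ("Gemm", 12.4),
            ("Atax", 3.3), ("Atax", 3.1), ("Atax", 3.2)]
    print(median_per_label(runs))  # {'Gemm': 12.1, 'Atax': 3.2}
```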
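
The new generate_summary_table_and_chart in output.py compares two result sets benchmark by benchmark, draws an ASCII bar per row, classifies each change against the --epsilon threshold, and aggregates the per-benchmark ratios with a geometric mean. The standalone sketch below illustrates that idea under simplifying assumptions (flat name-to-value dicts, a single global lower_is_better flag, and a hypothetical compare_runs name); the patch's exact key ordering and sign conventions may differ.

```python
# Standalone sketch (assumptions: flat name -> value dicts, one global
# lower_is_better flag) of the ratio / geomean / ASCII-bar summary idea.
import math

def compare_runs(baseline: dict[str, float], candidate: dict[str, float],
                 epsilon: float = 0.005, lower_is_better: bool = True) -> None:
    ratios = {}
    for name, base in baseline.items():
        new = candidate.get(name)
        if new is None or base == 0 or new == 0:
            continue
        # ratio > 1 means the candidate run improved on the baseline
        ratios[name] = base / new if lower_is_better else new / base

    if not ratios:
        print("No diffs to calculate performance change")
        return

    # Scale bars to the largest deviation from 1.0; the epsilon floor avoids
    # dividing by ~0 when every ratio is essentially 1.0.
    max_dev = max(max(ratios.values()) - 1, 1 - min(ratios.values()), epsilon)
    for name, ratio in sorted(ratios.items(), key=lambda kv: kv[1], reverse=True):
        delta = ratio - 1
        bars = round(10 * delta / max_dev)
        if bars == 0 or abs(delta) < epsilon:
            marker = '.'
        else:
            marker = '+' * bars if bars > 0 else '-' * -bars
        print(f"{name:<24} {ratio * 100:8.2f}% {marker}")

    geomean = math.prod(ratios.values()) ** (1 / len(ratios))
    print(f"Geomean {geomean * 100:.3f}% over {len(ratios)} benchmarks "
          f"(threshold {epsilon * 100:.2f}%)")

if __name__ == "__main__":
    compare_runs({"Gemm": 12.0, "Atax": 3.1, "Bicg": 5.0},
                 {"Gemm": 11.4, "Atax": 3.3, "Bicg": 5.0})
```

With the sample data in __main__, the sketch prints a run of '+' for Gemm, a run of '-' for Atax, a '.' for the unchanged Bicg, and a geometric mean just under 100%.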