Docs: Added benchmarks page + plot
erezsh committed Mar 6, 2024
1 parent 585a457 commit 050b08c
Showing 8 changed files with 172 additions and 1 deletion.
Binary file added docs/bench_dispatch.jpg
Binary file added docs/bench_dispatch_union.jpg
Binary file added docs/bench_validation.jpg
36 changes: 36 additions & 0 deletions docs/benchmarks.rst
@@ -0,0 +1,36 @@
Benchmarks
==========

The following benchmarks were run using ``pytest-benchmark`` and plotted using ``matplotlib``.

The code for running and plotting these benchmarks is included in the repo.
See: ``docs/generate_benchmarks.sh``

Benchmark contributions for more use cases or new libraries are welcome!
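
For reference, here is a minimal sketch of what such a benchmark could look like (a hypothetical test using the
``benchmark`` fixture provided by ``pytest-benchmark``; the actual benchmarks live in the repo's test suite):

.. code-block:: python

    from typing import List
    from runtype import isa

    def test_validation_isa_list(benchmark):
        # pytest-benchmark injects the `benchmark` fixture, which times the
        # callable passed to it and records the statistics.
        nums = list(range(100))
        assert benchmark(isa, nums, List[int])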


Validation (isinstance)
-----------------------

In the image below, we compare runtype to its (only?) competitor, the library `beartype <https://github.com/beartype/beartype>`_.

We can see that the native ``isinstance()`` is faster than runtype's ``isa()``. However, the comparison isn't entirely fair,
because ``isinstance()`` doesn't support all the types that ``isa()`` does.
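
For example (an illustrative sketch, not the benchmarked code): ``isinstance()`` rejects subscripted generics
outright, while ``isa()`` validates them, including the element types:

.. code-block:: python

    from typing import List
    from runtype import isa

    isa([1, 2, 3], List[int])     # True
    isa([1, 2, "3"], List[int])   # False - one element isn't an int

    # isinstance() raises a TypeError for subscripted generics such as List[int];
    # the closest it can get is checking the container type alone:
    isinstance([1, 2, 3], list)   # True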

.. image:: bench_validation.jpg


Dispatch
--------

In the images below, we compare runtype's multiple dispatch to its (only?) competitor, the library `plum <https://github.com/beartype/plum>`_.

We can see that the naive approach of using if-else is faster for a small number of branches,
but by 32 branches runtype is already significantly faster.

Curiously, for dispatching on unions of types, runtype is twice as fast (!) as the naive if-else approach,
even for a very small number of branches.
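
For context, here is a sketch of the kind of union dispatch being measured (assuming runtype's ``multidispatch``
decorator, as described in the dispatch docs; the benchmark's exact functions live in the test suite):

.. code-block:: python

    from typing import Union
    from runtype import multidispatch  # decorator name assumed; see the dispatch docs

    @multidispatch
    def describe(x: Union[int, float]):
        return "number"

    @multidispatch
    def describe(x: Union[str, bytes]):
        return "text"

    # The naive if-else alternative that the plots compare against:
    def describe_naive(x):
        if isinstance(x, (int, float)):
            return "number"
        elif isinstance(x, (str, bytes)):
            return "text"
        raise TypeError(x)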

.. image:: bench_dispatch.jpg

.. image:: bench_dispatch_union.jpg
3 changes: 2 additions & 1 deletion docs/conf.py
@@ -102,7 +102,8 @@
# documentation.
#
html_theme_options = {
'prev_next_buttons_location': 'both'
'prev_next_buttons_location': 'both',
'collapse_navigation': False
}


8 changes: 8 additions & 0 deletions docs/generate_benchmarks.sh
@@ -0,0 +1,8 @@
# Run this script from the root directory of the repo
# Generate benchmark JSONs:
mkdir tmp
pytest --benchmark-only --benchmark-json=tmp/bench_validation.json -k test_validation -x
pytest --benchmark-only --benchmark-json=tmp/bench_dispatch.json -k test_dispatch -x
# Plot them (args: <input json> <group filter; '-' means all> <output image>):
python docs/plot_benchmarks.py tmp/bench_validation.json - docs/bench_validation.jpg
python docs/plot_benchmarks.py tmp/bench_dispatch.json A docs/bench_dispatch.jpg
python docs/plot_benchmarks.py tmp/bench_dispatch.json B docs/bench_dispatch_union.jpg
1 change: 1 addition & 0 deletions docs/index.rst
@@ -15,6 +15,7 @@ Welcome to Runtype's documentation!
dataclass
dispatch
types
benchmarks


Runtype is a collection of run-time type utilities for Python.
125 changes: 125 additions & 0 deletions docs/plot_benchmarks.py
@@ -0,0 +1,125 @@
# Create JSON using:
#
# pytest --benchmark-only --benchmark-json=bench.json -x
#
# OR:
# pytest --benchmark-only --benchmark-json=bench_validation.json -k test_validation -x
# pytest --benchmark-only --benchmark-json=bench_dispatch.json -k test_dispatch -x
#

import sys
import json
import matplotlib.pyplot as plt

# Step 1: Read the JSON file
def read_benchmark_json(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    return data

# Step 2: Parse the JSON data
def parse_benchmarks(data, group_mod):
    grouped_data: dict[str, list] = {}

    for bench in data['benchmarks']:
        group = bench['group']
        if '$$' in group:
            name, mods = group.split('$$')
        else:
            name = group or bench['name']
            mods = ""
        libname = bench['params'].get('libname', '?')
        value = bench['stats']['median']
        skip = False
        if mods:
            for mod in mods.split():
                mod_name, mod_value = mod.split(':')
                if mod_name == 'div':
                    value /= float(mod_value)
                elif mod_name == 'group':
                    if group_mod and group_mod != mod_value:
                        skip = True
                else:
                    raise RuntimeError()

        if skip:
            continue
        value *= 1000000  # convert seconds to microseconds
        grouped_data.setdefault(name, []).append((name, value, libname))
    return grouped_data
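
# Note (illustrative example, not taken from the test suite): a benchmark's
# 'group' string may carry modifiers after a '$$' separator, e.g.
#   "dispatch 32 branches$$div:32 group:A"
# which divides the measured median by 32 and keeps the entry only when
# group "A" was requested (or when no group filter was given).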

LIB_NAME_ORDER = [
    "runtype",
    "beartype",
    "plum",
    "stdlib",
]
COLOR_BY_LIBNAME = {
    "plum": "purple",
    "beartype": "brown",
    "runtype": "green",
    "stdlib": "lightgrey",
}

# Step 3: Plot the data using matplotlib
def plot_benchmarks(data):
    groups = sorted(data.keys(), reverse=True)

    plt.figure(figsize=(10, 8))
    # plt.barh(names, values, color='skyblue')

    bars = []

    i = 0
    for group in groups:
        data[group].sort(key=lambda x: LIB_NAME_ORDER.index(x[2].split()[0]), reverse=True)
        for name, median, libname in data[group]:
            bars.append((name, libname, median, COLOR_BY_LIBNAME[libname.split()[0]]))
            i += 1
        bars.append(None)

    labels = []

    # Creating the horizontal bar chart
    for i, x in enumerate(bars):
        if x is None:
            labels.append('')
            continue
        (name, label, value, color) = x
        label = label if label not in plt.gca().get_legend_handles_labels()[1] else ""
        plt.barh(i, value, color=color, label=label)
        plt.text(value, i, '%.2f' % value, color=color, fontweight='bold', verticalalignment='center')
        labels.append(name)

    # Setting the y-ticks to show the labels correctly
    prev = None
    for i, l in reversed(list(enumerate(labels))):
        if prev == l:
            labels[i] = ''
        prev = l

    # Adding legend
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
               ncol=3, fancybox=True, shadow=True)
    plt.yticks(ticks=range(len(labels)), labels=labels)
    plt.xlabel('Median Duration in microseconds')  # Adjust label as per the metric used
    plt.title('Benchmark Results')
    plt.tight_layout(pad=1)

def main():
    file_path = sys.argv[1]
    group = sys.argv[2] if len(sys.argv) > 2 else None
    if group == '-':
        group = None  # '-' means no group filter
    save_to_file = sys.argv[3] if len(sys.argv) > 3 else None
    data = read_benchmark_json(file_path)
    data = parse_benchmarks(data, group)
    plot_benchmarks(data)
    if save_to_file:
        plt.savefig(save_to_file)
    else:
        plt.show()

if __name__ == '__main__':
    main()
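
# Example invocation, mirroring docs/generate_benchmarks.sh
# (a second argument of '-' means "no group filter"):
#   python docs/plot_benchmarks.py tmp/bench_dispatch.json A docs/bench_dispatch.jpg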
