Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmarks #28

Merged
merged 8 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
__pycache__
result_*
26 changes: 26 additions & 0 deletions benchmark/bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Apply memory limit.
# The tested limit was 2^25kB 33_554_432kB ~ 33GB
if [[ -z "${MEMORY_LIMIT}" ]]; then
echo "Memory limit not set. Defaulting to 32GB."
MEMORY_LIMIT=33554432
fi

ulimit -v $MEMORY_LIMIT

# The script assumes that you have
# biodivine-boolean-functions and PyEDA
# already installed.

pip list > result_pip.txt

python3 bench_expr_parser.py &> result_expr_parser.tsv
python3 bench_expr_cnf.py &> result_expr_cnf.tsv

python3 bench_bdd_adder.py &> result_bdd_adder.tsv
python3 bench_bdd_monotonicity.py &> result_bdd_monotonicity.tsv

python3 bench_table_subst.py &> result_table_subst.tsv

zip -r results.zip result_*
76 changes: 76 additions & 0 deletions benchmark/bench_bdd_adder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import biodivine_boolean_functions as bbf
from pyeda.inter import *
from utils import run_bench

"""
This benchmark constructs a binary "ripple carry adder" BDD.

This function has a "good" and "bad" variable ordering, resulting
in BDDs of vastly different sizes. Here, we are using the "bad"
ordering to force the library to use significant resources.
"""


def var_name(var_id):
# These weird names ensure that (at least initially),
# the variables are sorted based on their ID.
return "v_" + ("x" * var_id)


def bench_adder_bbf(num_vars):
assert num_vars % 2 == 0 # Only even numbers are allowed.

variables = [var_name(i) for i in range(num_vars)]
literals = [bbf.Bdd.mk_literal(var, True) for var in variables]
result = bbf.Bdd.mk_const(False)

for x in range(int(num_vars / 2)):
x1 = literals[x]
x2 = literals[x + int(num_vars / 2)]
result = bbf.Bdd.mk_or(result, bbf.Bdd.mk_and(x1, x2))
return result


def bench_adder_pyeda(num_vars):
assert num_vars % 2 == 0 # Only even numbers are allowed.

variables = [var_name(i) for i in range(num_vars)]
literals = [bddvar(var) for var in variables]
result = 0

for x in range(int(num_vars / 2)):
x1 = literals[x]
x2 = literals[x + int(num_vars / 2)]
result = result | (x1 & x2)

return result


header = [
"Var. count",
"BBF[avg]",
"PyEDA[avg]",
"BBF[dev]",
"PyEDA[dev]",
"BBF[times]",
"PyEDA[times]"
]
print("\t".join(header))

for num_vars in range(1, 20):
num_vars = num_vars * 2 # Benchmark requires even variable count.

(bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_adder_bbf(num_vars))
(pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_adder_pyeda(num_vars))

row = [
str(num_vars),
str(bbf_avg),
str(pyeda_avg),
str(bbf_dev),
str(pyeda_dev),
str(bbf_times),
str(pyeda_times)
]

print("\t".join(row))
91 changes: 91 additions & 0 deletions benchmark/bench_bdd_monotonicity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import biodivine_boolean_functions as bbf
from pyeda.inter import *
from utils import run_bench

"""
This benchmark constructs a BDD representing the set of all monotonic
functions of the given arity. The size of this set are the "Dedekind
numbers". Computing dedekind numbers is an open problem with no known
solution better than O((n ** 2) ** 2) (i.e. double exponential).

Regardless of variable ordering, the BDD for this problem is exponentially
large. Furthermore, this problem is biologically relevant, since the
Boolean functions that typically appear in biological models are assumed
to be monotonic.
"""


def var_name(var_id):
# These weird names ensure that (at least initially),
# the variables are sorted based on their ID.
return "v_" + ("x" * var_id)


def bench_monotonicity_bbf(arity):
num_vars = 2 ** arity

variables = [var_name(i) for i in range(num_vars)]
literals = [bbf.Bdd.mk_literal(var, True) for var in variables]
result = bbf.Bdd.mk_const(True)

for i in range(arity):
block_size = 2 ** (i + 1)
half_block = int(block_size / 2)
regulator_formula = bbf.Bdd.mk_const(True)
for block in range(int(num_vars / block_size)):
for block_item in range(half_block):
var1 = literals[block_size * block + block_item]
var2 = literals[block_size * block + block_item + half_block]
implies = bbf.Bdd.mk_or(bbf.Bdd.mk_not(var1), var2)
regulator_formula = bbf.Bdd.mk_and(regulator_formula, implies)
result = bbf.Bdd.mk_and(result, regulator_formula)
return result


def bench_monotonicity_pyeda(arity):
num_vars = 2 ** arity

variables = [var_name(i) for i in range(num_vars)]
literals = [bddvar(var) for var in variables]
result = 1

for i in range(arity):
block_size = 2 ** (i + 1)
half_block = int(block_size / 2)
regulator_formula = 1
for block in range(int(num_vars / block_size)):
for block_item in range(half_block):
var1 = literals[block_size * block + block_item]
var2 = literals[block_size * block + block_item + half_block]
implies = ~var1 | var2
regulator_formula = regulator_formula & implies
result = result & regulator_formula
return result


header = [
"Var. count",
"BBF[avg]",
"PyEDA[avg]",
"BBF[dev]",
"PyEDA[dev]",
"BBF[times]",
"PyEDA[times]"
]
print("\t".join(header))

for num_vars in range(1, 6):
(bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_monotonicity_bbf(num_vars))
(pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_monotonicity_pyeda(num_vars))

row = [
str(num_vars),
str(bbf_avg),
str(pyeda_avg),
str(bbf_dev),
str(pyeda_dev),
str(bbf_times),
str(pyeda_times)
]

print("\t".join(row))
59 changes: 59 additions & 0 deletions benchmark/bench_expr_cnf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import biodivine_boolean_functions as bbf
from pyeda.inter import *
from utils import run_bench, load_expressions
import hashlib
import sys

# Some expressions are too large for PyEDA to process without this.
sys.setrecursionlimit(100_000)

"""
This benchmark compares the performance of the Boolean expression
parsers in each library on a set of expressions extracted from
real world biological models.
"""


def bench_cnf_bbf(e):
return e.to_cnf()


def bench_cnf_pyeda(e):
return e.to_cnf()


header = [
"hash + len",
"BBF[avg]",
"PyEDA[avg]",
"BBF[dev]",
"PyEDA[dev]",
"BBF[times]",
"PyEDA[times]"
]
print("\t".join(header))

# Expressions should be sorted by length.
for e_str in load_expressions('expressions.txt'):
bbf_e = bbf.Expression(e_str) # Expression is parsed only once.
(bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_cnf_bbf(bbf_e))

e_str = e_str.replace("!", "~") # PyEDA uses ~ as the negation operator.

pyeda_e = expr(e_str) # Expression is parsed only once.
(pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_cnf_pyeda(pyeda_e))

h = hashlib.new('sha256')
h.update(e_str.encode())

row = [
f"{h.hexdigest()}__{len(e_str)}",
str(bbf_avg),
str(pyeda_avg),
str(bbf_dev),
str(pyeda_dev),
str(bbf_times),
str(pyeda_times)
]

print("\t".join(row))
57 changes: 57 additions & 0 deletions benchmark/bench_expr_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import biodivine_boolean_functions as bbf
from pyeda.inter import *
from utils import run_bench, load_expressions
import hashlib
import sys

# Some expressions are too large for PyEDA to process without this.
sys.setrecursionlimit(100_000)

"""
This benchmark compares the performance of the Boolean expression
parsers in each library on a set of expressions extracted from
real world biological models.
"""


def bench_parse_bbf(e):
return bbf.Expression(e)


def bench_parse_pyeda(e):
return expr(e)


header = [
"hash + len",
"BBF[avg]",
"PyEDA[avg]",
"BBF[dev]",
"PyEDA[dev]",
"BBF[times]",
"PyEDA[times]"
]
print("\t".join(header))

# Expressions should be sorted by length.
for e_str in load_expressions('expressions.txt'):
(bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_parse_bbf(e_str))

e_str = e_str.replace("!", "~") # PyEDA uses ~ as the negation operator.

(pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_parse_pyeda(e_str))

h = hashlib.new('sha256')
h.update(e_str.encode())

row = [
f"{h.hexdigest()}__{len(e_str)}",
str(bbf_avg),
str(pyeda_avg),
str(bbf_dev),
str(pyeda_dev),
str(bbf_times),
str(pyeda_times)
]

print("\t".join(row))
65 changes: 65 additions & 0 deletions benchmark/bench_table_subst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import biodivine_boolean_functions as bbf
from pyeda.inter import *
from utils import run_bench

"""
This benchmark compares the performance of the logic table representations
using a repeated substitution while increasing the variable count of the
formula.

The operation starts with a formula 'x_0' and it iteratively
substitutes 'x_i' for '(x_i ^ x_{i+1})'', starting with i = 0. So in the
end, we obtain a formula (x_0 ^ (x_1 ^ (x_2 ^ ... ( ^ x_n)))).

This operation was chosen because it substitutes the variable for
a formula that contains the variable itself, plus another, new variable.
As such, it should be more complicated than basic operations on tables
where the arity of the function does not change.
"""


def bench_subst_bbf(num_vars):
variable_names = [f"x_{i}" for i in range(num_vars)]
variables = [bbf.Table.from_expression(bbf.Expression(f"x_{i}")) for i in range(num_vars)]
table = variables[0]
for i in range(num_vars - 1):
exp = bbf.Table.mk_xor(variables[i], variables[i + 1])
table = table.substitute({variable_names[i]: exp})
return table


def bench_subst_pyeda(num_vars):
variables = [ttvar(f"x_{i}") for i in range(num_vars)]
table = variables[0]
for i in range(num_vars - 1):
exp = variables[i] ^ variables[i + 1]
table = table.compose({variables[i]: exp})
return table


header = [
"Var. count",
"BBF[avg]",
"PyEDA[avg]",
"BBF[dev]",
"PyEDA[dev]",
"BBF[times]",
"PyEDA[times]"
]
print("\t".join(header))

for num_vars in range(2, 20):
(bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_subst_bbf(num_vars))
(pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_subst_pyeda(num_vars))

row = [
str(num_vars),
str(bbf_avg),
str(pyeda_avg),
str(bbf_dev),
str(pyeda_dev),
str(bbf_times),
str(pyeda_times)
]

print("\t".join(row))
Loading