sybila · AurumTheEnd · May 22, 2024 · May 20, 2024 · May 20, 2024 · May 20, 2024
diff --git a/benchmark/.gitignore b/benchmark/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+result_*
diff --git a/benchmark/bench.sh b/benchmark/bench.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Apply memory limit.
+# The tested limit was 2^25kB 33_554_432kB ~ 33GB
+if [[ -z "${MEMORY_LIMIT}" ]]; then
+  echo "Memory limit not set. Defaulting to 32GB."
+  MEMORY_LIMIT=33554432
+fi
+
+ulimit -v $MEMORY_LIMIT
+
+# The script assumes that you have 
+# biodivine-boolean-functions and PyEDA
+# already installed.
+
+pip list > result_pip.txt
+
+python3 bench_expr_parser.py &> result_expr_parser.tsv
+python3 bench_expr_cnf.py &> result_expr_cnf.tsv
+
+python3 bench_bdd_adder.py &> result_bdd_adder.tsv
+python3 bench_bdd_monotonicity.py &> result_bdd_monotonicity.tsv
+
+python3 bench_table_subst.py &> result_table_subst.tsv 
+
+zip -r results.zip result_*
diff --git a/benchmark/bench_bdd_adder.py b/benchmark/bench_bdd_adder.py
@@ -0,0 +1,76 @@
+import biodivine_boolean_functions as bbf
+from pyeda.inter import *
+from utils import run_bench
+
+"""
+This benchmark constructs a binary "ripple carry adder" BDD.
+
+This function has a "good" and "bad" variable ordering, resulting
+in BDDs of vastly different sizes. Here, we are using the "bad"
+ordering to force the library to use significant resources.
+"""
+
+
+def var_name(var_id):
+    # These weird names ensure that (at least initially),
+    # the variables are sorted based on their ID.
+    return "v_" + ("x" * var_id)
+
+
+def bench_adder_bbf(num_vars):
+    assert num_vars % 2 == 0  # Only even numbers are allowed.
+
+    variables = [var_name(i) for i in range(num_vars)]
+    literals = [bbf.Bdd.mk_literal(var, True) for var in variables]
+    result = bbf.Bdd.mk_const(False)
+
+    for x in range(int(num_vars / 2)):
+        x1 = literals[x]
+        x2 = literals[x + int(num_vars / 2)]
+        result = bbf.Bdd.mk_or(result, bbf.Bdd.mk_and(x1, x2))
+    return result
+
+
+def bench_adder_pyeda(num_vars):
+    assert num_vars % 2 == 0  # Only even numbers are allowed.
+
+    variables = [var_name(i) for i in range(num_vars)]
+    literals = [bddvar(var) for var in variables]
+    result = 0
+
+    for x in range(int(num_vars / 2)):
+        x1 = literals[x]
+        x2 = literals[x + int(num_vars / 2)]
+        result = result | (x1 & x2)
+
+    return result
+
+
+header = [
+    "Var. count",
+    "BBF[avg]",
+    "PyEDA[avg]",
+    "BBF[dev]",
+    "PyEDA[dev]",
+    "BBF[times]",
+    "PyEDA[times]"
+]
+print("\t".join(header))
+
+for num_vars in range(1, 20):
+    num_vars = num_vars * 2  # Benchmark requires even variable count.
+
+    (bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_adder_bbf(num_vars))
+    (pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_adder_pyeda(num_vars))
+
+    row = [
+        str(num_vars),
+        str(bbf_avg),
+        str(pyeda_avg),
+        str(bbf_dev),
+        str(pyeda_dev),
+        str(bbf_times),
+        str(pyeda_times)
+    ]
+
+    print("\t".join(row))
diff --git a/benchmark/bench_bdd_monotonicity.py b/benchmark/bench_bdd_monotonicity.py
@@ -0,0 +1,91 @@
+import biodivine_boolean_functions as bbf
+from pyeda.inter import *
+from utils import run_bench
+
+"""
+This benchmark constructs a BDD representing the set of all monotonic
+functions of the given arity. The size of this set are the "Dedekind
+numbers". Computing dedekind numbers is an open problem with no known
+solution better than O((n ** 2) ** 2) (i.e. double exponential).
+
+Regardless of variable ordering, the BDD for this problem is exponentially
+large. Furthermore, this problem is biologically relevant, since the
+Boolean functions that typically appear in biological models are assumed
+to be monotonic.
+"""
+
+
+def var_name(var_id):
+    # These weird names ensure that (at least initially),
+    # the variables are sorted based on their ID.
+    return "v_" + ("x" * var_id)
+
+
+def bench_monotonicity_bbf(arity):
+    num_vars = 2 ** arity
+
+    variables = [var_name(i) for i in range(num_vars)]
+    literals = [bbf.Bdd.mk_literal(var, True) for var in variables]
+    result = bbf.Bdd.mk_const(True)
+
+    for i in range(arity):
+        block_size = 2 ** (i + 1)
+        half_block = int(block_size / 2)
+        regulator_formula = bbf.Bdd.mk_const(True)
+        for block in range(int(num_vars / block_size)):
+            for block_item in range(half_block):
+                var1 = literals[block_size * block + block_item]
+                var2 = literals[block_size * block + block_item + half_block]
+                implies = bbf.Bdd.mk_or(bbf.Bdd.mk_not(var1), var2)
+                regulator_formula = bbf.Bdd.mk_and(regulator_formula, implies)
+        result = bbf.Bdd.mk_and(result, regulator_formula)
+    return result
+
+
+def bench_monotonicity_pyeda(arity):
+    num_vars = 2 ** arity
+
+    variables = [var_name(i) for i in range(num_vars)]
+    literals = [bddvar(var) for var in variables]
+    result = 1
+
+    for i in range(arity):
+        block_size = 2 ** (i + 1)
+        half_block = int(block_size / 2)
+        regulator_formula = 1
+        for block in range(int(num_vars / block_size)):
+            for block_item in range(half_block):
+                var1 = literals[block_size * block + block_item]
+                var2 = literals[block_size * block + block_item + half_block]
+                implies = ~var1 | var2
+                regulator_formula = regulator_formula & implies
+        result = result & regulator_formula
+    return result
+
+
+header = [
+    "Var. count",
+    "BBF[avg]",
+    "PyEDA[avg]",
+    "BBF[dev]",
+    "PyEDA[dev]",
+    "BBF[times]",
+    "PyEDA[times]"
+]
+print("\t".join(header))
+
+for num_vars in range(1, 6):
+    (bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_monotonicity_bbf(num_vars))
+    (pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_monotonicity_pyeda(num_vars))
+
+    row = [
+        str(num_vars),
+        str(bbf_avg),
+        str(pyeda_avg),
+        str(bbf_dev),
+        str(pyeda_dev),
+        str(bbf_times),
+        str(pyeda_times)
+    ]
+
+    print("\t".join(row))
diff --git a/benchmark/bench_expr_cnf.py b/benchmark/bench_expr_cnf.py
@@ -0,0 +1,59 @@
+import biodivine_boolean_functions as bbf
+from pyeda.inter import *
+from utils import run_bench, load_expressions
+import hashlib
+import sys
+
+# Some expressions are too large for PyEDA to process without this.
+sys.setrecursionlimit(100_000)
+
+"""
+This benchmark compares the performance of the Boolean expression
+parsers in each library on a set of expressions extracted from
+real world biological models.
+"""
+
+
+def bench_cnf_bbf(e):
+    return e.to_cnf()
+
+
+def bench_cnf_pyeda(e):
+    return e.to_cnf()
+
+
+header = [
+    "hash + len",
+    "BBF[avg]",
+    "PyEDA[avg]",
+    "BBF[dev]",
+    "PyEDA[dev]",
+    "BBF[times]",
+    "PyEDA[times]"
+]
+print("\t".join(header))
+
+# Expressions should be sorted by length.
+for e_str in load_expressions('expressions.txt'):
+    bbf_e = bbf.Expression(e_str)  # Expression is parsed only once.
+    (bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_cnf_bbf(bbf_e))
+
+    e_str = e_str.replace("!", "~")  # PyEDA uses ~ as the negation operator.
+
+    pyeda_e = expr(e_str)  # Expression is parsed only once.
+    (pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_cnf_pyeda(pyeda_e))
+
+    h = hashlib.new('sha256')
+    h.update(e_str.encode())
+
+    row = [
+        f"{h.hexdigest()}__{len(e_str)}",
+        str(bbf_avg),
+        str(pyeda_avg),
+        str(bbf_dev),
+        str(pyeda_dev),
+        str(bbf_times),
+        str(pyeda_times)
+    ]
+
+    print("\t".join(row))
diff --git a/benchmark/bench_expr_parser.py b/benchmark/bench_expr_parser.py
@@ -0,0 +1,57 @@
+import biodivine_boolean_functions as bbf
+from pyeda.inter import *
+from utils import run_bench, load_expressions
+import hashlib
+import sys
+
+# Some expressions are too large for PyEDA to process without this.
+sys.setrecursionlimit(100_000)
+
+"""
+This benchmark compares the performance of the Boolean expression
+parsers in each library on a set of expressions extracted from
+real world biological models.
+"""
+
+
+def bench_parse_bbf(e):
+    return bbf.Expression(e)
+
+
+def bench_parse_pyeda(e):
+    return expr(e)
+
+
+header = [
+    "hash + len",
+    "BBF[avg]",
+    "PyEDA[avg]",
+    "BBF[dev]",
+    "PyEDA[dev]",
+    "BBF[times]",
+    "PyEDA[times]"
+]
+print("\t".join(header))
+
+# Expressions should be sorted by length.
+for e_str in load_expressions('expressions.txt'):
+    (bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_parse_bbf(e_str))
+
+    e_str = e_str.replace("!", "~")  # PyEDA uses ~ as the negation operator.
+
+    (pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_parse_pyeda(e_str))
+
+    h = hashlib.new('sha256')
+    h.update(e_str.encode())
+
+    row = [
+        f"{h.hexdigest()}__{len(e_str)}",
+        str(bbf_avg),
+        str(pyeda_avg),
+        str(bbf_dev),
+        str(pyeda_dev),
+        str(bbf_times),
+        str(pyeda_times)
+    ]
+
+    print("\t".join(row))
diff --git a/benchmark/bench_table_subst.py b/benchmark/bench_table_subst.py
@@ -0,0 +1,65 @@
+import biodivine_boolean_functions as bbf
+from pyeda.inter import *
+from utils import run_bench
+
+"""
+This benchmark compares the performance of the logic table representations
+using a repeated substitution while increasing the variable count of the
+formula.
+
+The operation starts with a formula 'x_0' and it iteratively
+substitutes 'x_i' for '(x_i ^ x_{i+1})'', starting with i = 0. So in the
+end, we obtain a formula (x_0 ^ (x_1 ^ (x_2 ^ ... ( ^ x_n)))).
+
+This operation was chosen because it substitutes the variable for
+a formula that contains the variable itself, plus another, new variable.
+As such, it should be more complicated than basic operations on tables
+where the arity of the function does not change.
+"""
+
+
+def bench_subst_bbf(num_vars):
+    variable_names = [f"x_{i}" for i in range(num_vars)]
+    variables = [bbf.Table.from_expression(bbf.Expression(f"x_{i}")) for i in range(num_vars)]
+    table = variables[0]
+    for i in range(num_vars - 1):
+        exp = bbf.Table.mk_xor(variables[i], variables[i + 1])
+        table = table.substitute({variable_names[i]: exp})
+    return table
+
+
+def bench_subst_pyeda(num_vars):
+    variables = [ttvar(f"x_{i}") for i in range(num_vars)]
+    table = variables[0]
+    for i in range(num_vars - 1):
+        exp = variables[i] ^ variables[i + 1]
+        table = table.compose({variables[i]: exp})
+    return table
+
+
+header = [
+    "Var. count",
+    "BBF[avg]",
+    "PyEDA[avg]",
+    "BBF[dev]",
+    "PyEDA[dev]",
+    "BBF[times]",
+    "PyEDA[times]"
+]
+print("\t".join(header))
+
+for num_vars in range(2, 20):
+    (bbf_avg, bbf_dev, bbf_times) = run_bench(lambda: bench_subst_bbf(num_vars))
+    (pyeda_avg, pyeda_dev, pyeda_times) = run_bench(lambda: bench_subst_pyeda(num_vars))
+
+    row = [
+        str(num_vars),
+        str(bbf_avg),
+        str(pyeda_avg),
+        str(bbf_dev),
+        str(pyeda_dev),
+        str(bbf_times),
+        str(pyeda_times)
+    ]
+
+    print("\t".join(row))