From dbd24130e89655c583fce5de51b037c978f2995d Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Mon, 19 Aug 2024 18:23:50 -0600
Subject: [PATCH 1/7] Simplifying Hybrid BigM formulation as it too can be
 built from the GDP formulation (it just needs two transformations)

---
 src/omlt/linear_tree/lt_formulation.py | 96 ++------------------------
 1 file changed, 5 insertions(+), 91 deletions(-)

diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py
index 4f83e7f3..01cc6190 100644
--- a/src/omlt/linear_tree/lt_formulation.py
+++ b/src/omlt/linear_tree/lt_formulation.py
@@ -163,13 +163,17 @@ def _build_formulation(self):
             self.model_definition.scaled_input_bounds,
         )
 
-        _add_hybrid_formulation_to_block(
+        _add_gdp_formulation_to_block(
             block=self.block,
             model_definition=self.model_definition,
             input_vars=self.block.scaled_inputs,
             output_vars=self.block.scaled_outputs,
+            transformation="custom",
         )
 
+        pe.TransformationFactory('gdp.bound_pretransformation').apply_to(self.block)
+        pe.TransformationFactory('gdp.binary_multiplication').apply_to(self.block)
+
 
 def _build_output_bounds(model_def, input_bounds):
     """
@@ -289,93 +293,3 @@ def disjunction_rule(b, tree):
 
     if transformation != "custom":
         pe.TransformationFactory(transformation_string).apply_to(block)
-
-
-def _add_hybrid_formulation_to_block(block, model_definition, input_vars, output_vars):
-    """
-    This function adds the Hybrid BigM representation to the OmltBlock
-
-    Arguments:
-        block -- OmltBlock
-        model_definition -- LinearTreeDefinition Object
-        input_vars -- input variables to the linear tree model
-        output_vars -- output variable of the linear tree model
-    """
-    leaves = model_definition.leaves
-    input_bounds = model_definition.scaled_input_bounds
-    n_inputs = model_definition.n_inputs
-
-    # The set of trees
-    tree_ids = list(leaves.keys())
-    # Create a list of tuples that contains the tree and leaf indices. Note that
-    # the leaf indices depend on the tree in the ensemble.
-    t_l = []
-    for tree in tree_ids:
-        for leaf in leaves[tree].keys():
-            t_l.append((tree, leaf))
-
-    features = np.arange(0, n_inputs)
-
-    # Use the input_bounds and the linear models in the leaves to calculate
-    # the lower and upper bounds on the output variable. Required for Pyomo.GDP
-    output_bounds = _build_output_bounds(model_definition, input_bounds)
-
-    # Ouptuts are automatically scaled based on whether inputs are scaled
-    block.outputs.setub(output_bounds[1])
-    block.outputs.setlb(output_bounds[0])
-    block.scaled_outputs.setub(output_bounds[1])
-    block.scaled_outputs.setlb(output_bounds[0])
-
-    # Create the intermeditate variables. z is binary that indicates which leaf
-    # in tree t is returned. intermediate_output is the output of tree t and
-    # the total output of the model is the sum of the intermediate_output vars
-    block.z = pe.Var(t_l, within=pe.Binary)
-    block.intermediate_output = pe.Var(tree_ids)
-
-    @block.Constraint(features, tree_ids)
-    def lower_bound_constraints(mdl, feat, tree):
-        leaf_ids = list(leaves[tree].keys())
-        return (
-            sum(
-                leaves[tree][leaf]["bounds"][feat][0] * mdl.z[tree, leaf]
-                for leaf in leaf_ids
-            )
-            <= input_vars[feat]
-        )
-
-    @block.Constraint(features, tree_ids)
-    def upper_bound_constraints(mdl, feat, tree):
-        leaf_ids = list(leaves[tree].keys())
-        return (
-            sum(
-                leaves[tree][leaf]["bounds"][feat][1] * mdl.z[tree, leaf]
-                for leaf in leaf_ids
-            )
-            >= input_vars[feat]
-        )
-
-    @block.Constraint(tree_ids)
-    def linear_constraint(mdl, tree):
-        leaf_ids = list(leaves[tree].keys())
-        return block.intermediate_output[tree] == sum(
-            (
-                sum(
-                    leaves[tree][leaf]["slope"][feat] * input_vars[feat]
-                    for feat in features
-                )
-                + leaves[tree][leaf]["intercept"]
-            )
-            * block.z[tree, leaf]
-            for leaf in leaf_ids
-        )
-
-    @block.Constraint(tree_ids)
-    def only_one_leaf_per_tree(b, tree):
-        leaf_ids = list(leaves[tree].keys())
-        return sum(block.z[tree, leaf] for leaf in leaf_ids) == 1
-
-    @block.Constraint()
-    def output_sum_of_trees(b):
-        return output_vars[0] == sum(
-            block.intermediate_output[tree] for tree in tree_ids
-        )

From 53ba595ef7202c76de5fb6bc999fc62848792c5a Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:08:54 -0600
Subject: [PATCH 2/7] Adding an optional epsilon to force the right (greater
 than) branches in linear trees to be strict up to a tolerance, adding a test
 that cheats without epsilon but not with

---
 src/omlt/linear_tree/lt_formulation.py   | 12 ++++--
 tests/linear_tree/test_lt_formulation.py | 52 ++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py
index 01cc6190..847e1a90 100644
--- a/src/omlt/linear_tree/lt_formulation.py
+++ b/src/omlt/linear_tree/lt_formulation.py
@@ -48,7 +48,7 @@ class LinearTreeGDPFormulation(_PyomoFormulation):
           optimization development. Optimization and Engineering, 23:607–642
     """
 
-    def __init__(self, lt_definition, transformation="bigm"):
+    def __init__(self, lt_definition, transformation="bigm", epsilon=0):
         """
         Create a LinearTreeGDPFormulation object
 
@@ -66,6 +66,7 @@ def __init__(self, lt_definition, transformation="bigm"):
         super().__init__()
         self.model_definition = lt_definition
         self.transformation = transformation
+        self.epsilon = epsilon
 
         # Ensure that the GDP transformation given is supported
         supported_transformations = ["bigm", "hull", "mbigm", "custom"]
@@ -100,6 +101,7 @@ def _build_formulation(self):
             input_vars=self.block.scaled_inputs,
             output_vars=self.block.scaled_outputs,
             transformation=self.transformation,
+            epsilon=self.epsilon
         )
 
 
@@ -133,7 +135,7 @@ class LinearTreeHybridBigMFormulation(_PyomoFormulation):
 
     """
 
-    def __init__(self, lt_definition):
+    def __init__(self, lt_definition, epsilon=0):
         """
         Create a LinearTreeHybridBigMFormulation object
 
@@ -142,6 +144,7 @@ def __init__(self, lt_definition):
         """
         super().__init__()
         self.model_definition = lt_definition
+        self.epsilon = epsilon
 
     @property
     def input_indexes(self):
@@ -169,6 +172,7 @@ def _build_formulation(self):
             input_vars=self.block.scaled_inputs,
             output_vars=self.block.scaled_outputs,
             transformation="custom",
+            epsilon=self.epsilon,
         )
 
         pe.TransformationFactory('gdp.bound_pretransformation').apply_to(self.block)
@@ -219,7 +223,7 @@ def _build_output_bounds(model_def, input_bounds):
 
 
 def _add_gdp_formulation_to_block(
-    block, model_definition, input_vars, output_vars, transformation
+    block, model_definition, input_vars, output_vars, transformation, epsilon
 ):
     """
     This function adds the GDP representation to the OmltBlock using Pyomo.GDP
@@ -262,7 +266,7 @@ def _add_gdp_formulation_to_block(
     # and the linear model expression.
     def disjuncts_rule(dsj, tree, leaf):
         def lb_rule(dsj, feat):
-            return input_vars[feat] >= leaves[tree][leaf]["bounds"][feat][0]
+            return input_vars[feat] >= leaves[tree][leaf]["bounds"][feat][0] + epsilon
 
         dsj.lb_constraint = pe.Constraint(features, rule=lb_rule)
 
diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py
index 28f6f873..f770680d 100644
--- a/tests/linear_tree/test_lt_formulation.py
+++ b/tests/linear_tree/test_lt_formulation.py
@@ -190,6 +190,58 @@ def connect_outputs(mdl):
     assert y_pred[0] == approx(solution_1_bigm[1])
 
 
+def get_epsilon_test_model(formulation_lt):
+    model1 = pe.ConcreteModel()
+    model1.x = pe.Var(initialize=0)
+    model1.y = pe.Var(initialize=0)
+    model1.obj = pe.Objective(expr=model1.y, sense=pe.maximize)
+    model1.lt = OmltBlock()
+    model1.lt.build_formulation(formulation_lt)
+
+    @model1.Constraint()
+    def connect_inputs(mdl):
+        return mdl.x == mdl.lt.inputs[0]
+
+    @model1.Constraint()
+    def connect_outputs(mdl):
+        return mdl.y == mdl.lt.outputs[0]
+
+    model1.x.fix(1.058749)
+
+    return model1
+
+
+@pytest.mark.skipif(
+    not lineartree_available or not cbc_available,
+    reason="Need Linear-Tree Package and cbc",
+)
+def test_nonzero_epsilon():
+    regr_small = linear_model_tree(X=X_small, y=y_small)
+    input_bounds = {0: (min(X_small)[0], max(X_small)[0])}
+    ltmodel_small = LinearTreeDefinition(regr_small, unscaled_input_bounds=input_bounds)
+    formulation_bad = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm",
+                                               epsilon=0)
+    formulation1_lt = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm",
+                                               epsilon=1e-4)
+
+    model_good = get_epsilon_test_model(formulation1_lt)
+    model_bad = get_epsilon_test_model(formulation_bad)
+
+    status_1_bigm = pe.SolverFactory("cbc").solve(model_bad)
+    pe.assert_optimal_termination(status_1_bigm)
+    solution_1_bigm = (pe.value(model_bad.x), pe.value(model_bad.y))
+    y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1))
+    # Without an epsilon, the model cheats and does not match the tree prediction
+    assert y_pred[0] != approx(solution_1_bigm[1])
+
+    status = pe.SolverFactory("cbc").solve(model_good)
+    pe.assert_optimal_termination(status)
+    solution = (pe.value(model_good.x), pe.value(model_good.y))
+    y_pred = regr_small.predict(np.array(solution[0]).reshape(1, -1))
+    # With epsilon, the model matches the tree prediction
+    assert y_pred[0] == approx(solution[1])
+
+
 @pytest.mark.skipif(
     not lineartree_available or not cbc_available,
     reason="Need Linear-Tree Package and cbc",

From f577f6ebd87ccd5823d5684940532b6ef4d114bb Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:32:58 -0600
Subject: [PATCH 3/7] Running ruff

---
 tests/linear_tree/test_lt_formulation.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py
index 33dd2755..b4da76da 100644
--- a/tests/linear_tree/test_lt_formulation.py
+++ b/tests/linear_tree/test_lt_formulation.py
@@ -223,10 +223,12 @@ def test_nonzero_epsilon():
     regr_small = linear_model_tree(X=X_small, y=y_small)
     input_bounds = {0: (min(X_small)[0], max(X_small)[0])}
     ltmodel_small = LinearTreeDefinition(regr_small, unscaled_input_bounds=input_bounds)
-    formulation_bad = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm",
-                                               epsilon=0)
-    formulation1_lt = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm",
-                                               epsilon=1e-4)
+    formulation_bad = LinearTreeGDPFormulation(
+        ltmodel_small, transformation="bigm", epsilon=0
+    )
+    formulation1_lt = LinearTreeGDPFormulation(
+        ltmodel_small, transformation="bigm", epsilon=1e-4
+    )
 
     model_good = get_epsilon_test_model(formulation1_lt)
     model_bad = get_epsilon_test_model(formulation_bad)

From 60a5f26d5d0832bac3e6ebd31e91437751f918fd Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:39:00 -0600
Subject: [PATCH 4/7] Fixing one more oops from the main merge

---
 tests/linear_tree/test_lt_formulation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py
index b4da76da..db9121dd 100644
--- a/tests/linear_tree/test_lt_formulation.py
+++ b/tests/linear_tree/test_lt_formulation.py
@@ -238,14 +238,14 @@ def test_nonzero_epsilon():
     solution_1_bigm = (pe.value(model_bad.x), pe.value(model_bad.y))
     y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1))
     # Without an epsilon, the model cheats and does not match the tree prediction
-    assert y_pred[0] != approx(solution_1_bigm[1])
+    assert y_pred[0] != pytest.approx(solution_1_bigm[1])
 
     status = pe.SolverFactory("cbc").solve(model_good)
     pe.assert_optimal_termination(status)
     solution = (pe.value(model_good.x), pe.value(model_good.y))
     y_pred = regr_small.predict(np.array(solution[0]).reshape(1, -1))
     # With epsilon, the model matches the tree prediction
-    assert y_pred[0] == approx(solution[1])
+    assert y_pred[0] == pytest.approx(solution[1])
 
 
 @pytest.mark.skipif(

From 15ad95f6826d8094fb5f88298fd45edc1254cfc8 Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Wed, 21 Aug 2024 13:05:40 -0600
Subject: [PATCH 5/7] Moving pyomo dependency forward to version that both as
 Pyomo issue #3262 fix as well as the gdp.binary_multiplication transformation

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index fab992f2..2b215f71 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,8 +11,8 @@ authors = [
 dependencies = [
   "networkx",
   "numpy",
-  # TODO: Remove constraint when fix to https://github.com/Pyomo/pyomo/issues/3262 is released
-  "pyomo==6.6.2",
+  # Pyomo release that included #3262 fix and has transformations for linear trees
+  "pyomo>=6.7.3",
   "onnx",
   "onnxruntime",
 ]

From c03ea819fdd373d8b6c2b138e8d93beaa9cc109d Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Thu, 22 Aug 2024 07:22:04 -0600
Subject: [PATCH 6/7] Fixing ruff complaints

---
 src/omlt/linear_tree/lt_formulation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py
index 450307b7..712a91b8 100644
--- a/src/omlt/linear_tree/lt_formulation.py
+++ b/src/omlt/linear_tree/lt_formulation.py
@@ -144,7 +144,7 @@ def __init__(self, lt_definition, epsilon=0):
         Arguments:
             lt_definition: LinearTreeDefinition Object
 
-        Keyword arguments:
+        Keyword Arguments:
             epsilon: Tolerance to use in enforcing that choosing the right
                 branch of a linear tree node can only happen if the feature
                 is strictly greater than the branch value.(default: 0)
@@ -231,7 +231,7 @@ def _build_output_bounds(model_def, input_bounds):
     return bounds
 
 
-def _add_gdp_formulation_to_block(
+def _add_gdp_formulation_to_block( # noqa: PLR0913
     block, model_definition, input_vars, output_vars, transformation, epsilon
 ):
     """This function adds the GDP representation to the OmltBlock using Pyomo.GDP.
@@ -242,6 +242,9 @@ def _add_gdp_formulation_to_block(
         input_vars: input variables to the linear tree model
         output_vars: output variable of the linear tree model
         transformation: Transformation to apply
+        epsilon: Tolerance to use in enforcing that choosing the right
+            branch of a linear tree node can only happen if the feature
+            is strictly greater than the branch value.
 
     """
     leaves = model_definition.leaves

From d09862235e07a794ba3d4c0600b40b92c40ae8ea Mon Sep 17 00:00:00 2001
From: Emma Johnson <emma58@users.noreply.github.com>
Date: Thu, 22 Aug 2024 08:34:19 -0600
Subject: [PATCH 7/7] NFC: fixing last ruff formatting issue

---
 src/omlt/linear_tree/lt_formulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py
index 712a91b8..d72df160 100644
--- a/src/omlt/linear_tree/lt_formulation.py
+++ b/src/omlt/linear_tree/lt_formulation.py
@@ -231,7 +231,7 @@ def _build_output_bounds(model_def, input_bounds):
     return bounds
 
 
-def _add_gdp_formulation_to_block( # noqa: PLR0913
+def _add_gdp_formulation_to_block(  # noqa: PLR0913
     block, model_definition, input_vars, output_vars, transformation, epsilon
 ):
     """This function adds the GDP representation to the OmltBlock using Pyomo.GDP.