From dbd24130e89655c583fce5de51b037c978f2995d Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Mon, 19 Aug 2024 18:23:50 -0600 Subject: [PATCH 1/7] Simplifying Hybrid BigM formulation as it too can be built from the GDP formulation (it just needs two transformations) --- src/omlt/linear_tree/lt_formulation.py | 96 ++------------------------ 1 file changed, 5 insertions(+), 91 deletions(-) diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py index 4f83e7f3..01cc6190 100644 --- a/src/omlt/linear_tree/lt_formulation.py +++ b/src/omlt/linear_tree/lt_formulation.py @@ -163,13 +163,17 @@ def _build_formulation(self): self.model_definition.scaled_input_bounds, ) - _add_hybrid_formulation_to_block( + _add_gdp_formulation_to_block( block=self.block, model_definition=self.model_definition, input_vars=self.block.scaled_inputs, output_vars=self.block.scaled_outputs, + transformation="custom", ) + pe.TransformationFactory('gdp.bound_pretransformation').apply_to(self.block) + pe.TransformationFactory('gdp.binary_multiplication').apply_to(self.block) + def _build_output_bounds(model_def, input_bounds): """ @@ -289,93 +293,3 @@ def disjunction_rule(b, tree): if transformation != "custom": pe.TransformationFactory(transformation_string).apply_to(block) - - -def _add_hybrid_formulation_to_block(block, model_definition, input_vars, output_vars): - """ - This function adds the Hybrid BigM representation to the OmltBlock - - Arguments: - block -- OmltBlock - model_definition -- LinearTreeDefinition Object - input_vars -- input variables to the linear tree model - output_vars -- output variable of the linear tree model - """ - leaves = model_definition.leaves - input_bounds = model_definition.scaled_input_bounds - n_inputs = model_definition.n_inputs - - # The set of trees - tree_ids = list(leaves.keys()) - # Create a list of tuples that contains the tree and leaf indices. Note that - # the leaf indices depend on the tree in the ensemble. - t_l = [] - for tree in tree_ids: - for leaf in leaves[tree].keys(): - t_l.append((tree, leaf)) - - features = np.arange(0, n_inputs) - - # Use the input_bounds and the linear models in the leaves to calculate - # the lower and upper bounds on the output variable. Required for Pyomo.GDP - output_bounds = _build_output_bounds(model_definition, input_bounds) - - # Ouptuts are automatically scaled based on whether inputs are scaled - block.outputs.setub(output_bounds[1]) - block.outputs.setlb(output_bounds[0]) - block.scaled_outputs.setub(output_bounds[1]) - block.scaled_outputs.setlb(output_bounds[0]) - - # Create the intermeditate variables. z is binary that indicates which leaf - # in tree t is returned. intermediate_output is the output of tree t and - # the total output of the model is the sum of the intermediate_output vars - block.z = pe.Var(t_l, within=pe.Binary) - block.intermediate_output = pe.Var(tree_ids) - - @block.Constraint(features, tree_ids) - def lower_bound_constraints(mdl, feat, tree): - leaf_ids = list(leaves[tree].keys()) - return ( - sum( - leaves[tree][leaf]["bounds"][feat][0] * mdl.z[tree, leaf] - for leaf in leaf_ids - ) - <= input_vars[feat] - ) - - @block.Constraint(features, tree_ids) - def upper_bound_constraints(mdl, feat, tree): - leaf_ids = list(leaves[tree].keys()) - return ( - sum( - leaves[tree][leaf]["bounds"][feat][1] * mdl.z[tree, leaf] - for leaf in leaf_ids - ) - >= input_vars[feat] - ) - - @block.Constraint(tree_ids) - def linear_constraint(mdl, tree): - leaf_ids = list(leaves[tree].keys()) - return block.intermediate_output[tree] == sum( - ( - sum( - leaves[tree][leaf]["slope"][feat] * input_vars[feat] - for feat in features - ) - + leaves[tree][leaf]["intercept"] - ) - * block.z[tree, leaf] - for leaf in leaf_ids - ) - - @block.Constraint(tree_ids) - def only_one_leaf_per_tree(b, tree): - leaf_ids = list(leaves[tree].keys()) - return sum(block.z[tree, leaf] for leaf in leaf_ids) == 1 - - @block.Constraint() - def output_sum_of_trees(b): - return output_vars[0] == sum( - block.intermediate_output[tree] for tree in tree_ids - ) From 53ba595ef7202c76de5fb6bc999fc62848792c5a Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Tue, 20 Aug 2024 17:08:54 -0600 Subject: [PATCH 2/7] Adding an optional epsilon to force the right (greater than) branches in linear trees to be strict up to a tolerance, adding a test that cheats without epsilon but not with --- src/omlt/linear_tree/lt_formulation.py | 12 ++++-- tests/linear_tree/test_lt_formulation.py | 52 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py index 01cc6190..847e1a90 100644 --- a/src/omlt/linear_tree/lt_formulation.py +++ b/src/omlt/linear_tree/lt_formulation.py @@ -48,7 +48,7 @@ class LinearTreeGDPFormulation(_PyomoFormulation): optimization development. Optimization and Engineering, 23:607–642 """ - def __init__(self, lt_definition, transformation="bigm"): + def __init__(self, lt_definition, transformation="bigm", epsilon=0): """ Create a LinearTreeGDPFormulation object @@ -66,6 +66,7 @@ def __init__(self, lt_definition, transformation="bigm"): super().__init__() self.model_definition = lt_definition self.transformation = transformation + self.epsilon = epsilon # Ensure that the GDP transformation given is supported supported_transformations = ["bigm", "hull", "mbigm", "custom"] @@ -100,6 +101,7 @@ def _build_formulation(self): input_vars=self.block.scaled_inputs, output_vars=self.block.scaled_outputs, transformation=self.transformation, + epsilon=self.epsilon ) @@ -133,7 +135,7 @@ class LinearTreeHybridBigMFormulation(_PyomoFormulation): """ - def __init__(self, lt_definition): + def __init__(self, lt_definition, epsilon=0): """ Create a LinearTreeHybridBigMFormulation object @@ -142,6 +144,7 @@ def __init__(self, lt_definition): """ super().__init__() self.model_definition = lt_definition + self.epsilon = epsilon @property def input_indexes(self): @@ -169,6 +172,7 @@ def _build_formulation(self): input_vars=self.block.scaled_inputs, output_vars=self.block.scaled_outputs, transformation="custom", + epsilon=self.epsilon, ) pe.TransformationFactory('gdp.bound_pretransformation').apply_to(self.block) @@ -219,7 +223,7 @@ def _build_output_bounds(model_def, input_bounds): def _add_gdp_formulation_to_block( - block, model_definition, input_vars, output_vars, transformation + block, model_definition, input_vars, output_vars, transformation, epsilon ): """ This function adds the GDP representation to the OmltBlock using Pyomo.GDP @@ -262,7 +266,7 @@ def _add_gdp_formulation_to_block( # and the linear model expression. def disjuncts_rule(dsj, tree, leaf): def lb_rule(dsj, feat): - return input_vars[feat] >= leaves[tree][leaf]["bounds"][feat][0] + return input_vars[feat] >= leaves[tree][leaf]["bounds"][feat][0] + epsilon dsj.lb_constraint = pe.Constraint(features, rule=lb_rule) diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py index 28f6f873..f770680d 100644 --- a/tests/linear_tree/test_lt_formulation.py +++ b/tests/linear_tree/test_lt_formulation.py @@ -190,6 +190,58 @@ def connect_outputs(mdl): assert y_pred[0] == approx(solution_1_bigm[1]) +def get_epsilon_test_model(formulation_lt): + model1 = pe.ConcreteModel() + model1.x = pe.Var(initialize=0) + model1.y = pe.Var(initialize=0) + model1.obj = pe.Objective(expr=model1.y, sense=pe.maximize) + model1.lt = OmltBlock() + model1.lt.build_formulation(formulation_lt) + + @model1.Constraint() + def connect_inputs(mdl): + return mdl.x == mdl.lt.inputs[0] + + @model1.Constraint() + def connect_outputs(mdl): + return mdl.y == mdl.lt.outputs[0] + + model1.x.fix(1.058749) + + return model1 + + +@pytest.mark.skipif( + not lineartree_available or not cbc_available, + reason="Need Linear-Tree Package and cbc", +) +def test_nonzero_epsilon(): + regr_small = linear_model_tree(X=X_small, y=y_small) + input_bounds = {0: (min(X_small)[0], max(X_small)[0])} + ltmodel_small = LinearTreeDefinition(regr_small, unscaled_input_bounds=input_bounds) + formulation_bad = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm", + epsilon=0) + formulation1_lt = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm", + epsilon=1e-4) + + model_good = get_epsilon_test_model(formulation1_lt) + model_bad = get_epsilon_test_model(formulation_bad) + + status_1_bigm = pe.SolverFactory("cbc").solve(model_bad) + pe.assert_optimal_termination(status_1_bigm) + solution_1_bigm = (pe.value(model_bad.x), pe.value(model_bad.y)) + y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1)) + # Without an epsilon, the model cheats and does not match the tree prediction + assert y_pred[0] != approx(solution_1_bigm[1]) + + status = pe.SolverFactory("cbc").solve(model_good) + pe.assert_optimal_termination(status) + solution = (pe.value(model_good.x), pe.value(model_good.y)) + y_pred = regr_small.predict(np.array(solution[0]).reshape(1, -1)) + # With epsilon, the model matches the tree prediction + assert y_pred[0] == approx(solution[1]) + + @pytest.mark.skipif( not lineartree_available or not cbc_available, reason="Need Linear-Tree Package and cbc", From f577f6ebd87ccd5823d5684940532b6ef4d114bb Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Tue, 20 Aug 2024 17:32:58 -0600 Subject: [PATCH 3/7] Running ruff --- tests/linear_tree/test_lt_formulation.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py index 33dd2755..b4da76da 100644 --- a/tests/linear_tree/test_lt_formulation.py +++ b/tests/linear_tree/test_lt_formulation.py @@ -223,10 +223,12 @@ def test_nonzero_epsilon(): regr_small = linear_model_tree(X=X_small, y=y_small) input_bounds = {0: (min(X_small)[0], max(X_small)[0])} ltmodel_small = LinearTreeDefinition(regr_small, unscaled_input_bounds=input_bounds) - formulation_bad = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm", - epsilon=0) - formulation1_lt = LinearTreeGDPFormulation(ltmodel_small, transformation="bigm", - epsilon=1e-4) + formulation_bad = LinearTreeGDPFormulation( + ltmodel_small, transformation="bigm", epsilon=0 + ) + formulation1_lt = LinearTreeGDPFormulation( + ltmodel_small, transformation="bigm", epsilon=1e-4 + ) model_good = get_epsilon_test_model(formulation1_lt) model_bad = get_epsilon_test_model(formulation_bad) From 60a5f26d5d0832bac3e6ebd31e91437751f918fd Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Tue, 20 Aug 2024 17:39:00 -0600 Subject: [PATCH 4/7] Fixing one more oops from the main merge --- tests/linear_tree/test_lt_formulation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py index b4da76da..db9121dd 100644 --- a/tests/linear_tree/test_lt_formulation.py +++ b/tests/linear_tree/test_lt_formulation.py @@ -238,14 +238,14 @@ def test_nonzero_epsilon(): solution_1_bigm = (pe.value(model_bad.x), pe.value(model_bad.y)) y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1)) # Without an epsilon, the model cheats and does not match the tree prediction - assert y_pred[0] != approx(solution_1_bigm[1]) + assert y_pred[0] != pytest.approx(solution_1_bigm[1]) status = pe.SolverFactory("cbc").solve(model_good) pe.assert_optimal_termination(status) solution = (pe.value(model_good.x), pe.value(model_good.y)) y_pred = regr_small.predict(np.array(solution[0]).reshape(1, -1)) # With epsilon, the model matches the tree prediction - assert y_pred[0] == approx(solution[1]) + assert y_pred[0] == pytest.approx(solution[1]) @pytest.mark.skipif( From 15ad95f6826d8094fb5f88298fd45edc1254cfc8 Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Wed, 21 Aug 2024 13:05:40 -0600 Subject: [PATCH 5/7] Moving pyomo dependency forward to version that both as Pyomo issue #3262 fix as well as the gdp.binary_multiplication transformation --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fab992f2..2b215f71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,8 @@ authors = [ dependencies = [ "networkx", "numpy", - # TODO: Remove constraint when fix to https://github.com/Pyomo/pyomo/issues/3262 is released - "pyomo==6.6.2", + # Pyomo release that included #3262 fix and has transformations for linear trees + "pyomo>=6.7.3", "onnx", "onnxruntime", ] From c03ea819fdd373d8b6c2b138e8d93beaa9cc109d Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Thu, 22 Aug 2024 07:22:04 -0600 Subject: [PATCH 6/7] Fixing ruff complaints --- src/omlt/linear_tree/lt_formulation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py index 450307b7..712a91b8 100644 --- a/src/omlt/linear_tree/lt_formulation.py +++ b/src/omlt/linear_tree/lt_formulation.py @@ -144,7 +144,7 @@ def __init__(self, lt_definition, epsilon=0): Arguments: lt_definition: LinearTreeDefinition Object - Keyword arguments: + Keyword Arguments: epsilon: Tolerance to use in enforcing that choosing the right branch of a linear tree node can only happen if the feature is strictly greater than the branch value.(default: 0) @@ -231,7 +231,7 @@ def _build_output_bounds(model_def, input_bounds): return bounds -def _add_gdp_formulation_to_block( +def _add_gdp_formulation_to_block( # noqa: PLR0913 block, model_definition, input_vars, output_vars, transformation, epsilon ): """This function adds the GDP representation to the OmltBlock using Pyomo.GDP. @@ -242,6 +242,9 @@ def _add_gdp_formulation_to_block( input_vars: input variables to the linear tree model output_vars: output variable of the linear tree model transformation: Transformation to apply + epsilon: Tolerance to use in enforcing that choosing the right + branch of a linear tree node can only happen if the feature + is strictly greater than the branch value. """ leaves = model_definition.leaves From d09862235e07a794ba3d4c0600b40b92c40ae8ea Mon Sep 17 00:00:00 2001 From: Emma Johnson Date: Thu, 22 Aug 2024 08:34:19 -0600 Subject: [PATCH 7/7] NFC: fixing last ruff formatting issue --- src/omlt/linear_tree/lt_formulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py index 712a91b8..d72df160 100644 --- a/src/omlt/linear_tree/lt_formulation.py +++ b/src/omlt/linear_tree/lt_formulation.py @@ -231,7 +231,7 @@ def _build_output_bounds(model_def, input_bounds): return bounds -def _add_gdp_formulation_to_block( # noqa: PLR0913 +def _add_gdp_formulation_to_block( # noqa: PLR0913 block, model_definition, input_vars, output_vars, transformation, epsilon ): """This function adds the GDP representation to the OmltBlock using Pyomo.GDP.