From fa4e28ea199c3f9842a2286fef902fab392308f2 Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 17 Feb 2022 14:17:33 -0800 Subject: [PATCH 01/35] removes nested for loops; reduces n to 500 --- econml/tests/test_driv.py | 271 +++++++++++++++++++------------------- 1 file changed, 139 insertions(+), 132 deletions(-) diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py index ed3b88eb0..e4f6b76de 100644 --- a/econml/tests/test_driv.py +++ b/econml/tests/test_driv.py @@ -1,17 +1,18 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import unittest +from econml.iv.dr import (DRIV, LinearDRIV, SparseLinearDRIV, ForestDRIV, IntentToTreatDRIV, LinearIntentToTreatDRIV,) +from econml.iv.dr._dr import _DummyCATE +from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression +from econml.utilities import shape + +import itertools +import numpy as np import pytest import pickle -import numpy as np from scipy import special -from sklearn.linear_model import LinearRegression, LogisticRegression -from econml.iv.dr._dr import _DummyCATE -from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression from sklearn.preprocessing import PolynomialFeatures -from econml.utilities import shape -from econml.iv.dr import (DRIV, LinearDRIV, SparseLinearDRIV, ForestDRIV, IntentToTreatDRIV, LinearIntentToTreatDRIV,) +import unittest class TestDRIV(unittest.TestCase): @@ -25,137 +26,143 @@ def marg_eff_shape(n, binary_T): def eff_shape(n, d_x): return (n if d_x else 1,) - n = 1000 + n = 500 y = np.random.normal(size=(n,)) - for d_w in [None, 10]: + for d_w, d_x, binary_T, binary_Z, projection, featurizer in itertools.product( + [None, 10], # d_w + [None, 3], # d_x + [True, False], # binary_T + [True, False], # binary_Z + [True, False], # projection + [None, # featureizer + PolynomialFeatures(degree=2, include_bias=False),] + ): + if d_w is None: W = None else: W = np.random.normal(size=(n, d_w)) - for d_x in [None, 3]: - if d_x is None: - X = None - else: - X = np.random.normal(size=(n, d_x)) - for binary_T in [True, False]: - if binary_T: - T = np.random.choice(["a", "b"], size=(n,)) - else: - T = np.random.normal(size=(n,)) - for binary_Z in [True, False]: - if binary_Z: - Z = np.random.choice(["c", "d"], size=(n,)) - else: - Z = np.random.normal(size=(n,)) - for projection in [True, False]: - for featurizer in [ - None, - PolynomialFeatures(degree=2, include_bias=False), - ]: - est_list = [ - DRIV( - flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), - model_final=StatsModelsLinearRegression( - fit_intercept=False - ), - fit_cate_intercept=True, - projection=projection, - discrete_instrument=binary_Z, - discrete_treatment=binary_T, - featurizer=featurizer, - ), - LinearDRIV( - flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), - fit_cate_intercept=True, - projection=projection, - discrete_instrument=binary_Z, - discrete_treatment=binary_T, - featurizer=featurizer, - ), - SparseLinearDRIV( - flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), - fit_cate_intercept=True, - projection=projection, - discrete_instrument=binary_Z, - discrete_treatment=binary_T, - featurizer=featurizer, - ), - ForestDRIV( - flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), - projection=projection, - discrete_instrument=binary_Z, - discrete_treatment=binary_T, - featurizer=featurizer, - ), - ] - - if X is None: - est_list = est_list[:-1] - - if binary_T and binary_Z: - est_list += [ - IntentToTreatDRIV( - flexible_model_effect=StatsModelsLinearRegression( - fit_intercept=False - ), - fit_cate_intercept=True, - featurizer=featurizer, - ), - LinearIntentToTreatDRIV( - flexible_model_effect=StatsModelsLinearRegression( - fit_intercept=False - ), - featurizer=featurizer, - ), - ] - - for est in est_list: - with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z, - projection=projection, featurizer=featurizer, - est=est): - - # ensure we can serialize unfit estimator - pickle.dumps(est) - - est.fit(y, T, Z=Z, X=X, W=W) - - # ensure we can serialize fit estimator - pickle.dumps(est) - - # expected effect size - const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T) - marginal_effect_shape = marg_eff_shape(n, binary_T) - effect_shape = eff_shape(n, d_x) - # test effect - const_marg_eff = est.const_marginal_effect(X) - self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) - marg_eff = est.marginal_effect(T, X) - self.assertEqual(shape(marg_eff), marginal_effect_shape) - T0 = "a" if binary_T else 0 - T1 = "b" if binary_T else 1 - eff = est.effect(X, T0=T0, T1=T1) - self.assertEqual(shape(eff), effect_shape) - - # test inference - const_marg_eff_int = est.const_marginal_effect_interval(X) - marg_eff_int = est.marginal_effect_interval(T, X) - eff_int = est.effect_interval(X, T0=T0, T1=T1) - self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape) - self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape) - self.assertEqual(shape(eff_int), (2,) + effect_shape) - - # test can run score - est.score(y, T, Z=Z, X=X, W=W) - - if X is not None: - # test cate_feature_names - expect_feat_len = featurizer.fit( - X).n_output_features_ if featurizer else d_x - self.assertEqual(len(est.cate_feature_names()), expect_feat_len) - - # test can run shap values - shap_values = est.shap_values(X[:10]) + + if d_x is None: + X = None + else: + X = np.random.normal(size=(n, d_x)) + + if binary_T: + T = np.random.choice(["a", "b"], size=(n,)) + else: + T = np.random.normal(size=(n,)) + + if binary_Z: + Z = np.random.choice(["c", "d"], size=(n,)) + else: + Z = np.random.normal(size=(n,)) + + est_list = [ + DRIV( + flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), + model_final=StatsModelsLinearRegression( + fit_intercept=False + ), + fit_cate_intercept=True, + projection=projection, + discrete_instrument=binary_Z, + discrete_treatment=binary_T, + featurizer=featurizer, + ), + LinearDRIV( + flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), + fit_cate_intercept=True, + projection=projection, + discrete_instrument=binary_Z, + discrete_treatment=binary_T, + featurizer=featurizer, + ), + SparseLinearDRIV( + flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), + fit_cate_intercept=True, + projection=projection, + discrete_instrument=binary_Z, + discrete_treatment=binary_T, + featurizer=featurizer, + ), + ForestDRIV( + flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), + projection=projection, + discrete_instrument=binary_Z, + discrete_treatment=binary_T, + featurizer=featurizer, + ), + ] + + if X is None: + est_list = est_list[:-1] + + if binary_T and binary_Z: + est_list += [ + IntentToTreatDRIV( + flexible_model_effect=StatsModelsLinearRegression( + fit_intercept=False + ), + fit_cate_intercept=True, + featurizer=featurizer, + ), + LinearIntentToTreatDRIV( + flexible_model_effect=StatsModelsLinearRegression( + fit_intercept=False + ), + featurizer=featurizer, + ), + ] + + for est in est_list: + with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z, + projection=projection, featurizer=featurizer, + est=est): + + # ensure we can serialize unfit estimator + pickle.dumps(est) + + est.fit(y, T, Z=Z, X=X, W=W) + + # ensure we can serialize fit estimator + pickle.dumps(est) + + # expected effect size + const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T) + marginal_effect_shape = marg_eff_shape(n, binary_T) + effect_shape = eff_shape(n, d_x) + + # test effect + const_marg_eff = est.const_marginal_effect(X) + self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) + marg_eff = est.marginal_effect(T, X) + self.assertEqual(shape(marg_eff), marginal_effect_shape) + T0 = "a" if binary_T else 0 + T1 = "b" if binary_T else 1 + eff = est.effect(X, T0=T0, T1=T1) + self.assertEqual(shape(eff), effect_shape) + + # test inference + const_marg_eff_int = est.const_marginal_effect_interval(X) + marg_eff_int = est.marginal_effect_interval(T, X) + eff_int = est.effect_interval(X, T0=T0, T1=T1) + self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape) + self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape) + self.assertEqual(shape(eff_int), (2,) + effect_shape) + + # test can run score + est.score(y, T, Z=Z, X=X, W=W) + + if X is not None: + # test cate_feature_names + expect_feat_len = featurizer.fit( + X).n_output_features_ if featurizer else d_x + self.assertEqual(len(est.cate_feature_names()), expect_feat_len) + + # test can run shap values + shap_values = est.shap_values(X[:10]) def test_accuracy(self): np.random.seed(123) From a4eb09f37557c389481faae279b29b9df0a1089b Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 17 Feb 2022 15:03:42 -0800 Subject: [PATCH 02/35] linting fixes --- econml/tests/test_driv.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py index e4f6b76de..a8505d9ef 100644 --- a/econml/tests/test_driv.py +++ b/econml/tests/test_driv.py @@ -29,15 +29,15 @@ def eff_shape(n, d_x): n = 500 y = np.random.normal(size=(n,)) - for d_w, d_x, binary_T, binary_Z, projection, featurizer in itertools.product( - [None, 10], # d_w - [None, 3], # d_x - [True, False], # binary_T - [True, False], # binary_Z - [True, False], # projection - [None, # featureizer - PolynomialFeatures(degree=2, include_bias=False),] - ): + # parameter combinations to test + for d_w, d_x, binary_T, binary_Z, projection, featurizer\ + in itertools.product( + [None, 10], # d_w + [None, 3], # d_x + [True, False], # binary_T + [True, False], # binary_Z + [True, False], # projection + [None, PolynomialFeatures(degree=2, include_bias=False), ]): # featurizer if d_w is None: W = None @@ -48,7 +48,7 @@ def eff_shape(n, d_x): X = None else: X = np.random.normal(size=(n, d_x)) - + if binary_T: T = np.random.choice(["a", "b"], size=(n,)) else: @@ -58,7 +58,7 @@ def eff_shape(n, d_x): Z = np.random.choice(["c", "d"], size=(n,)) else: Z = np.random.normal(size=(n,)) - + est_list = [ DRIV( flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False), @@ -117,9 +117,9 @@ def eff_shape(n, d_x): ] for est in est_list: - with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z, - projection=projection, featurizer=featurizer, - est=est): + with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, + binary_Z=binary_Z, projection=projection, featurizer=featurizer, + est=est): # ensure we can serialize unfit estimator pickle.dumps(est) @@ -133,7 +133,7 @@ def eff_shape(n, d_x): const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T) marginal_effect_shape = marg_eff_shape(n, binary_T) effect_shape = eff_shape(n, d_x) - + # test effect const_marg_eff = est.const_marginal_effect(X) self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) From dc47b6affb7a42454836b1e8022a628eabc878d6 Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 17 Feb 2022 15:10:16 -0800 Subject: [PATCH 03/35] reorganize imports --- econml/tests/test_dmliv.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py index db8b328d8..a1dbd68d2 100644 --- a/econml/tests/test_dmliv.py +++ b/econml/tests/test_dmliv.py @@ -1,18 +1,21 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import unittest -import pytest import pickle +import unittest + import numpy as np +import pytest from scipy import special -from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.preprocessing import PolynomialFeatures + +from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV from econml.iv.dr._dr import _DummyCATE from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression -from sklearn.preprocessing import PolynomialFeatures from econml.utilities import shape -from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV + class TestDMLIV(unittest.TestCase): From 8e4682fc49e0f0577acbfacf397de1befce8be70 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 10:49:27 -0800 Subject: [PATCH 04/35] removes unused imports --- econml/tests/test_tree.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py index 7de214f6c..de18c9fd8 100644 --- a/econml/tests/test_tree.py +++ b/econml/tests/test_tree.py @@ -2,12 +2,9 @@ # Licensed under the MIT License. import unittest -import logging -import time -import random + import numpy as np -import sparse as sp -import pytest + from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE From 2cb7f40a4a3cc65a5977f063270c27e729f436e1 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 13:28:22 -0800 Subject: [PATCH 05/35] runs tree tests serially --- azure-pipelines.yml | 29 ++++++++++++++++++++++++++++- econml/tests/test_tree.py | 18 +++++++++++++++++- pyproject.toml | 5 +++-- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4c80ed729..fbfe371e5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -207,7 +207,7 @@ jobs: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)" -n 2' + PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial)" -n 2' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' @@ -270,6 +270,33 @@ jobs: testRunTitle: 'Python $(python.version), image $(imageName)' condition: succeededOrFailed() + - task: PublishCodeCoverageResults@1 + displayName: 'Publish Code Coverage Results' + inputs: + codeCoverageTool: Cobertura + summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' + +- template: azure-pipelines-steps.yml + parameters: + package: '-e .[tf,plt]' + job: + job: Tests_serial + dependsOn: 'EvalChanges' + condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') + displayName: 'Run tests (Causal)' + steps: + - script: 'pip install pytest pytest-runner && python setup.py pytest' + displayName: 'Unit tests' + env: + PYTEST_ADDOPTS: '-m "serial" -n 1' + COVERAGE_PROCESS_START: 'setup.cfg' + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/test-results.xml' + inputs: + testResultsFiles: '**/test-results.xml' + testRunTitle: 'Python $(python.version), image $(imageName)' + condition: succeededOrFailed() + - task: PublishCodeCoverageResults@1 displayName: 'Publish Code Coverage Results' inputs: diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py index de18c9fd8..b6842c4a1 100644 --- a/econml/tests/test_tree.py +++ b/econml/tests/test_tree.py @@ -4,10 +4,11 @@ import unittest import numpy as np +import pytest from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE - +@pytest.mark.serial class TestTree(unittest.TestCase): def _get_base_config(self): @@ -256,10 +257,17 @@ def test_honest_values(self): np.testing.assert_array_almost_equal(tree.value.flatten(), .4 * np.ones(len(tree.value))) def test_noisy_instance(self): + """ + The purpose of this test + """ + + # initialize parameters n_samples = 5000 X = np.random.normal(0, 1, size=(n_samples, 1)) y_base = 1.0 * X[:, [0]] * (X[:, [0]] > 0) y = y_base + np.random.normal(0, .1, size=(n_samples, 1)) + + # initialize config wtih base config and overwite some values config = self._get_base_config() config['n_features'] = 1 config['max_features'] = 1 @@ -271,11 +279,16 @@ def test_noisy_instance(self): config['max_node_samples'] = X.shape[0] config['samples_train'] = np.arange(X.shape[0], dtype=np.intp) config['samples_val'] = np.arange(X.shape[0], dtype=np.intp) + + # predict tree using config parameters and assert + # shape of trained tree is the same as y_test tree = self._train_tree(config, X, y) X_test = np.zeros((100, 1)) X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100) y_test = 1.0 * X_test[:, [0]] * (X_test[:, [0]] > 0) np.testing.assert_array_almost_equal(tree.predict(X_test), y_test, decimal=1) + + # initialize config wtih base honest config and overwite some values config = self._get_base_honest_config() config['n_features'] = 1 config['max_features'] = 1 @@ -287,6 +300,9 @@ def test_noisy_instance(self): config['max_node_samples'] = X.shape[0] // 2 config['samples_train'] = np.arange(X.shape[0] // 2, dtype=np.intp) config['samples_val'] = np.arange(X.shape[0] // 2, X.shape[0], dtype=np.intp) + + # predict tree using config parameters and assert + # shape of trained tree is the same as y_test tree = self._train_tree(config, X, y) X_test = np.zeros((100, 1)) X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100) diff --git a/pyproject.toml b/pyproject.toml index 9f0652dbb..82048ea08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,11 +9,12 @@ requires = [ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] -addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml" +# addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml" markers = [ "slow", "notebook", "automl", "dml", - "causal" + "causal", + "serial" ] \ No newline at end of file From 0cccf374ac4f0f5c25d8da072abfd5bf466a8568 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 13:42:08 -0800 Subject: [PATCH 06/35] fixes typo --- azure-pipelines.yml | 2 +- econml/tests/test_tree.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index fbfe371e5..dcaa3a0a1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -283,7 +283,7 @@ jobs: job: Tests_serial dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') - displayName: 'Run tests (Causal)' + displayName: 'Run tests (Serial)' steps: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py index b6842c4a1..7267b93fc 100644 --- a/econml/tests/test_tree.py +++ b/econml/tests/test_tree.py @@ -8,6 +8,7 @@ from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE + @pytest.mark.serial class TestTree(unittest.TestCase): @@ -302,7 +303,7 @@ def test_noisy_instance(self): config['samples_val'] = np.arange(X.shape[0] // 2, X.shape[0], dtype=np.intp) # predict tree using config parameters and assert - # shape of trained tree is the same as y_test + # shape of trained tree is the same as y_test tree = self._train_tree(config, X, y) X_test = np.zeros((100, 1)) X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100) From 684e1c70f5d4988a309979ec6ccd3c9929819bf9 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 13:50:57 -0800 Subject: [PATCH 07/35] linting fix --- econml/tests/test_dmliv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py index a1dbd68d2..9e73b491f 100644 --- a/econml/tests/test_dmliv.py +++ b/econml/tests/test_dmliv.py @@ -17,7 +17,6 @@ from econml.utilities import shape - class TestDMLIV(unittest.TestCase): def test_cate_api(self): def const_marg_eff_shape(n, d_x, d_y, binary_T): From 0b951fd1cc4c7eb3963b75e59a26a46486c3e44a Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 14:26:03 -0800 Subject: [PATCH 08/35] reorganize imports --- econml/tests/test_drlearner.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py index 3674aa7af..9315ad618 100644 --- a/econml/tests/test_drlearner.py +++ b/econml/tests/test_drlearner.py @@ -1,26 +1,27 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import numpy as np +from contextlib import ExitStack +import pickle import unittest + +import numpy as np +from numpy.random import normal, multivariate_normal, binomial import pytest -import pickle + +import scipy.special from sklearn.base import TransformerMixin -from numpy.random import normal, multivariate_normal, binomial +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor from sklearn.exceptions import DataConversionWarning from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import OneHotEncoder, FunctionTransformer from sklearn.model_selection import KFold, GroupKFold -from sklearn.preprocessing import PolynomialFeatures +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures + from econml.dr import DRLearner, LinearDRLearner, SparseLinearDRLearner, ForestDRLearner -from econml.utilities import shape, hstack, vstack, reshape, cross_product from econml.inference import BootstrapInference, StatsModelsInferenceDiscrete -from contextlib import ExitStack -from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor -from sklearn.linear_model import LinearRegression, LogisticRegression +from econml.utilities import shape, hstack, vstack, reshape, cross_product from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression -import scipy.special import econml.tests.utilities # bugfix for assertWarns From d1068c0c8a28a8849305a4644064405700c0e07a Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 14:34:21 -0800 Subject: [PATCH 09/35] run test serially --- econml/tests/test_drlearner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py index 9315ad618..1f81bd0ab 100644 --- a/econml/tests/test_drlearner.py +++ b/econml/tests/test_drlearner.py @@ -25,6 +25,7 @@ import econml.tests.utilities # bugfix for assertWarns +@pytest.mark.serial class TestDRLearner(unittest.TestCase): @classmethod From 33c580f0d369b17a9c223b0f3a645002f961b81e Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 15:38:51 -0800 Subject: [PATCH 10/35] adds tags for cate_api tests --- econml/tests/test_dmliv.py | 1 + econml/tests/test_driv.py | 29 +++++++++++++++++++---------- econml/tests/test_dynamic_dml.py | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py index 9e73b491f..54175d0c9 100644 --- a/econml/tests/test_dmliv.py +++ b/econml/tests/test_dmliv.py @@ -17,6 +17,7 @@ from econml.utilities import shape +@pytest.mark.cate_api class TestDMLIV(unittest.TestCase): def test_cate_api(self): def const_marg_eff_shape(n, d_x, d_y, binary_T): diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py index a8505d9ef..a9e0d30be 100644 --- a/econml/tests/test_driv.py +++ b/econml/tests/test_driv.py @@ -15,15 +15,19 @@ import unittest +@pytest.mark.cate_api class TestDRIV(unittest.TestCase): def test_cate_api(self): def const_marg_eff_shape(n, d_x, binary_T): + """Constant marginal effect shape.""" return (n if d_x else 1,) + ((1,) if binary_T else ()) def marg_eff_shape(n, binary_T): + """Marginal effect shape.""" return (n,) + ((1,) if binary_T else ()) def eff_shape(n, d_x): + "Effect shape." return (n if d_x else 1,) n = 500 @@ -121,34 +125,39 @@ def eff_shape(n, d_x): binary_Z=binary_Z, projection=projection, featurizer=featurizer, est=est): + # TODO: serializing/deserializing for every combination -- is this necessary? # ensure we can serialize unfit estimator - pickle.dumps(est) + # pickle.dumps(est) est.fit(y, T, Z=Z, X=X, W=W) # ensure we can serialize fit estimator - pickle.dumps(est) + # pickle.dumps(est) # expected effect size - const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T) + exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T) marginal_effect_shape = marg_eff_shape(n, binary_T) effect_shape = eff_shape(n, d_x) - # test effect + # assert calculated constant marginal effect shape is expected + # const_marginal effect is defined in LinearCateEstimator class const_marg_eff = est.const_marginal_effect(X) - self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) + self.assertEqual(shape(const_marg_eff), exp_const_marginal_effect_shape) + + # assert calculated marginal effect shape is expected marg_eff = est.marginal_effect(T, X) self.assertEqual(shape(marg_eff), marginal_effect_shape) + T0 = "a" if binary_T else 0 T1 = "b" if binary_T else 1 eff = est.effect(X, T0=T0, T1=T1) self.assertEqual(shape(eff), effect_shape) # test inference - const_marg_eff_int = est.const_marginal_effect_interval(X) - marg_eff_int = est.marginal_effect_interval(T, X) - eff_int = est.effect_interval(X, T0=T0, T1=T1) - self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape) + const_marg_eff_int = est.const_marginal_effect_interval(X) # defer to infere + marg_eff_int = est.marginal_effect_interval(T, X) # d + eff_int = est.effect_interval(X, T0=T0, T1=T1) # d + self.assertEqual(shape(const_marg_eff_int), (2,) + exp_const_marginal_effect_shape) self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape) self.assertEqual(shape(eff_int), (2,) + effect_shape) @@ -162,7 +171,7 @@ def eff_shape(n, d_x): self.assertEqual(len(est.cate_feature_names()), expect_feat_len) # test can run shap values - shap_values = est.shap_values(X[:10]) + _ = est.shap_values(X[:10]) def test_accuracy(self): np.random.seed(123) diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index 7539c18f9..2f5048cc2 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -16,7 +16,7 @@ from econml.tests.dgp import DynamicPanelDGP -@pytest.mark.dml +@pytest.mark.cate_api_dml class TestDynamicDML(unittest.TestCase): def test_cate_api(self): From caba5ce521e76a132e6e0eda5b4a0543edf1a4bd Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 15:43:36 -0800 Subject: [PATCH 11/35] sets up new test marks in azure pipelin --- azure-pipelines.yml | 56 ++++++++++++++++++++++++++++++++++++++- econml/tests/test_driv.py | 6 ++--- pyproject.toml | 4 ++- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index dcaa3a0a1..b612c4b0e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -207,7 +207,7 @@ jobs: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial)" -n 2' + PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial or cate_api or cate_api_dml)" -n 2' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' @@ -297,6 +297,60 @@ jobs: testRunTitle: 'Python $(python.version), image $(imageName)' condition: succeededOrFailed() + - task: PublishCodeCoverageResults@1 + displayName: 'Publish Code Coverage Results' + inputs: + codeCoverageTool: Cobertura + summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' + +- template: azure-pipelines-steps.yml + parameters: + package: '-e .[tf,plt]' + job: + job: Tests_serial + dependsOn: 'EvalChanges' + condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') + displayName: 'Run tests (CATE/DML)' + steps: + - script: 'pip install pytest pytest-runner && python setup.py pytest' + displayName: 'Unit tests' + env: + PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto' + COVERAGE_PROCESS_START: 'setup.cfg' + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/test-results.xml' + inputs: + testResultsFiles: '**/test-results.xml' + testRunTitle: 'Python $(python.version), image $(imageName)' + condition: succeededOrFailed() + + - task: PublishCodeCoverageResults@1 + displayName: 'Publish Code Coverage Results' + inputs: + codeCoverageTool: Cobertura + summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' + +- template: azure-pipelines-steps.yml + parameters: + package: '-e .[tf,plt]' + job: + job: Tests_serial + dependsOn: 'EvalChanges' + condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') + displayName: 'Run tests (CATE)' + steps: + - script: 'pip install pytest pytest-runner && python setup.py pytest' + displayName: 'Unit tests' + env: + PYTEST_ADDOPTS: '-m "cate_api" -n auto' + COVERAGE_PROCESS_START: 'setup.cfg' + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/test-results.xml' + inputs: + testResultsFiles: '**/test-results.xml' + testRunTitle: 'Python $(python.version), image $(imageName)' + condition: succeededOrFailed() + - task: PublishCodeCoverageResults@1 displayName: 'Publish Code Coverage Results' inputs: diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py index a9e0d30be..d66f8f96d 100644 --- a/econml/tests/test_driv.py +++ b/econml/tests/test_driv.py @@ -154,9 +154,9 @@ def eff_shape(n, d_x): self.assertEqual(shape(eff), effect_shape) # test inference - const_marg_eff_int = est.const_marginal_effect_interval(X) # defer to infere - marg_eff_int = est.marginal_effect_interval(T, X) # d - eff_int = est.effect_interval(X, T0=T0, T1=T1) # d + const_marg_eff_int = est.const_marginal_effect_interval(X) + marg_eff_int = est.marginal_effect_interval(T, X) + eff_int = est.effect_interval(X, T0=T0, T1=T1) self.assertEqual(shape(const_marg_eff_int), (2,) + exp_const_marginal_effect_shape) self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape) self.assertEqual(shape(eff_int), (2,) + effect_shape) diff --git a/pyproject.toml b/pyproject.toml index 82048ea08..8e1ed4e52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,5 +16,7 @@ markers = [ "automl", "dml", "causal", - "serial" + "serial", + "cate_api", + "cate_api_dml" ] \ No newline at end of file From a588897f645a33b1b8f80f278709bf4152fe7bae Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 15:51:51 -0800 Subject: [PATCH 12/35] fixes typo in pipeline yml --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b612c4b0e..76fe0d426 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -307,7 +307,7 @@ jobs: parameters: package: '-e .[tf,plt]' job: - job: Tests_serial + job: Tests_cate_dml dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') displayName: 'Run tests (CATE/DML)' @@ -334,7 +334,7 @@ jobs: parameters: package: '-e .[tf,plt]' job: - job: Tests_serial + job: Tests_cate dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') displayName: 'Run tests (CATE)' From 0da686032b6fcdb1068f805130e29e862854acd7 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 15:58:44 -0800 Subject: [PATCH 13/35] debug pipeline --- azure-pipelines.yml | 54 --------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 76fe0d426..0203d4426 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -302,57 +302,3 @@ jobs: inputs: codeCoverageTool: Cobertura summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' - -- template: azure-pipelines-steps.yml - parameters: - package: '-e .[tf,plt]' - job: - job: Tests_cate_dml - dependsOn: 'EvalChanges' - condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') - displayName: 'Run tests (CATE/DML)' - steps: - - script: 'pip install pytest pytest-runner && python setup.py pytest' - displayName: 'Unit tests' - env: - PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto' - COVERAGE_PROCESS_START: 'setup.cfg' - - task: PublishTestResults@2 - displayName: 'Publish Test Results **/test-results.xml' - inputs: - testResultsFiles: '**/test-results.xml' - testRunTitle: 'Python $(python.version), image $(imageName)' - condition: succeededOrFailed() - - - task: PublishCodeCoverageResults@1 - displayName: 'Publish Code Coverage Results' - inputs: - codeCoverageTool: Cobertura - summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' - -- template: azure-pipelines-steps.yml - parameters: - package: '-e .[tf,plt]' - job: - job: Tests_cate - dependsOn: 'EvalChanges' - condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') - displayName: 'Run tests (CATE)' - steps: - - script: 'pip install pytest pytest-runner && python setup.py pytest' - displayName: 'Unit tests' - env: - PYTEST_ADDOPTS: '-m "cate_api" -n auto' - COVERAGE_PROCESS_START: 'setup.cfg' - - task: PublishTestResults@2 - displayName: 'Publish Test Results **/test-results.xml' - inputs: - testResultsFiles: '**/test-results.xml' - testRunTitle: 'Python $(python.version), image $(imageName)' - condition: succeededOrFailed() - - - task: PublishCodeCoverageResults@1 - displayName: 'Publish Code Coverage Results' - inputs: - codeCoverageTool: Cobertura - summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' \ No newline at end of file From fd5b3e27b435c95aa17a8fc0b70bfcb1cff34598 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 17:28:52 -0800 Subject: [PATCH 14/35] reorganize imports --- econml/tests/test_statsmodels.py | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index 75a544cdf..6a1276662 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -1,31 +1,31 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import unittest +import joblib + import numpy as np import pytest -from econml.dml import DML, LinearDML, NonParamDML -from econml.dr import LinearDRLearner -from econml.iv.dr import LinearDRIV -from econml.iv.dml import DMLIV -from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete -from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper) -from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression -from econml.iv.dr._dr import _DummyCATE -from statsmodels.regression.linear_model import WLS -from statsmodels.tools.tools import add_constant -from statsmodels.sandbox.regression.gmm import IV2SLS + +import scipy.special +from sklearn.base import clone from sklearn.dummy import DummyClassifier -from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier +from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV from sklearn.model_selection import KFold, StratifiedKFold -import scipy.special -import time -from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS -from econml.sklearn_extensions.linear_model import StatsModels2SLS -import unittest -import joblib from sklearn.preprocessing import PolynomialFeatures -from sklearn.base import clone +from statsmodels.regression.linear_model import WLS +from statsmodels.sandbox.regression.gmm import IV2SLS +from statsmodels.tools.tools import add_constant + +from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete +from econml.dml import DML, LinearDML, NonParamDML +from econml.dr import LinearDRLearner +from econml.iv.dml import DMLIV +from econml.iv.dr import LinearDRIV +from econml.iv.dr._dr import _DummyCATE +from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression +from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper) class StatsModelsOLS: From ae7086b1d8e8ce078815c658b292ce26f20a6067 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 17:36:03 -0800 Subject: [PATCH 15/35] adds back dependency --- econml/tests/test_statsmodels.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index 6a1276662..817f6c62f 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -25,6 +25,8 @@ from econml.iv.dr import LinearDRIV from econml.iv.dr._dr import _DummyCATE from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression +from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS +from econml.sklearn_extensions.linear_model import StatsModels2SLS from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper) From 57afa35b8f0614385883fc9f639aa8da22b5f865 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 17:41:09 -0800 Subject: [PATCH 16/35] remove unused imports --- econml/tests/test_statsmodels.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index 817f6c62f..0d7b04452 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -2,28 +2,24 @@ # Licensed under the MIT License. import unittest -import joblib import numpy as np import pytest import scipy.special from sklearn.base import clone -from sklearn.dummy import DummyClassifier -from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier -from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV -from sklearn.model_selection import KFold, StratifiedKFold +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.preprocessing import PolynomialFeatures from statsmodels.regression.linear_model import WLS from statsmodels.sandbox.regression.gmm import IV2SLS from statsmodels.tools.tools import add_constant from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete -from econml.dml import DML, LinearDML, NonParamDML +from econml.dml import LinearDML, NonParamDML from econml.dr import LinearDRLearner from econml.iv.dml import DMLIV from econml.iv.dr import LinearDRIV -from econml.iv.dr._dr import _DummyCATE from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS from econml.sklearn_extensions.linear_model import StatsModels2SLS From 64c0c7f29f51f618bdfc28daf88b35aceb62ae07 Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 17:50:57 -0800 Subject: [PATCH 17/35] reduce size of matrix by half --- econml/tests/test_statsmodels.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index 0d7b04452..8dcba87c7 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -1097,10 +1097,8 @@ def split(self, X, T): def test_dml_multi_dim_treatment_outcome(self): """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """ - from econml.dml import LinearDML - from econml.inference import StatsModelsInference np.random.seed(123) - n = 100000 + n = 50000 precision = .01 precision_int = .0001 with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True): From 3d7340d96be9599416de79fe3a66e5789540a89d Mon Sep 17 00:00:00 2001 From: xrowan Date: Wed, 23 Feb 2022 18:23:32 -0800 Subject: [PATCH 18/35] reverts n --- econml/tests/test_statsmodels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index 8dcba87c7..dad735327 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -1098,7 +1098,7 @@ def split(self, X, T): def test_dml_multi_dim_treatment_outcome(self): """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """ np.random.seed(123) - n = 50000 + n = 100000 precision = .01 precision_int = .0001 with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True): From 194743ead58cc84e74a5a31e8f2beebc12ece403 Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 24 Feb 2022 14:18:19 -0800 Subject: [PATCH 19/35] moves test_statsmodels to serial testing --- econml/tests/test_statsmodels.py | 1 + 1 file changed, 1 insertion(+) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index dad735327..d9de9ff1a 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -26,6 +26,7 @@ from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper) +@pytest.mark.serial class StatsModelsOLS: """ Helper class to wrap a StatsModels OLS model to conform to the sklearn API. From ac28cf084e4633fa64bfc2f3fb6aac8877ae3e5c Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 24 Feb 2022 17:50:29 -0800 Subject: [PATCH 20/35] combines other tests to single job --- azure-pipelines.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0203d4426..499a28e2e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -280,11 +280,21 @@ jobs: parameters: package: '-e .[tf,plt]' job: - job: Tests_serial + job: Tests_other dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') - displayName: 'Run tests (Serial)' + displayName: 'Run tests (Other)' steps: + - script: 'pip install pytest pytest-runner && python setup.py pytest' + displayName: 'Unit tests' + env: + PYTEST_ADDOPTS: '-m "cate_api" -n auto' + COVERAGE_PROCESS_START: 'setup.cfg' + - script: 'python setup.py pytest' + displayName: 'Unit tests' + env: + PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto' + COVERAGE_PROCESS_START: 'setup.cfg' - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: @@ -301,4 +311,4 @@ jobs: displayName: 'Publish Code Coverage Results' inputs: codeCoverageTool: Cobertura - summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' + summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' \ No newline at end of file From 2ac1aa6e00ce8ac694a1ab1112306e9cd491f79b Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 24 Feb 2022 18:46:01 -0800 Subject: [PATCH 21/35] removes nested loops from test_random_state --- econml/tests/test_causal_analysis.py | 34 ++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index 316042e93..08cf83d33 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -2,11 +2,14 @@ # Licensed under the MIT License. import unittest + +from contextlib import ExitStack +import itertools import numpy as np from numpy.core.fromnumeric import squeeze import pandas as pd -from contextlib import ExitStack import pytest + from econml.solutions.causal_analysis import CausalAnalysis from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants @@ -670,21 +673,24 @@ def test_random_state(self): inds = [0, 1, 2, 3] cats = [2, 3] hinds = [0, 3] - for n_model in ['linear', 'automl']: - for h_model in ['linear', 'forest']: - for classification in [True, False]: - ca = CausalAnalysis(inds, cats, hinds, classification=classification, - nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) - ca.fit(X, y) - glo = ca.global_causal_effect() - ca2 = CausalAnalysis(inds, cats, hinds, classification=classification, - nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) - ca2.fit(X, y) - glo2 = ca.global_causal_effect() + for n_model, h_model, classification in itertools.product( + ['linear', 'automl'], + ['linear', 'forest'], + [True, False]): + + ca = CausalAnalysis(inds, cats, hinds, classification=classification, + nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) + ca.fit(X, y) + glo = ca.global_causal_effect() + + ca2 = CausalAnalysis(inds, cats, hinds, classification=classification, + nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) + ca2.fit(X, y) + glo2 = ca.global_causal_effect() - np.testing.assert_equal(glo.point.values, glo2.point.values) - np.testing.assert_equal(glo.stderr.values, glo2.stderr.values) + np.testing.assert_equal(glo.point.values, glo2.point.values) + np.testing.assert_equal(glo.stderr.values, glo2.stderr.values) def test_can_set_categories(self): y = pd.Series(np.random.choice([0, 1], size=(500,))) From 78c039148c39d6b042cd6cd0677588bb3db8f7f6 Mon Sep 17 00:00:00 2001 From: xrowan Date: Thu, 24 Feb 2022 18:50:46 -0800 Subject: [PATCH 22/35] fixes indenting --- econml/tests/test_causal_analysis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index 08cf83d33..5b5ac804d 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -674,10 +674,10 @@ def test_random_state(self): cats = [2, 3] hinds = [0, 3] - for n_model, h_model, classification in itertools.product( - ['linear', 'automl'], - ['linear', 'forest'], - [True, False]): + for n_model, h_model, classification in\ + itertools.product(['linear', 'automl'], + ['linear', 'forest'], + [True, False]): ca = CausalAnalysis(inds, cats, hinds, classification=classification, nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) @@ -685,7 +685,7 @@ def test_random_state(self): glo = ca.global_causal_effect() ca2 = CausalAnalysis(inds, cats, hinds, classification=classification, - nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) + nuisance_models=n_model, heterogeneity_model=h_model, random_state=123) ca2.fit(X, y) glo2 = ca.global_causal_effect() From b6b6f49aa3ed2eea0deb3d26a1f65a57464247b2 Mon Sep 17 00:00:00 2001 From: xrowan Date: Fri, 25 Feb 2022 12:35:29 -0800 Subject: [PATCH 23/35] tries running causal tests in parallel --- azure-pipelines.yml | 2 +- econml/tests/test_causal_analysis.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 499a28e2e..2e89e548b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -261,7 +261,7 @@ jobs: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "causal" -n 1' + PYTEST_ADDOPTS: '-m "causal" -n 2' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index 5b5ac804d..92d2d222a 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -1,10 +1,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import gc import unittest +import resource from contextlib import ExitStack import itertools +from memory_profiler import profile import numpy as np from numpy.core.fromnumeric import squeeze import pandas as pd @@ -692,6 +695,9 @@ def test_random_state(self): np.testing.assert_equal(glo.point.values, glo2.point.values) np.testing.assert_equal(glo.stderr.values, glo2.stderr.values) + del ca, glo, ca2, glo2 + gc.collect() + def test_can_set_categories(self): y = pd.Series(np.random.choice([0, 1], size=(500,))) X = pd.DataFrame({'a': np.random.normal(size=500), @@ -784,6 +790,9 @@ def test_invalid_inds(self): self.assertEqual(ca.trained_feature_indices_, [0, 1, 2, 3]) # can't handle last two self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'), (5, 'cat_limit')]) + + del ca + gc.collect() # Add tests that guarantee that the reliance on DML feature order is not broken, such as # Creare a transformer that zeros out all variables after the first n_x variables, so it zeros out W From b36a2cb11c05402af7b00b8fc1bbc1c1762b6beb Mon Sep 17 00:00:00 2001 From: xrowan Date: Fri, 25 Feb 2022 12:41:40 -0800 Subject: [PATCH 24/35] remove unnecessary deps --- econml/tests/test_causal_analysis.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index 92d2d222a..a3658b7d2 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -3,11 +3,9 @@ import gc import unittest -import resource from contextlib import ExitStack import itertools -from memory_profiler import profile import numpy as np from numpy.core.fromnumeric import squeeze import pandas as pd From 1ea012b45bf9cd6f954da2bbe893edec9cfa6006 Mon Sep 17 00:00:00 2001 From: xrowan Date: Fri, 25 Feb 2022 12:51:39 -0800 Subject: [PATCH 25/35] linting fix --- econml/tests/test_causal_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index a3658b7d2..a98af10cc 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -788,7 +788,7 @@ def test_invalid_inds(self): self.assertEqual(ca.trained_feature_indices_, [0, 1, 2, 3]) # can't handle last two self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'), (5, 'cat_limit')]) - + del ca gc.collect() From 7664f757e80d6f5c596b2f56c81cfa0c50c4823a Mon Sep 17 00:00:00 2001 From: xrowan Date: Fri, 25 Feb 2022 13:28:19 -0800 Subject: [PATCH 26/35] reverts to run causal tests serially --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2e89e548b..499a28e2e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -261,7 +261,7 @@ jobs: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "causal" -n 2' + PYTEST_ADDOPTS: '-m "causal" -n 1' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' From b94f45481f786abad6f6eb7258de235f48f77151 Mon Sep 17 00:00:00 2001 From: xrowan Date: Fri, 25 Feb 2022 15:15:21 -0800 Subject: [PATCH 27/35] correctly tags stats models --- econml/tests/test_statsmodels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index d9de9ff1a..9d47ba69b 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -26,7 +26,6 @@ from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper) -@pytest.mark.serial class StatsModelsOLS: """ Helper class to wrap a StatsModels OLS model to conform to the sklearn API. @@ -266,6 +265,7 @@ def _compare_dr_classes(est, lr, X_test, alpha=.05, tol=1e-10): "{}, {}".format(est.effect_interval(X_test, alpha=alpha), lr.effect_interval(X_test, alpha=alpha)) +@pytest.mark.serial class TestStatsModels(unittest.TestCase): def test_comp_with_lr(self): From e943f99502819e506dc70aab568ac2f183c24010 Mon Sep 17 00:00:00 2001 From: xrowan Date: Mon, 7 Mar 2022 19:41:05 -0800 Subject: [PATCH 28/35] re-enables pickling test --- econml/tests/test_driv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py index d66f8f96d..bddff8904 100644 --- a/econml/tests/test_driv.py +++ b/econml/tests/test_driv.py @@ -127,12 +127,12 @@ def eff_shape(n, d_x): # TODO: serializing/deserializing for every combination -- is this necessary? # ensure we can serialize unfit estimator - # pickle.dumps(est) + pickle.dumps(est) est.fit(y, T, Z=Z, X=X, W=W) # ensure we can serialize fit estimator - # pickle.dumps(est) + pickle.dumps(est) # expected effect size exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T) From fea98c58f036d7907aaf241e1b79420acd8bcc84 Mon Sep 17 00:00:00 2001 From: xrowan Date: Mon, 7 Mar 2022 19:43:40 -0800 Subject: [PATCH 29/35] uncomments options --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8e1ed4e52..80f8ed8dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ requires = [ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] -# addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml" +addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml" markers = [ "slow", "notebook", From 691ea6e8c2d47ffb7e49510f0433e90b74197fa8 Mon Sep 17 00:00:00 2001 From: xrowan Date: Mon, 7 Mar 2022 19:50:43 -0800 Subject: [PATCH 30/35] reorganizes unit test workflow --- azure-pipelines.yml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 499a28e2e..7ed583149 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -285,18 +285,15 @@ jobs: condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') displayName: 'Run tests (Other)' steps: - - script: 'pip install pytest pytest-runner && python setup.py pytest' - displayName: 'Unit tests' - env: - PYTEST_ADDOPTS: '-m "cate_api" -n auto' - COVERAGE_PROCESS_START: 'setup.cfg' + - script: 'pip install pytest pytest-runner' + displayName: 'Install pytest' - script: 'python setup.py pytest' - displayName: 'Unit tests' + displayName: 'CATE Unit tests' env: - PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto' + PYTEST_ADDOPTS: '-m "cate_api_dml or cate_api" -n auto' COVERAGE_PROCESS_START: 'setup.cfg' - script: 'pip install pytest pytest-runner && python setup.py pytest' - displayName: 'Unit tests' + displayName: 'Serial Unit tests' env: PYTEST_ADDOPTS: '-m "serial" -n 1' COVERAGE_PROCESS_START: 'setup.cfg' From 01c8001837c9d22972fab0c9a8e529ea57459d12 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 30 Mar 2022 12:14:46 -0400 Subject: [PATCH 31/35] Consolidate test marks --- azure-pipelines.yml | 15 +++++---------- econml/tests/test_causal_analysis.py | 10 ++-------- econml/tests/test_dynamic_dml.py | 2 +- econml/tests/test_tree.py | 3 --- pyproject.toml | 4 +--- 5 files changed, 9 insertions(+), 25 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7ed583149..7b7a27fea 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -207,7 +207,7 @@ jobs: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial or cate_api or cate_api_dml)" -n 2' + PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or serial or cate_api)" -n 2' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' @@ -253,7 +253,7 @@ jobs: parameters: package: '-e .[tf,plt]' job: - job: Tests_causal + job: Tests_serial dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') displayName: 'Run tests (Causal)' @@ -261,7 +261,7 @@ jobs: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "causal" -n 1' + PYTEST_ADDOPTS: '-m "serial" -n 1' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' @@ -280,7 +280,7 @@ jobs: parameters: package: '-e .[tf,plt]' job: - job: Tests_other + job: Tests_CATE_API dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') displayName: 'Run tests (Other)' @@ -290,12 +290,7 @@ jobs: - script: 'python setup.py pytest' displayName: 'CATE Unit tests' env: - PYTEST_ADDOPTS: '-m "cate_api_dml or cate_api" -n auto' - COVERAGE_PROCESS_START: 'setup.cfg' - - script: 'pip install pytest pytest-runner && python setup.py pytest' - displayName: 'Serial Unit tests' - env: - PYTEST_ADDOPTS: '-m "serial" -n 1' + PYTEST_ADDOPTS: '-m "cate_api" -n auto' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index a98af10cc..f99c648d0 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import gc import unittest from contextlib import ExitStack @@ -19,7 +18,7 @@ def assert_less_close(arr1, arr2): assert np.all(np.logical_or(arr1 <= arr2, np.isclose(arr1, arr2))) -@pytest.mark.causal +@pytest.mark.serial class TestCausalAnalysis(unittest.TestCase): def test_basic_array(self): @@ -693,9 +692,6 @@ def test_random_state(self): np.testing.assert_equal(glo.point.values, glo2.point.values) np.testing.assert_equal(glo.stderr.values, glo2.stderr.values) - del ca, glo, ca2, glo2 - gc.collect() - def test_can_set_categories(self): y = pd.Series(np.random.choice([0, 1], size=(500,))) X = pd.DataFrame({'a': np.random.normal(size=500), @@ -789,14 +785,12 @@ def test_invalid_inds(self): self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'), (5, 'cat_limit')]) - del ca - gc.collect() - # Add tests that guarantee that the reliance on DML feature order is not broken, such as # Creare a transformer that zeros out all variables after the first n_x variables, so it zeros out W # Pass an example where W is irrelevant and X is confounder # As long as DML doesnt change the order of the inputs, then things should be good. Otherwise X would be # zeroed out and the test will fail + def test_scaling_transforms(self): # shouldn't matter if X is scaled much larger or much smaller than W, we should still get good estimates n = 2000 diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index 2f5048cc2..d007a2706 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -16,7 +16,7 @@ from econml.tests.dgp import DynamicPanelDGP -@pytest.mark.cate_api_dml +@pytest.mark.cate_api class TestDynamicDML(unittest.TestCase): def test_cate_api(self): diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py index 7267b93fc..b5d898f56 100644 --- a/econml/tests/test_tree.py +++ b/econml/tests/test_tree.py @@ -258,9 +258,6 @@ def test_honest_values(self): np.testing.assert_array_almost_equal(tree.value.flatten(), .4 * np.ones(len(tree.value))) def test_noisy_instance(self): - """ - The purpose of this test - """ # initialize parameters n_samples = 5000 diff --git a/pyproject.toml b/pyproject.toml index 80f8ed8dd..bf2e2991e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,8 +15,6 @@ markers = [ "notebook", "automl", "dml", - "causal", "serial", - "cate_api", - "cate_api_dml" + "cate_api" ] \ No newline at end of file From 6ad331555051c4e29c345e0d458192a439e707a3 Mon Sep 17 00:00:00 2001 From: "Keith Battocchi (HE/HIM)" Date: Fri, 1 Apr 2022 12:26:23 -0400 Subject: [PATCH 32/35] Update numpy test dependency --- azure-pipelines.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7b7a27fea..de74e478b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -96,7 +96,7 @@ jobs: displayName: 'Notebooks (Customer Solutions)' steps: # Work around https://github.com/pypa/pip/issues/9542 - - script: 'pip install -U numpy~=1.21.0' + - script: 'pip install -U numpy~=1.22.0' displayName: 'Upgrade numpy' - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest' @@ -124,7 +124,7 @@ jobs: displayName: 'Notebooks (except Customer Solutions)' steps: # Work around https://github.com/pypa/pip/issues/9542 - - script: 'pip install -U numpy~=1.21.0' + - script: 'pip install -U numpy~=1.22.0' displayName: 'Upgrade numpy' # shap 0.39 and sklearn 1.0 interact badly in these notebooks @@ -256,7 +256,7 @@ jobs: job: Tests_serial dependsOn: 'EvalChanges' condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True') - displayName: 'Run tests (Causal)' + displayName: 'Run tests (Serial)' steps: - script: 'pip install pytest pytest-runner && python setup.py pytest' displayName: 'Unit tests' From 564af9e4a41454510b7dc79dab710a148bd8c600 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Mon, 4 Apr 2022 18:09:14 -0400 Subject: [PATCH 33/35] Revert numpy version --- azure-pipelines.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index de74e478b..ff9b84a82 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -96,10 +96,10 @@ jobs: displayName: 'Notebooks (Customer Solutions)' steps: # Work around https://github.com/pypa/pip/issues/9542 - - script: 'pip install -U numpy~=1.22.0' + - script: 'pip install -U numpy~=1.21.0' displayName: 'Upgrade numpy' - - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest' + - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest' displayName: 'Unit tests' env: PYTEST_ADDOPTS: '-m "notebook"' @@ -124,7 +124,7 @@ jobs: displayName: 'Notebooks (except Customer Solutions)' steps: # Work around https://github.com/pypa/pip/issues/9542 - - script: 'pip install -U numpy~=1.22.0' + - script: 'pip install -U numpy~=1.21.0' displayName: 'Upgrade numpy' # shap 0.39 and sklearn 1.0 interact badly in these notebooks From 96d729200f412282fd2a57a6d5e0893cbbc106e6 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Tue, 5 Apr 2022 12:58:11 -0400 Subject: [PATCH 34/35] Remove workaround for downlevel shap --- azure-pipelines.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ff9b84a82..7f6282ae3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -98,7 +98,7 @@ jobs: # Work around https://github.com/pypa/pip/issues/9542 - script: 'pip install -U numpy~=1.21.0' displayName: 'Upgrade numpy' - + - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest' displayName: 'Unit tests' env: @@ -126,12 +126,6 @@ jobs: # Work around https://github.com/pypa/pip/issues/9542 - script: 'pip install -U numpy~=1.21.0' displayName: 'Upgrade numpy' - - # shap 0.39 and sklearn 1.0 interact badly in these notebooks - # shap 0.40 has a bug in waterfall (https://github.com/slundberg/shap/issues/2283) that breaks our main tests - # but fixes the interaction here... - - script: 'pip install -U shap~=0.40.0' - displayName: 'Upgrade shap' - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest' displayName: 'Unit tests' From 4e8d0dff4fa322cf5907097944a0eb7976639a61 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Tue, 5 Apr 2022 15:07:17 -0400 Subject: [PATCH 35/35] Add GC collections --- econml/tests/test_causal_analysis.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py index f99c648d0..4293f3233 100644 --- a/econml/tests/test_causal_analysis.py +++ b/econml/tests/test_causal_analysis.py @@ -9,6 +9,7 @@ from numpy.core.fromnumeric import squeeze import pandas as pd import pytest +import gc from econml.solutions.causal_analysis import CausalAnalysis from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants @@ -692,6 +693,9 @@ def test_random_state(self): np.testing.assert_equal(glo.point.values, glo2.point.values) np.testing.assert_equal(glo.stderr.values, glo2.stderr.values) + del ca, glo, ca2, glo2 + gc.collect() + def test_can_set_categories(self): y = pd.Series(np.random.choice([0, 1], size=(500,))) X = pd.DataFrame({'a': np.random.normal(size=500), @@ -785,6 +789,9 @@ def test_invalid_inds(self): self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'), (5, 'cat_limit')]) + del ca + gc.collect() + # Add tests that guarantee that the reliance on DML feature order is not broken, such as # Creare a transformer that zeros out all variables after the first n_x variables, so it zeros out W # Pass an example where W is irrelevant and X is confounder