From fa4e28ea199c3f9842a2286fef902fab392308f2 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 17 Feb 2022 14:17:33 -0800
Subject: [PATCH 01/35] removes nested for loops; reduces n to 500

---
 econml/tests/test_driv.py | 271 +++++++++++++++++++-------------------
 1 file changed, 139 insertions(+), 132 deletions(-)

diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py
index ed3b88eb0..e4f6b76de 100644
--- a/econml/tests/test_driv.py
+++ b/econml/tests/test_driv.py
@@ -1,17 +1,18 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import unittest
+from econml.iv.dr import (DRIV, LinearDRIV, SparseLinearDRIV, ForestDRIV, IntentToTreatDRIV, LinearIntentToTreatDRIV,)
+from econml.iv.dr._dr import _DummyCATE
+from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
+from econml.utilities import shape
+
+import itertools
+import numpy as np
 import pytest
 import pickle
-import numpy as np
 from scipy import special
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from econml.iv.dr._dr import _DummyCATE
-from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
 from sklearn.preprocessing import PolynomialFeatures
-from econml.utilities import shape
-from econml.iv.dr import (DRIV, LinearDRIV, SparseLinearDRIV, ForestDRIV, IntentToTreatDRIV, LinearIntentToTreatDRIV,)
+import unittest
 
 
 class TestDRIV(unittest.TestCase):
@@ -25,137 +26,143 @@ def marg_eff_shape(n, binary_T):
         def eff_shape(n, d_x):
             return (n if d_x else 1,)
 
-        n = 1000
+        n = 500
         y = np.random.normal(size=(n,))
 
-        for d_w in [None, 10]:
+        for d_w, d_x, binary_T, binary_Z, projection, featurizer in itertools.product(
+                                            [None, 10],     # d_w
+                                            [None, 3],      # d_x
+                                            [True, False],  # binary_T
+                                            [True, False],  # binary_Z
+                                            [True, False],  # projection
+                                            [None,          # featureizer
+                                            PolynomialFeatures(degree=2, include_bias=False),] 
+                                                        ):
+
             if d_w is None:
                 W = None
             else:
                 W = np.random.normal(size=(n, d_w))
-            for d_x in [None, 3]:
-                if d_x is None:
-                    X = None
-                else:
-                    X = np.random.normal(size=(n, d_x))
-                for binary_T in [True, False]:
-                    if binary_T:
-                        T = np.random.choice(["a", "b"], size=(n,))
-                    else:
-                        T = np.random.normal(size=(n,))
-                    for binary_Z in [True, False]:
-                        if binary_Z:
-                            Z = np.random.choice(["c", "d"], size=(n,))
-                        else:
-                            Z = np.random.normal(size=(n,))
-                        for projection in [True, False]:
-                            for featurizer in [
-                                None,
-                                PolynomialFeatures(degree=2, include_bias=False),
-                            ]:
-                                est_list = [
-                                    DRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        model_final=StatsModelsLinearRegression(
-                                            fit_intercept=False
-                                        ),
-                                        fit_cate_intercept=True,
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                    LinearDRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        fit_cate_intercept=True,
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                    SparseLinearDRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        fit_cate_intercept=True,
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                    ForestDRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                ]
-
-                                if X is None:
-                                    est_list = est_list[:-1]
-
-                                if binary_T and binary_Z:
-                                    est_list += [
-                                        IntentToTreatDRIV(
-                                            flexible_model_effect=StatsModelsLinearRegression(
-                                                fit_intercept=False
-                                            ),
-                                            fit_cate_intercept=True,
-                                            featurizer=featurizer,
-                                        ),
-                                        LinearIntentToTreatDRIV(
-                                            flexible_model_effect=StatsModelsLinearRegression(
-                                                fit_intercept=False
-                                            ),
-                                            featurizer=featurizer,
-                                        ),
-                                    ]
-
-                                for est in est_list:
-                                    with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
-                                                      projection=projection, featurizer=featurizer,
-                                                      est=est):
-
-                                        # ensure we can serialize unfit estimator
-                                        pickle.dumps(est)
-
-                                        est.fit(y, T, Z=Z, X=X, W=W)
-
-                                        # ensure we can serialize fit estimator
-                                        pickle.dumps(est)
-
-                                        # expected effect size
-                                        const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
-                                        marginal_effect_shape = marg_eff_shape(n, binary_T)
-                                        effect_shape = eff_shape(n, d_x)
-                                        # test effect
-                                        const_marg_eff = est.const_marginal_effect(X)
-                                        self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)
-                                        marg_eff = est.marginal_effect(T, X)
-                                        self.assertEqual(shape(marg_eff), marginal_effect_shape)
-                                        T0 = "a" if binary_T else 0
-                                        T1 = "b" if binary_T else 1
-                                        eff = est.effect(X, T0=T0, T1=T1)
-                                        self.assertEqual(shape(eff), effect_shape)
-
-                                        # test inference
-                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
-                                        marg_eff_int = est.marginal_effect_interval(T, X)
-                                        eff_int = est.effect_interval(X, T0=T0, T1=T1)
-                                        self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape)
-                                        self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape)
-                                        self.assertEqual(shape(eff_int), (2,) + effect_shape)
-
-                                        # test can run score
-                                        est.score(y, T, Z=Z, X=X, W=W)
-
-                                        if X is not None:
-                                            # test cate_feature_names
-                                            expect_feat_len = featurizer.fit(
-                                                X).n_output_features_ if featurizer else d_x
-                                            self.assertEqual(len(est.cate_feature_names()), expect_feat_len)
-
-                                            # test can run shap values
-                                            shap_values = est.shap_values(X[:10])
+
+            if d_x is None:
+                X = None
+            else:
+                X = np.random.normal(size=(n, d_x))
+                
+            if binary_T:
+                T = np.random.choice(["a", "b"], size=(n,))
+            else:
+                T = np.random.normal(size=(n,))
+
+            if binary_Z:
+                Z = np.random.choice(["c", "d"], size=(n,))
+            else:
+                Z = np.random.normal(size=(n,))
+ 
+            est_list = [
+                DRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    model_final=StatsModelsLinearRegression(
+                        fit_intercept=False
+                    ),
+                    fit_cate_intercept=True,
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+                LinearDRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    fit_cate_intercept=True,
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+                SparseLinearDRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    fit_cate_intercept=True,
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+                ForestDRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+            ]
+
+            if X is None:
+                est_list = est_list[:-1]
+
+            if binary_T and binary_Z:
+                est_list += [
+                    IntentToTreatDRIV(
+                        flexible_model_effect=StatsModelsLinearRegression(
+                            fit_intercept=False
+                        ),
+                        fit_cate_intercept=True,
+                        featurizer=featurizer,
+                    ),
+                    LinearIntentToTreatDRIV(
+                        flexible_model_effect=StatsModelsLinearRegression(
+                            fit_intercept=False
+                        ),
+                        featurizer=featurizer,
+                    ),
+                ]
+
+            for est in est_list:
+                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
+                                    projection=projection, featurizer=featurizer,
+                                    est=est):
+
+                    # ensure we can serialize unfit estimator
+                    pickle.dumps(est)
+
+                    est.fit(y, T, Z=Z, X=X, W=W)
+
+                    # ensure we can serialize fit estimator
+                    pickle.dumps(est)
+
+                    # expected effect size
+                    const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
+                    marginal_effect_shape = marg_eff_shape(n, binary_T)
+                    effect_shape = eff_shape(n, d_x)
+                    
+                    # test effect
+                    const_marg_eff = est.const_marginal_effect(X)
+                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)
+                    marg_eff = est.marginal_effect(T, X)
+                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
+                    T0 = "a" if binary_T else 0
+                    T1 = "b" if binary_T else 1
+                    eff = est.effect(X, T0=T0, T1=T1)
+                    self.assertEqual(shape(eff), effect_shape)
+
+                    # test inference
+                    const_marg_eff_int = est.const_marginal_effect_interval(X)
+                    marg_eff_int = est.marginal_effect_interval(T, X)
+                    eff_int = est.effect_interval(X, T0=T0, T1=T1)
+                    self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape)
+                    self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape)
+                    self.assertEqual(shape(eff_int), (2,) + effect_shape)
+
+                    # test can run score
+                    est.score(y, T, Z=Z, X=X, W=W)
+
+                    if X is not None:
+                        # test cate_feature_names
+                        expect_feat_len = featurizer.fit(
+                            X).n_output_features_ if featurizer else d_x
+                        self.assertEqual(len(est.cate_feature_names()), expect_feat_len)
+
+                        # test can run shap values
+                        shap_values = est.shap_values(X[:10])
 
     def test_accuracy(self):
         np.random.seed(123)

From a4eb09f37557c389481faae279b29b9df0a1089b Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 17 Feb 2022 15:03:42 -0800
Subject: [PATCH 02/35] linting fixes

---
 econml/tests/test_driv.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py
index e4f6b76de..a8505d9ef 100644
--- a/econml/tests/test_driv.py
+++ b/econml/tests/test_driv.py
@@ -29,15 +29,15 @@ def eff_shape(n, d_x):
         n = 500
         y = np.random.normal(size=(n,))
 
-        for d_w, d_x, binary_T, binary_Z, projection, featurizer in itertools.product(
-                                            [None, 10],     # d_w
-                                            [None, 3],      # d_x
-                                            [True, False],  # binary_T
-                                            [True, False],  # binary_Z
-                                            [True, False],  # projection
-                                            [None,          # featureizer
-                                            PolynomialFeatures(degree=2, include_bias=False),] 
-                                                        ):
+        # parameter combinations to test
+        for d_w, d_x, binary_T, binary_Z, projection, featurizer\
+            in itertools.product(
+                [None, 10],     # d_w
+                [None, 3],      # d_x
+                [True, False],  # binary_T
+                [True, False],  # binary_Z
+                [True, False],  # projection
+                [None, PolynomialFeatures(degree=2, include_bias=False), ]):    # featurizer
 
             if d_w is None:
                 W = None
@@ -48,7 +48,7 @@ def eff_shape(n, d_x):
                 X = None
             else:
                 X = np.random.normal(size=(n, d_x))
-                
+
             if binary_T:
                 T = np.random.choice(["a", "b"], size=(n,))
             else:
@@ -58,7 +58,7 @@ def eff_shape(n, d_x):
                 Z = np.random.choice(["c", "d"], size=(n,))
             else:
                 Z = np.random.normal(size=(n,))
- 
+
             est_list = [
                 DRIV(
                     flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
@@ -117,9 +117,9 @@ def eff_shape(n, d_x):
                 ]
 
             for est in est_list:
-                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
-                                    projection=projection, featurizer=featurizer,
-                                    est=est):
+                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T,
+                                  binary_Z=binary_Z, projection=projection, featurizer=featurizer,
+                                  est=est):
 
                     # ensure we can serialize unfit estimator
                     pickle.dumps(est)
@@ -133,7 +133,7 @@ def eff_shape(n, d_x):
                     const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
                     marginal_effect_shape = marg_eff_shape(n, binary_T)
                     effect_shape = eff_shape(n, d_x)
-                    
+
                     # test effect
                     const_marg_eff = est.const_marginal_effect(X)
                     self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

From dc47b6affb7a42454836b1e8022a628eabc878d6 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 17 Feb 2022 15:10:16 -0800
Subject: [PATCH 03/35] reorganize imports

---
 econml/tests/test_dmliv.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py
index db8b328d8..a1dbd68d2 100644
--- a/econml/tests/test_dmliv.py
+++ b/econml/tests/test_dmliv.py
@@ -1,18 +1,21 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import unittest
-import pytest
 import pickle
+import unittest
+
 import numpy as np
+import pytest
 from scipy import special
-from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
 from econml.iv.dr._dr import _DummyCATE
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
-from sklearn.preprocessing import PolynomialFeatures
 from econml.utilities import shape
-from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
+
 
 
 class TestDMLIV(unittest.TestCase):

From 8e4682fc49e0f0577acbfacf397de1befce8be70 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 10:49:27 -0800
Subject: [PATCH 04/35] removes unused imports

---
 econml/tests/test_tree.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py
index 7de214f6c..de18c9fd8 100644
--- a/econml/tests/test_tree.py
+++ b/econml/tests/test_tree.py
@@ -2,12 +2,9 @@
 # Licensed under the MIT License.
 
 import unittest
-import logging
-import time
-import random
+
 import numpy as np
-import sparse as sp
-import pytest
+
 from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE
 
 

From 2cb7f40a4a3cc65a5977f063270c27e729f436e1 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 13:28:22 -0800
Subject: [PATCH 05/35] runs tree tests serially

---
 azure-pipelines.yml       | 29 ++++++++++++++++++++++++++++-
 econml/tests/test_tree.py | 18 +++++++++++++++++-
 pyproject.toml            |  5 +++--
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4c80ed729..fbfe371e5 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -207,7 +207,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)" -n 2'
+          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial)" -n 2'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
@@ -270,6 +270,33 @@ jobs:
           testRunTitle: 'Python $(python.version), image $(imageName)'
         condition: succeededOrFailed()
 
+      - task: PublishCodeCoverageResults@1
+        displayName: 'Publish Code Coverage Results'
+        inputs:
+          codeCoverageTool: Cobertura
+          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
+
+- template: azure-pipelines-steps.yml
+  parameters:
+    package: '-e .[tf,plt]'
+    job:
+      job: Tests_serial
+      dependsOn: 'EvalChanges'
+      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
+      displayName: 'Run tests (Causal)'
+      steps:
+      - script: 'pip install pytest pytest-runner && python setup.py pytest'
+        displayName: 'Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "serial" -n 1'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results **/test-results.xml'
+        inputs:
+          testResultsFiles: '**/test-results.xml'
+          testRunTitle: 'Python $(python.version), image $(imageName)'
+        condition: succeededOrFailed()
+
       - task: PublishCodeCoverageResults@1
         displayName: 'Publish Code Coverage Results'
         inputs:
diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py
index de18c9fd8..b6842c4a1 100644
--- a/econml/tests/test_tree.py
+++ b/econml/tests/test_tree.py
@@ -4,10 +4,11 @@
 import unittest
 
 import numpy as np
+import pytest
 
 from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE
 
-
+@pytest.mark.serial
 class TestTree(unittest.TestCase):
 
     def _get_base_config(self):
@@ -256,10 +257,17 @@ def test_honest_values(self):
         np.testing.assert_array_almost_equal(tree.value.flatten(), .4 * np.ones(len(tree.value)))
 
     def test_noisy_instance(self):
+        """
+        Check that a tree trained on noisy targets predicts the noise-free signal.
+        """
+
+        # initialize parameters
         n_samples = 5000
         X = np.random.normal(0, 1, size=(n_samples, 1))
         y_base = 1.0 * X[:, [0]] * (X[:, [0]] > 0)
         y = y_base + np.random.normal(0, .1, size=(n_samples, 1))
+
+        # initialize config with base config and overwrite some values
         config = self._get_base_config()
         config['n_features'] = 1
         config['max_features'] = 1
@@ -271,11 +279,16 @@ def test_noisy_instance(self):
         config['max_node_samples'] = X.shape[0]
         config['samples_train'] = np.arange(X.shape[0], dtype=np.intp)
         config['samples_val'] = np.arange(X.shape[0], dtype=np.intp)
+
+        # train a tree with these config parameters and assert that
+        # its predictions on X_test match y_test to one decimal place
         tree = self._train_tree(config, X, y)
         X_test = np.zeros((100, 1))
         X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100)
         y_test = 1.0 * X_test[:, [0]] * (X_test[:, [0]] > 0)
         np.testing.assert_array_almost_equal(tree.predict(X_test), y_test, decimal=1)
+
+        # initialize config with base honest config and overwrite some values
         config = self._get_base_honest_config()
         config['n_features'] = 1
         config['max_features'] = 1
@@ -287,6 +300,9 @@ def test_noisy_instance(self):
         config['max_node_samples'] = X.shape[0] // 2
         config['samples_train'] = np.arange(X.shape[0] // 2, dtype=np.intp)
         config['samples_val'] = np.arange(X.shape[0] // 2, X.shape[0], dtype=np.intp)
+
+        # predict tree using config parameters and assert
+        # shape of trained tree is the same as y_test 
         tree = self._train_tree(config, X, y)
         X_test = np.zeros((100, 1))
         X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100)
diff --git a/pyproject.toml b/pyproject.toml
index 9f0652dbb..82048ea08 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,11 +9,12 @@ requires = [
 build-backend = "setuptools.build_meta"
 
 [tool.pytest.ini_options]
-addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml"
+# addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml"
 markers = [    
     "slow",
     "notebook",
     "automl",
     "dml",
-    "causal"
+    "causal",
+    "serial"
 ]
\ No newline at end of file

From 0cccf374ac4f0f5c25d8da072abfd5bf466a8568 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 13:42:08 -0800
Subject: [PATCH 06/35] fixes typo

---
 azure-pipelines.yml       | 2 +-
 econml/tests/test_tree.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index fbfe371e5..dcaa3a0a1 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -283,7 +283,7 @@ jobs:
       job: Tests_serial
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (Causal)'
+      displayName: 'Run tests (Serial)'
       steps:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py
index b6842c4a1..7267b93fc 100644
--- a/econml/tests/test_tree.py
+++ b/econml/tests/test_tree.py
@@ -8,6 +8,7 @@
 
 from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE
 
+
 @pytest.mark.serial
 class TestTree(unittest.TestCase):
 
@@ -302,7 +303,7 @@ def test_noisy_instance(self):
         config['samples_val'] = np.arange(X.shape[0] // 2, X.shape[0], dtype=np.intp)
 
         # predict tree using config parameters and assert
-        # shape of trained tree is the same as y_test 
+        # shape of trained tree is the same as y_test
         tree = self._train_tree(config, X, y)
         X_test = np.zeros((100, 1))
         X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100)

From 684e1c70f5d4988a309979ec6ccd3c9929819bf9 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 13:50:57 -0800
Subject: [PATCH 07/35] linting fix

---
 econml/tests/test_dmliv.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py
index a1dbd68d2..9e73b491f 100644
--- a/econml/tests/test_dmliv.py
+++ b/econml/tests/test_dmliv.py
@@ -17,7 +17,6 @@
 from econml.utilities import shape
 
 
-
 class TestDMLIV(unittest.TestCase):
     def test_cate_api(self):
         def const_marg_eff_shape(n, d_x, d_y, binary_T):

From 0b951fd1cc4c7eb3963b75e59a26a46486c3e44a Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 14:26:03 -0800
Subject: [PATCH 08/35] reorganize imports

---
 econml/tests/test_drlearner.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py
index 3674aa7af..9315ad618 100644
--- a/econml/tests/test_drlearner.py
+++ b/econml/tests/test_drlearner.py
@@ -1,26 +1,27 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import numpy as np
+from contextlib import ExitStack
+import pickle
 import unittest
+
+import numpy as np
+from numpy.random import normal, multivariate_normal, binomial
 import pytest
-import pickle
+
+import scipy.special
 from sklearn.base import TransformerMixin
-from numpy.random import normal, multivariate_normal, binomial
+from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
 from sklearn.exceptions import DataConversionWarning
 from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
 from sklearn.model_selection import KFold, GroupKFold
-from sklearn.preprocessing import PolynomialFeatures
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures
+
 from econml.dr import DRLearner, LinearDRLearner, SparseLinearDRLearner, ForestDRLearner
-from econml.utilities import shape, hstack, vstack, reshape, cross_product
 from econml.inference import BootstrapInference, StatsModelsInferenceDiscrete
-from contextlib import ExitStack
-from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
-from sklearn.linear_model import LinearRegression, LogisticRegression
+from econml.utilities import shape, hstack, vstack, reshape, cross_product
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
-import scipy.special
 import econml.tests.utilities  # bugfix for assertWarns
 
 

From d1068c0c8a28a8849305a4644064405700c0e07a Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 14:34:21 -0800
Subject: [PATCH 09/35] run test serially

---
 econml/tests/test_drlearner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py
index 9315ad618..1f81bd0ab 100644
--- a/econml/tests/test_drlearner.py
+++ b/econml/tests/test_drlearner.py
@@ -25,6 +25,7 @@
 import econml.tests.utilities  # bugfix for assertWarns
 
 
+@pytest.mark.serial
 class TestDRLearner(unittest.TestCase):
 
     @classmethod

From 33c580f0d369b17a9c223b0f3a645002f961b81e Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 15:38:51 -0800
Subject: [PATCH 10/35] adds tags for cate_api tests

---
 econml/tests/test_dmliv.py       |  1 +
 econml/tests/test_driv.py        | 29 +++++++++++++++++++----------
 econml/tests/test_dynamic_dml.py |  2 +-
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py
index 9e73b491f..54175d0c9 100644
--- a/econml/tests/test_dmliv.py
+++ b/econml/tests/test_dmliv.py
@@ -17,6 +17,7 @@
 from econml.utilities import shape
 
 
+@pytest.mark.cate_api
 class TestDMLIV(unittest.TestCase):
     def test_cate_api(self):
         def const_marg_eff_shape(n, d_x, d_y, binary_T):
diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py
index a8505d9ef..a9e0d30be 100644
--- a/econml/tests/test_driv.py
+++ b/econml/tests/test_driv.py
@@ -15,15 +15,19 @@
 import unittest
 
 
+@pytest.mark.cate_api
 class TestDRIV(unittest.TestCase):
     def test_cate_api(self):
         def const_marg_eff_shape(n, d_x, binary_T):
+            """Constant marginal effect shape."""
             return (n if d_x else 1,) + ((1,) if binary_T else ())
 
         def marg_eff_shape(n, binary_T):
+            """Marginal effect shape."""
             return (n,) + ((1,) if binary_T else ())
 
         def eff_shape(n, d_x):
+            "Effect shape."
             return (n if d_x else 1,)
 
         n = 500
@@ -121,34 +125,39 @@ def eff_shape(n, d_x):
                                   binary_Z=binary_Z, projection=projection, featurizer=featurizer,
                                   est=est):
 
+                    # TODO: serializing/deserializing for every combination -- is this necessary?
                     # ensure we can serialize unfit estimator
-                    pickle.dumps(est)
+                    # pickle.dumps(est)
 
                     est.fit(y, T, Z=Z, X=X, W=W)
 
                     # ensure we can serialize fit estimator
-                    pickle.dumps(est)
+                    # pickle.dumps(est)
 
                     # expected effect size
-                    const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
+                    exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
                     marginal_effect_shape = marg_eff_shape(n, binary_T)
                     effect_shape = eff_shape(n, d_x)
 
-                    # test effect
+                    # assert calculated constant marginal effect shape is expected
+                    # const_marginal effect is defined in LinearCateEstimator class
                     const_marg_eff = est.const_marginal_effect(X)
-                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)
+                    self.assertEqual(shape(const_marg_eff), exp_const_marginal_effect_shape)
+
+                    # assert calculated marginal effect shape is expected
                     marg_eff = est.marginal_effect(T, X)
                     self.assertEqual(shape(marg_eff), marginal_effect_shape)
+
                     T0 = "a" if binary_T else 0
                     T1 = "b" if binary_T else 1
                     eff = est.effect(X, T0=T0, T1=T1)
                     self.assertEqual(shape(eff), effect_shape)
 
                     # test inference
-                    const_marg_eff_int = est.const_marginal_effect_interval(X)
-                    marg_eff_int = est.marginal_effect_interval(T, X)
-                    eff_int = est.effect_interval(X, T0=T0, T1=T1)
-                    self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape)
+                    const_marg_eff_int = est.const_marginal_effect_interval(X) # defer to infere
+                    marg_eff_int = est.marginal_effect_interval(T, X) # d
+                    eff_int = est.effect_interval(X, T0=T0, T1=T1)   # d
+                    self.assertEqual(shape(const_marg_eff_int), (2,) + exp_const_marginal_effect_shape)
                     self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape)
                     self.assertEqual(shape(eff_int), (2,) + effect_shape)
 
@@ -162,7 +171,7 @@ def eff_shape(n, d_x):
                         self.assertEqual(len(est.cate_feature_names()), expect_feat_len)
 
                         # test can run shap values
-                        shap_values = est.shap_values(X[:10])
+                        _ = est.shap_values(X[:10])
 
     def test_accuracy(self):
         np.random.seed(123)
diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py
index 7539c18f9..2f5048cc2 100644
--- a/econml/tests/test_dynamic_dml.py
+++ b/econml/tests/test_dynamic_dml.py
@@ -16,7 +16,7 @@
 from econml.tests.dgp import DynamicPanelDGP
 
 
-@pytest.mark.dml
+@pytest.mark.cate_api_dml
 class TestDynamicDML(unittest.TestCase):
 
     def test_cate_api(self):

From caba5ce521e76a132e6e0eda5b4a0543edf1a4bd Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 15:43:36 -0800
Subject: [PATCH 11/35] sets up new test marks in azure pipeline

---
 azure-pipelines.yml       | 56 ++++++++++++++++++++++++++++++++++++++-
 econml/tests/test_driv.py |  6 ++---
 pyproject.toml            |  4 ++-
 3 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index dcaa3a0a1..b612c4b0e 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -207,7 +207,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial)" -n 2'
+          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial or cate_api or cate_api_dml)" -n 2'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
@@ -297,6 +297,60 @@ jobs:
           testRunTitle: 'Python $(python.version), image $(imageName)'
         condition: succeededOrFailed()
 
+      - task: PublishCodeCoverageResults@1
+        displayName: 'Publish Code Coverage Results'
+        inputs:
+          codeCoverageTool: Cobertura
+          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
+
+- template: azure-pipelines-steps.yml
+  parameters:
+    package: '-e .[tf,plt]'
+    job:
+      job: Tests_serial
+      dependsOn: 'EvalChanges'
+      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
+      displayName: 'Run tests (CATE/DML)'
+      steps:
+      - script: 'pip install pytest pytest-runner && python setup.py pytest'
+        displayName: 'Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results **/test-results.xml'
+        inputs:
+          testResultsFiles: '**/test-results.xml'
+          testRunTitle: 'Python $(python.version), image $(imageName)'
+        condition: succeededOrFailed()
+
+      - task: PublishCodeCoverageResults@1
+        displayName: 'Publish Code Coverage Results'
+        inputs:
+          codeCoverageTool: Cobertura
+          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
+
+- template: azure-pipelines-steps.yml
+  parameters:
+    package: '-e .[tf,plt]'
+    job:
+      job: Tests_serial
+      dependsOn: 'EvalChanges'
+      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
+      displayName: 'Run tests (CATE)'
+      steps:
+      - script: 'pip install pytest pytest-runner && python setup.py pytest'
+        displayName: 'Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results **/test-results.xml'
+        inputs:
+          testResultsFiles: '**/test-results.xml'
+          testRunTitle: 'Python $(python.version), image $(imageName)'
+        condition: succeededOrFailed()
+
       - task: PublishCodeCoverageResults@1
         displayName: 'Publish Code Coverage Results'
         inputs:
diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py
index a9e0d30be..d66f8f96d 100644
--- a/econml/tests/test_driv.py
+++ b/econml/tests/test_driv.py
@@ -154,9 +154,9 @@ def eff_shape(n, d_x):
                     self.assertEqual(shape(eff), effect_shape)
 
                     # test inference
-                    const_marg_eff_int = est.const_marginal_effect_interval(X) # defer to infere
-                    marg_eff_int = est.marginal_effect_interval(T, X) # d
-                    eff_int = est.effect_interval(X, T0=T0, T1=T1)   # d
+                    const_marg_eff_int = est.const_marginal_effect_interval(X)
+                    marg_eff_int = est.marginal_effect_interval(T, X)
+                    eff_int = est.effect_interval(X, T0=T0, T1=T1)
                     self.assertEqual(shape(const_marg_eff_int), (2,) + exp_const_marginal_effect_shape)
                     self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape)
                     self.assertEqual(shape(eff_int), (2,) + effect_shape)
diff --git a/pyproject.toml b/pyproject.toml
index 82048ea08..8e1ed4e52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,5 +16,7 @@ markers = [
     "automl",
     "dml",
     "causal",
-    "serial"
+    "serial",
+    "cate_api", 
+    "cate_api_dml"
 ]
\ No newline at end of file

From a588897f645a33b1b8f80f278709bf4152fe7bae Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 15:51:51 -0800
Subject: [PATCH 12/35] fixes typo in pipeline yml

---
 azure-pipelines.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index b612c4b0e..76fe0d426 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -307,7 +307,7 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_serial
+      job: Tests_cate_dml
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
       displayName: 'Run tests (CATE/DML)'
@@ -334,7 +334,7 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_serial
+      job: Tests_cate
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
       displayName: 'Run tests (CATE)'

From 0da686032b6fcdb1068f805130e29e862854acd7 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 15:58:44 -0800
Subject: [PATCH 13/35] debug pipeline

---
 azure-pipelines.yml | 54 ---------------------------------------------
 1 file changed, 54 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 76fe0d426..0203d4426 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -302,57 +302,3 @@ jobs:
         inputs:
           codeCoverageTool: Cobertura
           summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
-
-- template: azure-pipelines-steps.yml
-  parameters:
-    package: '-e .[tf,plt]'
-    job:
-      job: Tests_cate_dml
-      dependsOn: 'EvalChanges'
-      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (CATE/DML)'
-      steps:
-      - script: 'pip install pytest pytest-runner && python setup.py pytest'
-        displayName: 'Unit tests'
-        env:
-          PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto'
-          COVERAGE_PROCESS_START: 'setup.cfg'
-      - task: PublishTestResults@2
-        displayName: 'Publish Test Results **/test-results.xml'
-        inputs:
-          testResultsFiles: '**/test-results.xml'
-          testRunTitle: 'Python $(python.version), image $(imageName)'
-        condition: succeededOrFailed()
-
-      - task: PublishCodeCoverageResults@1
-        displayName: 'Publish Code Coverage Results'
-        inputs:
-          codeCoverageTool: Cobertura
-          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
-
-- template: azure-pipelines-steps.yml
-  parameters:
-    package: '-e .[tf,plt]'
-    job:
-      job: Tests_cate
-      dependsOn: 'EvalChanges'
-      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (CATE)'
-      steps:
-      - script: 'pip install pytest pytest-runner && python setup.py pytest'
-        displayName: 'Unit tests'
-        env:
-          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
-          COVERAGE_PROCESS_START: 'setup.cfg'
-      - task: PublishTestResults@2
-        displayName: 'Publish Test Results **/test-results.xml'
-        inputs:
-          testResultsFiles: '**/test-results.xml'
-          testRunTitle: 'Python $(python.version), image $(imageName)'
-        condition: succeededOrFailed()
-
-      - task: PublishCodeCoverageResults@1
-        displayName: 'Publish Code Coverage Results'
-        inputs:
-          codeCoverageTool: Cobertura
-          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
\ No newline at end of file

From fd5b3e27b435c95aa17a8fc0b70bfcb1cff34598 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 17:28:52 -0800
Subject: [PATCH 14/35] reorganize imports

---
 econml/tests/test_statsmodels.py | 38 ++++++++++++++++----------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index 75a544cdf..6a1276662 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -1,31 +1,31 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+import unittest
+import joblib
+
 import numpy as np
 import pytest
-from econml.dml import DML, LinearDML, NonParamDML
-from econml.dr import LinearDRLearner
-from econml.iv.dr import LinearDRIV
-from econml.iv.dml import DMLIV
-from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete
-from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
-from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression
-from econml.iv.dr._dr import _DummyCATE
-from statsmodels.regression.linear_model import WLS
-from statsmodels.tools.tools import add_constant
-from statsmodels.sandbox.regression.gmm import IV2SLS
+
+import scipy.special
+from sklearn.base import clone
 from sklearn.dummy import DummyClassifier
-from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV
 from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV
 from sklearn.model_selection import KFold, StratifiedKFold
-import scipy.special
-import time
-from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS
-from econml.sklearn_extensions.linear_model import StatsModels2SLS
-import unittest
-import joblib
 from sklearn.preprocessing import PolynomialFeatures
-from sklearn.base import clone
+from statsmodels.regression.linear_model import WLS
+from statsmodels.sandbox.regression.gmm import IV2SLS
+from statsmodels.tools.tools import add_constant
+
+from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete
+from econml.dml import DML, LinearDML, NonParamDML
+from econml.dr import LinearDRLearner
+from econml.iv.dml import DMLIV
+from econml.iv.dr import LinearDRIV
+from econml.iv.dr._dr import _DummyCATE
+from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression
+from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
 
 
 class StatsModelsOLS:

From ae7086b1d8e8ce078815c658b292ce26f20a6067 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 17:36:03 -0800
Subject: [PATCH 15/35] adds back dependency

---
 econml/tests/test_statsmodels.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index 6a1276662..817f6c62f 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -25,6 +25,8 @@
 from econml.iv.dr import LinearDRIV
 from econml.iv.dr._dr import _DummyCATE
 from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression
+from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS
+from econml.sklearn_extensions.linear_model import StatsModels2SLS
 from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
 
 

From 57afa35b8f0614385883fc9f639aa8da22b5f865 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 17:41:09 -0800
Subject: [PATCH 16/35] remove unused imports

---
 econml/tests/test_statsmodels.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index 817f6c62f..0d7b04452 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -2,28 +2,24 @@
 # Licensed under the MIT License.
 
 import unittest
-import joblib
 
 import numpy as np
 import pytest
 
 import scipy.special
 from sklearn.base import clone
-from sklearn.dummy import DummyClassifier
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
-from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV
-from sklearn.model_selection import KFold, StratifiedKFold
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.preprocessing import PolynomialFeatures
 from statsmodels.regression.linear_model import WLS
 from statsmodels.sandbox.regression.gmm import IV2SLS
 from statsmodels.tools.tools import add_constant
 
 from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete
-from econml.dml import DML, LinearDML, NonParamDML
+from econml.dml import LinearDML, NonParamDML
 from econml.dr import LinearDRLearner
 from econml.iv.dml import DMLIV
 from econml.iv.dr import LinearDRIV
-from econml.iv.dr._dr import _DummyCATE
 from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS
 from econml.sklearn_extensions.linear_model import StatsModels2SLS

From 64c0c7f29f51f618bdfc28daf88b35aceb62ae07 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 17:50:57 -0800
Subject: [PATCH 17/35] reduce size of matrix by half

---
 econml/tests/test_statsmodels.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index 0d7b04452..8dcba87c7 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -1097,10 +1097,8 @@ def split(self, X, T):
 
     def test_dml_multi_dim_treatment_outcome(self):
         """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """
-        from econml.dml import LinearDML
-        from econml.inference import StatsModelsInference
         np.random.seed(123)
-        n = 100000
+        n = 50000
         precision = .01
         precision_int = .0001
         with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):

From 3d7340d96be9599416de79fe3a66e5789540a89d Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Wed, 23 Feb 2022 18:23:32 -0800
Subject: [PATCH 18/35] reverts n

---
 econml/tests/test_statsmodels.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index 8dcba87c7..dad735327 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -1098,7 +1098,7 @@ def split(self, X, T):
     def test_dml_multi_dim_treatment_outcome(self):
         """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """
         np.random.seed(123)
-        n = 50000
+        n = 100000
         precision = .01
         precision_int = .0001
         with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):

From 194743ead58cc84e74a5a31e8f2beebc12ece403 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 24 Feb 2022 14:18:19 -0800
Subject: [PATCH 19/35] moves test_statsmodels to serial testing

---
 econml/tests/test_statsmodels.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index dad735327..d9de9ff1a 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -26,6 +26,7 @@
 from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
 
 
+@pytest.mark.serial
 class StatsModelsOLS:
     """
     Helper class to wrap a StatsModels OLS model to conform to the sklearn API.

From ac28cf084e4633fa64bfc2f3fb6aac8877ae3e5c Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 24 Feb 2022 17:50:29 -0800
Subject: [PATCH 20/35] combines other tests to single job

---
 azure-pipelines.yml | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 0203d4426..499a28e2e 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -280,11 +280,21 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_serial
+      job: Tests_other
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (Serial)'
+      displayName: 'Run tests (Other)'
       steps:
+      - script: 'pip install pytest pytest-runner && python setup.py pytest'
+        displayName: 'Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - script: 'python setup.py pytest'
+        displayName: 'Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto'
+          COVERAGE_PROCESS_START: 'setup.cfg'
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
@@ -301,4 +311,4 @@ jobs:
         displayName: 'Publish Code Coverage Results'
         inputs:
           codeCoverageTool: Cobertura
-          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
+          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
\ No newline at end of file

From 2ac1aa6e00ce8ac694a1ab1112306e9cd491f79b Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 24 Feb 2022 18:46:01 -0800
Subject: [PATCH 21/35] removes nested loops from test_random_state

---
 econml/tests/test_causal_analysis.py | 34 ++++++++++++++++------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index 316042e93..08cf83d33 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -2,11 +2,14 @@
 # Licensed under the MIT License.
 
 import unittest
+
+from contextlib import ExitStack
+import itertools
 import numpy as np
 from numpy.core.fromnumeric import squeeze
 import pandas as pd
-from contextlib import ExitStack
 import pytest
+
 from econml.solutions.causal_analysis import CausalAnalysis
 from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants
 
@@ -670,21 +673,24 @@ def test_random_state(self):
         inds = [0, 1, 2, 3]
         cats = [2, 3]
         hinds = [0, 3]
-        for n_model in ['linear', 'automl']:
-            for h_model in ['linear', 'forest']:
-                for classification in [True, False]:
-                    ca = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                        nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca.fit(X, y)
-                    glo = ca.global_causal_effect()
 
-                    ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                         nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca2.fit(X, y)
-                    glo2 = ca.global_causal_effect()
+        for n_model, h_model, classification in itertools.product(
+                                                        ['linear', 'automl'],
+                                                        ['linear', 'forest'],
+                                                        [True, False]):
+
+            ca = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca.fit(X, y)
+            glo = ca.global_causal_effect()
+
+            ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                    nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca2.fit(X, y)
+            glo2 = ca.global_causal_effect()
 
-                    np.testing.assert_equal(glo.point.values, glo2.point.values)
-                    np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
+            np.testing.assert_equal(glo.point.values, glo2.point.values)
+            np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
 
     def test_can_set_categories(self):
         y = pd.Series(np.random.choice([0, 1], size=(500,)))

From 78c039148c39d6b042cd6cd0677588bb3db8f7f6 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Thu, 24 Feb 2022 18:50:46 -0800
Subject: [PATCH 22/35] fixes indenting

---
 econml/tests/test_causal_analysis.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index 08cf83d33..5b5ac804d 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -674,10 +674,10 @@ def test_random_state(self):
         cats = [2, 3]
         hinds = [0, 3]
 
-        for n_model, h_model, classification in itertools.product(
-                                                        ['linear', 'automl'],
-                                                        ['linear', 'forest'],
-                                                        [True, False]):
+        for n_model, h_model, classification in\
+            itertools.product(['linear', 'automl'],
+                              ['linear', 'forest'],
+                              [True, False]):
 
             ca = CausalAnalysis(inds, cats, hinds, classification=classification,
                                 nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
@@ -685,7 +685,7 @@ def test_random_state(self):
             glo = ca.global_causal_effect()
 
             ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                    nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+                                 nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
             ca2.fit(X, y)
             glo2 = ca.global_causal_effect()
 

From b6b6f49aa3ed2eea0deb3d26a1f65a57464247b2 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Fri, 25 Feb 2022 12:35:29 -0800
Subject: [PATCH 23/35] tries running causal tests in parallel

---
 azure-pipelines.yml                  | 2 +-
 econml/tests/test_causal_analysis.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 499a28e2e..2e89e548b 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -261,7 +261,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "causal" -n 1'
+          PYTEST_ADDOPTS: '-m "causal" -n 2'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index 5b5ac804d..92d2d222a 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -1,10 +1,13 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+import gc
 import unittest
+import resource
 
 from contextlib import ExitStack
 import itertools
+from memory_profiler import profile
 import numpy as np
 from numpy.core.fromnumeric import squeeze
 import pandas as pd
@@ -692,6 +695,9 @@ def test_random_state(self):
             np.testing.assert_equal(glo.point.values, glo2.point.values)
             np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
 
+            del ca, glo, ca2, glo2
+            gc.collect()
+
     def test_can_set_categories(self):
         y = pd.Series(np.random.choice([0, 1], size=(500,)))
         X = pd.DataFrame({'a': np.random.normal(size=500),
@@ -784,6 +790,9 @@ def test_invalid_inds(self):
                         self.assertEqual(ca.trained_feature_indices_, [0, 1, 2, 3])  # can't handle last two
                         self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'),
                                                                          (5, 'cat_limit')])
+                    
+                    del ca
+                    gc.collect()
 
     # Add tests that guarantee that the reliance on DML feature order is not broken, such as
     # Creare a transformer that zeros out all variables after the first n_x variables, so it zeros out W

From b36a2cb11c05402af7b00b8fc1bbc1c1762b6beb Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Fri, 25 Feb 2022 12:41:40 -0800
Subject: [PATCH 24/35] remove unnecessary deps

---
 econml/tests/test_causal_analysis.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index 92d2d222a..a3658b7d2 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -3,11 +3,9 @@
 
 import gc
 import unittest
-import resource
 
 from contextlib import ExitStack
 import itertools
-from memory_profiler import profile
 import numpy as np
 from numpy.core.fromnumeric import squeeze
 import pandas as pd

From 1ea012b45bf9cd6f954da2bbe893edec9cfa6006 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Fri, 25 Feb 2022 12:51:39 -0800
Subject: [PATCH 25/35] linting fix

---
 econml/tests/test_causal_analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index a3658b7d2..a98af10cc 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -788,7 +788,7 @@ def test_invalid_inds(self):
                         self.assertEqual(ca.trained_feature_indices_, [0, 1, 2, 3])  # can't handle last two
                         self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'),
                                                                          (5, 'cat_limit')])
-                    
+
                     del ca
                     gc.collect()
 

From 7664f757e80d6f5c596b2f56c81cfa0c50c4823a Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Fri, 25 Feb 2022 13:28:19 -0800
Subject: [PATCH 26/35] reverts to run causal tests serially

---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 2e89e548b..499a28e2e 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -261,7 +261,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "causal" -n 2'
+          PYTEST_ADDOPTS: '-m "causal" -n 1'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'

From b94f45481f786abad6f6eb7258de235f48f77151 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Fri, 25 Feb 2022 15:15:21 -0800
Subject: [PATCH 27/35] correctly tags stats models

---
 econml/tests/test_statsmodels.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index d9de9ff1a..9d47ba69b 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -26,7 +26,6 @@
 from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
 
 
-@pytest.mark.serial
 class StatsModelsOLS:
     """
     Helper class to wrap a StatsModels OLS model to conform to the sklearn API.
@@ -266,6 +265,7 @@ def _compare_dr_classes(est, lr, X_test, alpha=.05, tol=1e-10):
         "{}, {}".format(est.effect_interval(X_test, alpha=alpha), lr.effect_interval(X_test, alpha=alpha))
 
 
+@pytest.mark.serial
 class TestStatsModels(unittest.TestCase):
 
     def test_comp_with_lr(self):

From e943f99502819e506dc70aab568ac2f183c24010 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Mon, 7 Mar 2022 19:41:05 -0800
Subject: [PATCH 28/35] re-enables pickling test

---
 econml/tests/test_driv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py
index d66f8f96d..bddff8904 100644
--- a/econml/tests/test_driv.py
+++ b/econml/tests/test_driv.py
@@ -127,12 +127,12 @@ def eff_shape(n, d_x):
 
                     # TODO: serializing/deserializing for every combination -- is this necessary?
                     # ensure we can serialize unfit estimator
-                    # pickle.dumps(est)
+                    pickle.dumps(est)
 
                     est.fit(y, T, Z=Z, X=X, W=W)
 
                     # ensure we can serialize fit estimator
-                    # pickle.dumps(est)
+                    pickle.dumps(est)
 
                     # expected effect size
                     exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)

From fea98c58f036d7907aaf241e1b79420acd8bcc84 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Mon, 7 Mar 2022 19:43:40 -0800
Subject: [PATCH 29/35] uncomments options

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8e1ed4e52..80f8ed8dd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ requires = [
 build-backend = "setuptools.build_meta"
 
 [tool.pytest.ini_options]
-# addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml"
+addopts = "--junitxml=junit/test-results.xml -n auto --strict-markers --cov-config=setup.cfg --cov=econml --cov-report=xml"
 markers = [    
     "slow",
     "notebook",

From 691ea6e8c2d47ffb7e49510f0433e90b74197fa8 Mon Sep 17 00:00:00 2001
From: xrowan <crowan@LAPTOP-4ABFPS8U.localdomain>
Date: Mon, 7 Mar 2022 19:50:43 -0800
Subject: [PATCH 30/35] reorganizes unit test workflow

---
 azure-pipelines.yml | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 499a28e2e..7ed583149 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -285,18 +285,15 @@ jobs:
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
       displayName: 'Run tests (Other)'
       steps:
-      - script: 'pip install pytest pytest-runner && python setup.py pytest'
-        displayName: 'Unit tests'
-        env:
-          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
-          COVERAGE_PROCESS_START: 'setup.cfg'
+      - script: 'pip install pytest pytest-runner'
+        displayName: 'Install pytest'
       - script: 'python setup.py pytest'
-        displayName: 'Unit tests'
+        displayName: 'CATE Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "cate_api_dml" -n auto'
+          PYTEST_ADDOPTS: '-m "cate_api_dml or cate_api" -n auto'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
-        displayName: 'Unit tests'
+        displayName: 'Serial Unit tests'
         env:
           PYTEST_ADDOPTS: '-m "serial" -n 1'
           COVERAGE_PROCESS_START: 'setup.cfg'

From 01c8001837c9d22972fab0c9a8e529ea57459d12 Mon Sep 17 00:00:00 2001
From: Keith Battocchi <kebatt@microsoft.com>
Date: Wed, 30 Mar 2022 12:14:46 -0400
Subject: [PATCH 31/35] Consolidate test marks

---
 azure-pipelines.yml                  | 15 +++++----------
 econml/tests/test_causal_analysis.py | 10 ++--------
 econml/tests/test_dynamic_dml.py     |  2 +-
 econml/tests/test_tree.py            |  3 ---
 pyproject.toml                       |  4 +---
 5 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 7ed583149..7b7a27fea 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -207,7 +207,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal or serial or cate_api or cate_api_dml)" -n 2'
+          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or serial or cate_api)" -n 2'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
@@ -253,7 +253,7 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_causal
+      job: Tests_serial
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
       displayName: 'Run tests (Causal)'
@@ -261,7 +261,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "causal" -n 1'
+          PYTEST_ADDOPTS: '-m "serial" -n 1'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
@@ -280,7 +280,7 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_other
+      job: Tests_CATE_API
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
       displayName: 'Run tests (Other)'
@@ -290,12 +290,7 @@ jobs:
       - script: 'python setup.py pytest'
         displayName: 'CATE Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "cate_api_dml or cate_api" -n auto'
-          COVERAGE_PROCESS_START: 'setup.cfg'
-      - script: 'pip install pytest pytest-runner && python setup.py pytest'
-        displayName: 'Serial Unit tests'
-        env:
-          PYTEST_ADDOPTS: '-m "serial" -n 1'
+          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index a98af10cc..f99c648d0 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import gc
 import unittest
 
 from contextlib import ExitStack
@@ -19,7 +18,7 @@ def assert_less_close(arr1, arr2):
     assert np.all(np.logical_or(arr1 <= arr2, np.isclose(arr1, arr2)))
 
 
-@pytest.mark.causal
+@pytest.mark.serial
 class TestCausalAnalysis(unittest.TestCase):
 
     def test_basic_array(self):
@@ -693,9 +692,6 @@ def test_random_state(self):
             np.testing.assert_equal(glo.point.values, glo2.point.values)
             np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
 
-            del ca, glo, ca2, glo2
-            gc.collect()
-
     def test_can_set_categories(self):
         y = pd.Series(np.random.choice([0, 1], size=(500,)))
         X = pd.DataFrame({'a': np.random.normal(size=500),
@@ -789,14 +785,12 @@ def test_invalid_inds(self):
                         self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'),
                                                                          (5, 'cat_limit')])
 
-                    del ca
-                    gc.collect()
-
     # Add tests that guarantee that the reliance on DML feature order is not broken, such as
     # Creare a transformer that zeros out all variables after the first n_x variables, so it zeros out W
     # Pass an example where W is irrelevant and X is confounder
     # As long as DML doesnt change the order of the inputs, then things should be good. Otherwise X would be
     # zeroed out and the test will fail
+
     def test_scaling_transforms(self):
         # shouldn't matter if X is scaled much larger or much smaller than W, we should still get good estimates
         n = 2000
diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py
index 2f5048cc2..d007a2706 100644
--- a/econml/tests/test_dynamic_dml.py
+++ b/econml/tests/test_dynamic_dml.py
@@ -16,7 +16,7 @@
 from econml.tests.dgp import DynamicPanelDGP
 
 
-@pytest.mark.cate_api_dml
+@pytest.mark.cate_api
 class TestDynamicDML(unittest.TestCase):
 
     def test_cate_api(self):
diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py
index 7267b93fc..b5d898f56 100644
--- a/econml/tests/test_tree.py
+++ b/econml/tests/test_tree.py
@@ -258,9 +258,6 @@ def test_honest_values(self):
         np.testing.assert_array_almost_equal(tree.value.flatten(), .4 * np.ones(len(tree.value)))
 
     def test_noisy_instance(self):
-        """
-        The purpose of this test
-        """
 
         # initialize parameters
         n_samples = 5000
diff --git a/pyproject.toml b/pyproject.toml
index 80f8ed8dd..bf2e2991e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,8 +15,6 @@ markers = [
     "notebook",
     "automl",
     "dml",
-    "causal",
     "serial",
-    "cate_api", 
-    "cate_api_dml"
+    "cate_api"
 ]
\ No newline at end of file

From 6ad331555051c4e29c345e0d458192a439e707a3 Mon Sep 17 00:00:00 2001
From: "Keith Battocchi (HE/HIM)" <kebatt@microsoft.com>
Date: Fri, 1 Apr 2022 12:26:23 -0400
Subject: [PATCH 32/35] Update numpy test dependency

---
 azure-pipelines.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 7b7a27fea..de74e478b 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -96,7 +96,7 @@ jobs:
       displayName: 'Notebooks (Customer Solutions)'
       steps:
       # Work around https://github.com/pypa/pip/issues/9542
-      - script: 'pip install -U numpy~=1.21.0'
+      - script: 'pip install -U numpy~=1.22.0'
         displayName: 'Upgrade numpy'
 
       - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
@@ -124,7 +124,7 @@ jobs:
       displayName: 'Notebooks (except Customer Solutions)'
       steps:
       # Work around https://github.com/pypa/pip/issues/9542
-      - script: 'pip install -U numpy~=1.21.0'
+      - script: 'pip install -U numpy~=1.22.0'
         displayName: 'Upgrade numpy'
 
       # shap 0.39 and sklearn 1.0 interact badly in these notebooks
@@ -256,7 +256,7 @@ jobs:
       job: Tests_serial
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (Causal)'
+      displayName: 'Run tests (Serial)'
       steps:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'

From 564af9e4a41454510b7dc79dab710a148bd8c600 Mon Sep 17 00:00:00 2001
From: Keith Battocchi <kebatt@microsoft.com>
Date: Mon, 4 Apr 2022 18:09:14 -0400
Subject: [PATCH 33/35] Revert numpy version

---
 azure-pipelines.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index de74e478b..ff9b84a82 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -96,10 +96,10 @@ jobs:
       displayName: 'Notebooks (Customer Solutions)'
       steps:
       # Work around https://github.com/pypa/pip/issues/9542
-      - script: 'pip install -U numpy~=1.22.0'
+      - script: 'pip install -U numpy~=1.21.0'
         displayName: 'Upgrade numpy'
 
-      - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
+      - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest'
         displayName: 'Unit tests'
         env:
           PYTEST_ADDOPTS: '-m "notebook"'
@@ -124,7 +124,7 @@ jobs:
       displayName: 'Notebooks (except Customer Solutions)'
       steps:
       # Work around https://github.com/pypa/pip/issues/9542
-      - script: 'pip install -U numpy~=1.22.0'
+      - script: 'pip install -U numpy~=1.21.0'
         displayName: 'Upgrade numpy'
 
       # shap 0.39 and sklearn 1.0 interact badly in these notebooks

From 96d729200f412282fd2a57a6d5e0893cbbc106e6 Mon Sep 17 00:00:00 2001
From: Keith Battocchi <kebatt@microsoft.com>
Date: Tue, 5 Apr 2022 12:58:11 -0400
Subject: [PATCH 34/35] Remove workaround for downlevel shap

---
 azure-pipelines.yml | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index ff9b84a82..7f6282ae3 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -98,7 +98,7 @@ jobs:
       # Work around https://github.com/pypa/pip/issues/9542
       - script: 'pip install -U numpy~=1.21.0'
         displayName: 'Upgrade numpy'
-
+        
       - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest'
         displayName: 'Unit tests'
         env:
@@ -126,12 +126,6 @@ jobs:
       # Work around https://github.com/pypa/pip/issues/9542
       - script: 'pip install -U numpy~=1.21.0'
         displayName: 'Upgrade numpy'
-
-      # shap 0.39 and sklearn 1.0 interact badly in these notebooks
-      # shap 0.40 has a bug in waterfall (https://github.com/slundberg/shap/issues/2283) that breaks our main tests
-      # but fixes the interaction here...
-      - script: 'pip install -U shap~=0.40.0'
-        displayName: 'Upgrade shap'
         
       - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
         displayName: 'Unit tests'

From 4e8d0dff4fa322cf5907097944a0eb7976639a61 Mon Sep 17 00:00:00 2001
From: Keith Battocchi <kebatt@microsoft.com>
Date: Tue, 5 Apr 2022 15:07:17 -0400
Subject: [PATCH 35/35] Add GC collections

---
 econml/tests/test_causal_analysis.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index f99c648d0..4293f3233 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -9,6 +9,7 @@
 from numpy.core.fromnumeric import squeeze
 import pandas as pd
 import pytest
+import gc
 
 from econml.solutions.causal_analysis import CausalAnalysis
 from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants
@@ -692,6 +693,9 @@ def test_random_state(self):
             np.testing.assert_equal(glo.point.values, glo2.point.values)
             np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
 
+            del ca, glo, ca2, glo2
+            gc.collect()
+
     def test_can_set_categories(self):
         y = pd.Series(np.random.choice([0, 1], size=(500,)))
         X = pd.DataFrame({'a': np.random.normal(size=500),
@@ -785,6 +789,9 @@ def test_invalid_inds(self):
                         self.assertEqual(ca.untrained_feature_indices_, [(4, 'cat_limit'),
                                                                          (5, 'cat_limit')])
 
+                    del ca
+                    gc.collect()
+
     # Add tests that guarantee that the reliance on DML feature order is not broken, such as
     # Creare a transformer that zeros out all variables after the first n_x variables, so it zeros out W
     # Pass an example where W is irrelevant and X is confounder