Unpin dependencies blocked on Python 3.8 #4432

Closed · wants to merge 10 commits
6 changes: 3 additions & 3 deletions .github/meta.yaml
@@ -25,16 +25,16 @@ outputs:
- setuptools >=58.0.4
run:
- numpy >=1.22.0
- pandas >=1.5.0, <2.1.0
- pandas >=2.2.0
- dask >=2022.2.0, !=2022.10.1
- scipy >=1.5.0, <1.12.0
- scipy >=1.5.0
- scikit-learn >=1.3.2
- scikit-optimize >=0.9.0
- statsmodels >=0.12.2
- colorama >=0.4.4
- cloudpickle >=1.5.0
- click >=8.0.0
- shap >=0.42.0, <0.45.0
- shap >=0.45.0
- texttable >=1.6.2
- woodwork >=0.22.0
- featuretools >=1.16.0
6 changes: 3 additions & 3 deletions core-requirements.txt
@@ -1,13 +1,13 @@
numpy>=1.21.0
pandas>=1.5.0, <2.1.0
scipy>=1.5.0, <1.12.0
pandas>=2.2.0
scipy>=1.5.0
scikit-learn>=1.3.2
scikit-optimize>=0.9.0
pyzmq>=20.0.0
colorama>=0.4.4
cloudpickle>=1.5.0
click>=8.0.0
shap>=0.42.0
shap>=0.45.0
statsmodels>=0.12.2
texttable>=1.6.2
woodwork>= 0.21.1
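The conda recipe above and the pip requirements here raise the same floors (pandas to 2.2.0, shap to 0.45.0) and drop the upper caps on pandas, scipy, and shap. A minimal sketch, assuming `packaging` is available (it is already pinned in the test requirements), of how a local environment could be checked against the new minimums; the package list below simply mirrors the changed pins:

```python
from importlib.metadata import version

from packaging.version import Version

# New floors introduced by this PR (scipy's floor is unchanged; only its cap is removed).
floors = {"pandas": "2.2.0", "scipy": "1.5.0", "shap": "0.45.0", "scikit-learn": "1.3.2"}
for pkg, floor in floors.items():
    installed = Version(version(pkg))
    assert installed >= Version(floor), f"{pkg} {installed} is below the new minimum {floor}"
```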
9 changes: 5 additions & 4 deletions docs/source/release_notes.rst
@@ -9,12 +9,13 @@ Release Notes
* Removed vowpalwabbit :pr:`4427`
* Uncapped holidays :pr:`4428`
* Unpinned kaleido :pr:`4423`
* Unpinned pandas, scipy, and shap versions :pr:`4432`
* Documentation Changes
* Testing Changes
* Run airflow tests in Python 3.9 :pr:`4391`
* Remove iterative test from airflow runs :pr:`4424`
* Update GH actions to improve handling of potentially unsafe variables :pr:`4417`
* Fix install test :pr:`4423`
* Added ability to run airflow tests in Python 3.9 :pr:`4391`
* Removed iterative test from airflow runs :pr:`4424`
* Updated GH actions to improve handling of potentially unsafe variables :pr:`4417`
* Fixed install test :pr:`4423`

.. warning::

2 changes: 1 addition & 1 deletion evalml/data_checks/target_distribution_data_check.py
@@ -161,6 +161,6 @@

# If the p-value of the log transformed target is greater than or equal to the p-value of the original target
# with outliers dropped, then it would imply that the log transformed target has more of a normal distribution
if norm_test_log.pvalue >= norm_test_og.pvalue:
if round(norm_test_log.pvalue, 6) >= round(norm_test_og.pvalue, 6):
return True, normalization_test_string, norm_test_og
return False, normalization_test_string, norm_test_og
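The only behavioral change in this file is rounding both p-values before comparing them, so that tiny floating-point differences (which can vary across scipy versions) no longer flip which distribution looks "more normal". A minimal sketch of that comparison, with illustrative names rather than the data check's real internals, and assuming a strictly positive target for the log transform:

```python
import numpy as np
from scipy import stats

def log_transform_looks_helpful(y, y_no_outliers):
    """Return True when the log-transformed target looks at least as normal
    as the original target with outliers dropped."""
    norm_test_og = stats.normaltest(y_no_outliers)
    norm_test_log = stats.normaltest(np.log(y))  # assumes y > 0
    # Round to 6 decimal places so near-identical p-values compare as equal
    # instead of being decided by floating-point noise.
    return round(norm_test_log.pvalue, 6) >= round(norm_test_og.pvalue, 6)
```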
37 changes: 21 additions & 16 deletions evalml/model_understanding/prediction_explanations/_algorithms.py
@@ -144,15 +144,6 @@ def _compute_shap_values(pipeline, features, training_data=None):
if ws:
logger.debug(f"_compute_shap_values TreeExplainer: {ws[0].message}")
shap_values = explainer.shap_values(features, check_additivity=False)
# shap only outputs values for positive class for Catboost/Xgboost binary estimators.
# this modifies the output to match the output format of other binary estimators.
# Ok to fill values of negative class with zeros since the negative class will get dropped
# in the UI anyways.
if estimator.model_family in {
ModelFamily.CATBOOST,
ModelFamily.XGBOOST,
} and is_binary(pipeline.problem_type):
shap_values = [np.zeros(shap_values.shape), shap_values]
else:
if training_data is None:
raise ValueError(
@@ -189,16 +180,30 @@ def _compute_shap_values(pipeline, features, training_data=None):
except IndexError:
expected_value = explainer.expected_value

# classification problem
if isinstance(shap_values, list):
mappings = []
for class_shap_values in shap_values:
mappings.append(_create_dictionary(class_shap_values, feature_names))
return (mappings, expected_value)
# regression problem
elif isinstance(shap_values, np.ndarray):
if is_regression(pipeline.problem_type):
dic = _create_dictionary(shap_values, feature_names)
return (dic, expected_value)

# classification problem
if len(shap_values.shape) == 3:
mappings = []
for class_shap_values in shap_values.T:
mappings.append(_create_dictionary(class_shap_values.T, feature_names))
return (mappings, expected_value)
# shap only outputs values for positive class for boosted binary estimators.
# this modifies the output to match the output format of other binary estimators.
# Ok to fill values of negative class with the positive class since the negative class
# will get dropped in the UI anyways.
if estimator.model_family in {
ModelFamily.CATBOOST,
ModelFamily.XGBOOST,
ModelFamily.LIGHTGBM,
} and is_binary(pipeline.problem_type):
mappings = []
for _ in range(2):
mappings.append(_create_dictionary(shap_values, feature_names))
return (mappings, expected_value)
else:
raise ValueError(f"Unknown shap_values datatype {str(type(shap_values))}!")

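Context for the rewrite above: with shap 0.45, `explainer.shap_values` returns a single numpy array rather than a list of per-class arrays, which is why the old list branch is gone and the new code dispatches on the array's dimensionality (3-D with a trailing class axis for multiclass). A hedged sketch of that dispatch, with simplified, illustrative names and none of the pipeline plumbing:

```python
import numpy as np

def split_shap_output(shap_values, feature_names, binary_boosted=False):
    """Illustrative only: normalize shap >= 0.45 array output into
    per-class mappings of {feature_name: contributions}."""
    if shap_values.ndim == 3:
        # Multiclass: the last axis indexes the class.
        return [
            {name: shap_values[:, i, c].tolist() for i, name in enumerate(feature_names)}
            for c in range(shap_values.shape[-1])
        ]
    values = {name: shap_values[:, i].tolist() for i, name in enumerate(feature_names)}
    if binary_boosted:
        # CatBoost/XGBoost/LightGBM binary estimators only report the positive
        # class, so reuse the same values for both classes to match the format
        # of other binary estimators (the negative class is dropped in the UI).
        return [values, values]
    return values  # regression (single-output) case
```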
evalml/pipelines/components/transformers/imputers/time_series_imputer.py
@@ -2,10 +2,6 @@

import pandas as pd
import woodwork as ww
from woodwork.logical_types import (
BooleanNullable,
Double,
)

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types
@@ -57,11 +53,6 @@
["backwards_fill", "forwards_fill", "interpolate"],
)

# Incompatibility: https://github.com/alteryx/evalml/issues/4001
# TODO: Remove when support is added https://github.com/alteryx/evalml/issues/4014
_integer_nullable_incompatibilities = ["X", "y"]
_boolean_nullable_incompatibilities = ["y"]

def __init__(
self,
categorical_impute_strategy="forwards_fill",
@@ -173,7 +164,6 @@
# This will change the logical type of BooleanNullable/IntegerNullable/AgeNullable columns with nans
# so we save the original schema to recreate it where possible after imputation
original_schema = X.ww.schema
X, y = self._handle_nullable_types(X, y)

X_not_all_null = X.ww.drop(self._all_null_cols)

@@ -221,11 +211,22 @@
X_not_all_null.ww.init(schema=original_schema, logical_types=new_ltypes)

y_imputed = (
y.ww.drop(self._y_all_null_cols)
if isinstance(y, pd.DataFrame)
else pd.Series(y)
y.ww.drop(self._y_all_null_cols) if isinstance(y, pd.DataFrame) else y
)
if y is not None and len(y) > 0:

if y is not None and not y_imputed.empty:
# Repeat the same type checking process as for X with y
y_original_schema = y_imputed.ww.schema
if isinstance(y, pd.Series):
new_ltype = _determine_non_nullable_equivalent(
y_original_schema.logical_type,
)
else:
new_ltypes = {
col: _determine_non_nullable_equivalent(ltype)
for col, ltype in y_original_schema.logical_types.items()
}

if self._impute_target == "forwards_fill":
y_imputed = y_imputed.pad()
y_imputed.bfill(inplace=True)
@@ -235,47 +236,26 @@
elif self._impute_target == "interpolate":
y_imputed = y_imputed.interpolate()
y_imputed.bfill(inplace=True)

if isinstance(y, pd.Series):
new_ltype = _determine_fractional_type(
y_original_schema.logical_type,
)
else:
int_cols_to_update = y_original_schema._filter_cols(
include=["IntegerNullable", "AgeNullable"],
)
new_int_ltypes = {
col: _determine_fractional_type(ltype)
for col, ltype in y_original_schema.logical_types.items()
if col in int_cols_to_update
}
new_ltypes.update(new_int_ltypes)

# Re-initialize woodwork with the downcast logical type
if isinstance(y, pd.Series):
y_imputed = ww.init_series(y_imputed, logical_type=y.ww.logical_type)
y_imputed = ww.init_series(y_imputed, logical_type=new_ltype)
else:
y_original_schema = y.ww.schema.get_subset_schema(
list(y_imputed.columns),
)
y_new_ltypes = {
col: _determine_non_nullable_equivalent(ltype)
for col, ltype in y_original_schema.logical_types.items()
}
y_imputed.ww.init(schema=y_original_schema, logical_types=y_new_ltypes)
y_imputed.ww.init(schema=y_original_schema, logical_types=new_ltypes)

return X_not_all_null, y_imputed

def _handle_nullable_types(self, X=None, y=None):
"""Transforms X and y to remove any incompatible nullable types for the time series imputer when the interpolate method is used.

Args:
X (pd.DataFrame, optional): Input data to a component of shape [n_samples, n_features].
May contain nullable types.
y (pd.Series or pd.DataFrame, optional): The target of length [n_samples] or the
unstacked target for a multiseries problem of length [n_samples, n_features*n_series].
May contain nullable types.

Returns:
X, y with any incompatible nullable types downcasted to compatible equivalents when interpolate is used. Is NoOp otherwise.
"""
if self._impute_target == "interpolate":
# For BooleanNullable, we have to avoid Categorical columns
# since the category dtype also has incompatibilities with linear interpolate, which is expected
# TODO: Avoid categorical columns for BooleanNullable in multiseries when
# multiseries timeseries supports categorical
if isinstance(y, pd.Series) and isinstance(
y.ww.logical_type,
BooleanNullable,
):
y = ww.init_series(y, Double)
else:
_, y = super()._handle_nullable_types(None, y)
if self._interpolate_cols is not None:
X, _ = super()._handle_nullable_types(X, None)

return X, y
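In short, the imputer no longer downcasts nullable types up front via `_handle_nullable_types` (removed above); it keeps the original woodwork schema through imputation and promotes the target's logical type afterwards, because interpolation can introduce fractional values. A simplified, illustrative sketch of that promotion (the real code uses evalml's `_determine_fractional_type` and `_determine_non_nullable_equivalent` helpers and also handles DataFrame targets):

```python
import woodwork as ww

# Illustrative promotion table; the real helpers cover more logical types.
_FRACTIONAL_EQUIVALENTS = {
    "IntegerNullable": "Double",
    "AgeNullable": "AgeFractional",
}

def reinit_target_after_interpolation(y_imputed, original_ltype):
    """Re-initialize woodwork on the imputed target with a fractional,
    non-nullable logical type when interpolation may have produced floats."""
    new_ltype = _FRACTIONAL_EQUIVALENTS.get(str(original_ltype), str(original_ltype))
    return ww.init_series(y_imputed, logical_type=new_ltype)
```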
85 changes: 3 additions & 82 deletions evalml/tests/component_tests/test_time_series_imputer.py
@@ -590,7 +590,6 @@
@pytest.mark.parametrize(
"nullable_y_ltype, expected_imputed_y_ltype",
[
("BooleanNullable", Double),
("IntegerNullable", Double),
("AgeNullable", AgeFractional),
],
@@ -638,93 +637,15 @@
assert X.ww.get_subset_schema(
cols_expected_to_stay_the_same,
) == X_imputed.ww.get_subset_schema(cols_expected_to_stay_the_same)
assert {
X_ltypes = {
str(ltype)
for col, ltype in X_imputed.ww.logical_types.items()
if col in cols_expected_to_change
} == expected_X_ltypes

}
assert X_ltypes == expected_X_ltypes
assert isinstance(y_imputed.ww.logical_type, expected_imputed_y_ltype)


@pytest.mark.parametrize(
"categorical_impute_strategy",
["forwards_fill", "backwards_fill"],
)
@pytest.mark.parametrize(
"numeric_impute_strategy",
["forwards_fill", "backwards_fill", "interpolate"],
)
@pytest.mark.parametrize(
"target_impute_strategy",
["forwards_fill", "backwards_fill", "interpolate"],
)
def test_imputer_nullable_handling_noop_for_non_impute_methods(
nullable_type_test_data,
nullable_type_target,
target_impute_strategy,
numeric_impute_strategy,
categorical_impute_strategy,
):
imputer = TimeSeriesImputer(
categorical_impute_strategy=categorical_impute_strategy,
numeric_impute_strategy=numeric_impute_strategy,
target_impute_strategy=target_impute_strategy,
)

X = nullable_type_test_data(has_nans=True)
y = nullable_type_target(ltype="IntegerNullable", has_nans=True)

imputer.fit(X, y)
original_X_schema = X.ww.schema
original_y_schema = y.ww.schema
X_d, y_d = imputer._handle_nullable_types(X, y)

# Confirm that we only change inputs when interpolate is used
if numeric_impute_strategy != "interpolate":
assert X_d.ww.schema == original_X_schema
else:
assert X_d.ww.schema != original_X_schema

if target_impute_strategy != "interpolate":
assert y_d.ww.schema == original_y_schema
else:
assert y_d.ww.schema != original_y_schema


@pytest.mark.parametrize(
"nullable_ltype",
["BooleanNullable", "IntegerNullable", "AgeNullable"],
)
@pytest.mark.parametrize(
"handle_incompatibility",
[
True,
pytest.param(
False,
marks=pytest.mark.xfail(strict=True, raises=ValueError),
),
],
)
def test_time_series_imputer_nullable_type_incompatibility(
nullable_type_target,
handle_incompatibility,
nullable_ltype,
):
"""Testing that the nullable type incompatibility that caused us to add handling for the time series imputer
is still present in pandas' interpolate method. If this test is causing the test suite to fail
because the code below no longer raises the expected ValueError, we should confirm that the nullable
types now work for our use case and remove the nullable type handling logic from TimeSeriesImputer.
"""
nullable_series = nullable_type_target(ltype=nullable_ltype, has_nans=True)
if handle_incompatibility:
imputer = TimeSeriesImputer(target_impute_strategy="interpolate")
imputer.fit(pd.DataFrame(), nullable_series)
_, nullable_series = imputer._handle_nullable_types(None, nullable_series)

nullable_series.interpolate()
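The test removed above existed purely as a canary: its docstring says to drop the nullable-type handling once pandas' `interpolate` stops raising on nullable series. With the minimum pandas now at 2.2.0 that appears to be the case, so both the canary and the handling were removed. A quick, illustrative check of the behavior it guarded:

```python
import pandas as pd

# With pandas >= 2.2 (the new floor in this PR), interpolating a nullable
# integer series is expected to succeed rather than raise the ValueError
# the removed test was watching for.
s = pd.Series([1, None, 4], dtype="Int64")
print(s.interpolate())
```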


@pytest.mark.parametrize(
"nans_present",
[True, False],
evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -21,15 +21,15 @@ networkx==3.2.1
nlp-primitives==2.13.0
numpy==1.26.4
packaging==24.0
pandas==2.0.3
pandas==2.2.2
plotly==5.22.0
pmdarima==2.0.4
pyzmq==26.0.3
scikit-learn==1.4.2
scikit-optimize==0.10.1
scipy==1.11.4
scipy==1.13.0
seaborn==0.13.2
shap==0.44.1
shap==0.45.1
sktime==0.28.1
statsmodels==0.14.2
texttable==1.7.0
4 changes: 2 additions & 2 deletions evalml/tests/dependency_update_check/minimum_requirements.txt
@@ -19,15 +19,15 @@ networkx==2.7
nlp-primitives==2.9.0
numpy==1.22.0
packaging==23.0
pandas==1.5.0
pandas==2.2.0
plotly==5.0.0
pmdarima==1.8.5
pyzmq==20.0.0
scikit-learn==1.3.2
scikit-optimize==0.9.0
scipy==1.5.0
seaborn==0.11.1
shap==0.42.0
shap==0.45.0
sktime==0.21.0
statsmodels==0.12.2
texttable==1.6.2