Unpin dependencies blocked on Python 3.8 #4432

Closed · wants to merge 10 commits
6 changes: 3 additions & 3 deletions .github/meta.yaml
@@ -25,16 +25,16 @@ outputs:
- setuptools >=58.0.4
run:
- numpy >=1.22.0
- pandas >=1.5.0, <2.1.0
- pandas >=2.2.0
- dask >=2022.2.0, !=2022.10.1
- scipy >=1.5.0, <1.12.0
- scipy >=1.5.0
- scikit-learn >=1.3.2
- scikit-optimize >=0.9.0
- statsmodels >=0.12.2
- colorama >=0.4.4
- cloudpickle >=1.5.0
- click >=8.0.0
- shap >=0.42.0, <0.45.0
- shap >=0.45.0
- texttable >=1.6.2
- woodwork >=0.22.0
- featuretools >=1.16.0
6 changes: 3 additions & 3 deletions core-requirements.txt
@@ -1,13 +1,13 @@
numpy>=1.21.0
pandas>=1.5.0, <2.1.0
scipy>=1.5.0, <1.12.0
pandas>=2.2.0
scipy>=1.5.0
scikit-learn>=1.3.2
scikit-optimize>=0.9.0
pyzmq>=20.0.0
colorama>=0.4.4
cloudpickle>=1.5.0
click>=8.0.0
shap>=0.42.0
shap>=0.45.0
statsmodels>=0.12.2
texttable>=1.6.2
woodwork>= 0.21.1
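The conda recipe above and the pip requirements here raise the same floors (pandas to 2.2.0, shap to 0.45.0) and drop the upper caps on pandas, scipy, and shap. A minimal sketch, assuming `packaging` is available (it is already pinned in the test requirements), of how a local environment could be checked against the new minimums; the package list below simply mirrors the changed pins:

```python
from importlib.metadata import version

from packaging.version import Version

# New floors introduced by this PR (scipy's floor is unchanged; only its cap is removed).
floors = {"pandas": "2.2.0", "scipy": "1.5.0", "shap": "0.45.0", "scikit-learn": "1.3.2"}
for pkg, floor in floors.items():
    installed = Version(version(pkg))
    assert installed >= Version(floor), f"{pkg} {installed} is below the new minimum {floor}"
```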
9 changes: 5 additions & 4 deletions docs/source/release_notes.rst
@@ -9,12 +9,13 @@ Release Notes
* Removed vowpalwabbit :pr:`4427`
* Uncapped holidays :pr:`4428`
* Unpinned kaleido :pr:`4423`
* Unpinned pandas, scipy, and shap versions :pr:`4432`
* Documentation Changes
* Testing Changes
* Run airflow tests in Python 3.9 :pr:`4391`
* Remove iterative test from airflow runs :pr:`4424`
* Update GH actions to improve handling of potentially unsafe variables :pr:`4417`
* Fix install test :pr:`4423`
* Added ability to run airflow tests in Python 3.9 :pr:`4391`
* Removed iterative test from airflow runs :pr:`4424`
* Updated GH actions to improve handling of potentially unsafe variables :pr:`4417`
* Fixed install test :pr:`4423`

.. warning::

2 changes: 1 addition & 1 deletion evalml/data_checks/target_distribution_data_check.py
@@ -161,6 +161,6 @@

# If the p-value of the log transformed target is greater than or equal to the p-value of the original target
# with outliers dropped, then it would imply that the log transformed target has more of a normal distribution
if norm_test_log.pvalue >= norm_test_og.pvalue:
if round(norm_test_log.pvalue, 6) >= round(norm_test_og.pvalue, 6):
return True, normalization_test_string, norm_test_og
return False, normalization_test_string, norm_test_og
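The only behavioral change in this file is rounding both p-values before comparing them, so that tiny floating-point differences (which can vary across scipy versions) no longer flip which distribution looks "more normal". A minimal sketch of that comparison, with illustrative names rather than the data check's real internals, and assuming a strictly positive target for the log transform:

```python
import numpy as np
from scipy import stats

def log_transform_looks_helpful(y, y_no_outliers):
    """Return True when the log-transformed target looks at least as normal
    as the original target with outliers dropped."""
    norm_test_og = stats.normaltest(y_no_outliers)
    norm_test_log = stats.normaltest(np.log(y))  # assumes y > 0
    # Round to 6 decimal places so near-identical p-values compare as equal
    # instead of being decided by floating-point noise.
    return round(norm_test_log.pvalue, 6) >= round(norm_test_og.pvalue, 6)
```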
37 changes: 21 additions & 16 deletions evalml/model_understanding/prediction_explanations/_algorithms.py
@@ -144,15 +144,6 @@ def _compute_shap_values(pipeline, features, training_data=None):
if ws:
logger.debug(f"_compute_shap_values TreeExplainer: {ws[0].message}")
shap_values = explainer.shap_values(features, check_additivity=False)
# shap only outputs values for positive class for Catboost/Xgboost binary estimators.
# this modifies the output to match the output format of other binary estimators.
# Ok to fill values of negative class with zeros since the negative class will get dropped
# in the UI anyways.
if estimator.model_family in {
ModelFamily.CATBOOST,
ModelFamily.XGBOOST,
} and is_binary(pipeline.problem_type):
shap_values = [np.zeros(shap_values.shape), shap_values]
else:
if training_data is None:
raise ValueError(
@@ -189,16 +180,30 @@ def _compute_shap_values(pipeline, features, training_data=None):
except IndexError:
expected_value = explainer.expected_value

# classification problem
if isinstance(shap_values, list):
mappings = []
for class_shap_values in shap_values:
mappings.append(_create_dictionary(class_shap_values, feature_names))
return (mappings, expected_value)
# regression problem
elif isinstance(shap_values, np.ndarray):
if is_regression(pipeline.problem_type):
dic = _create_dictionary(shap_values, feature_names)
return (dic, expected_value)

# classification problem
if len(shap_values.shape) == 3:
mappings = []
for class_shap_values in shap_values.T:
mappings.append(_create_dictionary(class_shap_values.T, feature_names))
return (mappings, expected_value)
# shap only outputs values for positive class for boosted binary estimators.
# this modifies the output to match the output format of other binary estimators.
# Ok to fill values of negative class with the positive class since the negative class
# will get dropped in the UI anyways.
if estimator.model_family in {
ModelFamily.CATBOOST,
ModelFamily.XGBOOST,
ModelFamily.LIGHTGBM,
} and is_binary(pipeline.problem_type):
mappings = []
for _ in range(2):
mappings.append(_create_dictionary(shap_values, feature_names))
return (mappings, expected_value)
else:
raise ValueError(f"Unknown shap_values datatype {str(type(shap_values))}!")

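Context for the rewrite above: with shap 0.45, `explainer.shap_values` returns a single numpy array rather than a list of per-class arrays, which is why the old list branch is gone and the new code dispatches on the array's dimensionality (3-D with a trailing class axis for multiclass). A hedged sketch of that dispatch, with simplified, illustrative names and none of the pipeline plumbing:

```python
import numpy as np

def split_shap_output(shap_values, feature_names, binary_boosted=False):
    """Illustrative only: normalize shap >= 0.45 array output into
    per-class mappings of {feature_name: contributions}."""
    if shap_values.ndim == 3:
        # Multiclass: the last axis indexes the class.
        return [
            {name: shap_values[:, i, c].tolist() for i, name in enumerate(feature_names)}
            for c in range(shap_values.shape[-1])
        ]
    values = {name: shap_values[:, i].tolist() for i, name in enumerate(feature_names)}
    if binary_boosted:
        # CatBoost/XGBoost/LightGBM binary estimators only report the positive
        # class, so reuse the same values for both classes to match the format
        # of other binary estimators (the negative class is dropped in the UI).
        return [values, values]
    return values  # regression (single-output) case
```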
evalml/pipelines/components/transformers/imputers/time_series_imputer.py
@@ -2,10 +2,6 @@

import pandas as pd
import woodwork as ww
from woodwork.logical_types import (
BooleanNullable,
Double,
)

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types
@@ -57,11 +53,6 @@
["backwards_fill", "forwards_fill", "interpolate"],
)

# Incompatibility: https://github.com/alteryx/evalml/issues/4001
# TODO: Remove when support is added https://github.com/alteryx/evalml/issues/4014
_integer_nullable_incompatibilities = ["X", "y"]
_boolean_nullable_incompatibilities = ["y"]

def __init__(
self,
categorical_impute_strategy="forwards_fill",
@@ -173,7 +164,6 @@
# This will change the logical type of BooleanNullable/IntegerNullable/AgeNullable columns with nans
# so we save the original schema to recreate it where possible after imputation
original_schema = X.ww.schema
X, y = self._handle_nullable_types(X, y)

X_not_all_null = X.ww.drop(self._all_null_cols)

@@ -221,11 +211,22 @@
X_not_all_null.ww.init(schema=original_schema, logical_types=new_ltypes)

y_imputed = (
y.ww.drop(self._y_all_null_cols)
if isinstance(y, pd.DataFrame)
else pd.Series(y)
y.ww.drop(self._y_all_null_cols) if isinstance(y, pd.DataFrame) else y
)
if y is not None and len(y) > 0:

if y is not None and not y_imputed.empty:
# Repeat the same type checking process as for X with y
y_original_schema = y_imputed.ww.schema
if isinstance(y, pd.Series):
new_ltype = _determine_non_nullable_equivalent(
y_original_schema.logical_type,
)
else:
new_ltypes = {
col: _determine_non_nullable_equivalent(ltype)
for col, ltype in y_original_schema.logical_types.items()
}

if self._impute_target == "forwards_fill":
y_imputed = y_imputed.pad()
y_imputed.bfill(inplace=True)
@@ -235,47 +236,26 @@
elif self._impute_target == "interpolate":
y_imputed = y_imputed.interpolate()
y_imputed.bfill(inplace=True)

if isinstance(y, pd.Series):
new_ltype = _determine_fractional_type(
y_original_schema.logical_type,
)
else:
int_cols_to_update = y_original_schema._filter_cols(
include=["IntegerNullable", "AgeNullable"],
)
new_int_ltypes = {
col: _determine_fractional_type(ltype)
for col, ltype in y_original_schema.logical_types.items()
if col in int_cols_to_update
}
new_ltypes.update(new_int_ltypes)

# Re-initialize woodwork with the downcast logical type
if isinstance(y, pd.Series):
y_imputed = ww.init_series(y_imputed, logical_type=y.ww.logical_type)
y_imputed = ww.init_series(y_imputed, logical_type=new_ltype)
else:
y_original_schema = y.ww.schema.get_subset_schema(
list(y_imputed.columns),
)
y_new_ltypes = {
col: _determine_non_nullable_equivalent(ltype)
for col, ltype in y_original_schema.logical_types.items()
}
y_imputed.ww.init(schema=y_original_schema, logical_types=y_new_ltypes)
y_imputed.ww.init(schema=y_original_schema, logical_types=new_ltypes)

return X_not_all_null, y_imputed

def _handle_nullable_types(self, X=None, y=None):
"""Transforms X and y to remove any incompatible nullable types for the time series imputer when the interpolate method is used.

Args:
X (pd.DataFrame, optional): Input data to a component of shape [n_samples, n_features].
May contain nullable types.
y (pd.Series or pd.DataFrame, optional): The target of length [n_samples] or the
unstacked target for a multiseries problem of length [n_samples, n_features*n_series].
May contain nullable types.

Returns:
X, y with any incompatible nullable types downcasted to compatible equivalents when interpolate is used. Is NoOp otherwise.
"""
if self._impute_target == "interpolate":
# For BooleanNullable, we have to avoid Categorical columns
# since the category dtype also has incompatibilities with linear interpolate, which is expected
# TODO: Avoid categorical columns for BooleanNullable in multiseries when
# multiseries timeseries supports categorical
if isinstance(y, pd.Series) and isinstance(
y.ww.logical_type,
BooleanNullable,
):
y = ww.init_series(y, Double)
else:
_, y = super()._handle_nullable_types(None, y)
if self._interpolate_cols is not None:
X, _ = super()._handle_nullable_types(X, None)

return X, y
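In short, the imputer no longer downcasts nullable types up front via `_handle_nullable_types` (removed above); it keeps the original woodwork schema through imputation and promotes the target's logical type afterwards, because interpolation can introduce fractional values. A simplified, illustrative sketch of that promotion (the real code uses evalml's `_determine_fractional_type` and `_determine_non_nullable_equivalent` helpers and also handles DataFrame targets):

```python
import woodwork as ww

# Illustrative promotion table; the real helpers cover more logical types.
_FRACTIONAL_EQUIVALENTS = {
    "IntegerNullable": "Double",
    "AgeNullable": "AgeFractional",
}

def reinit_target_after_interpolation(y_imputed, original_ltype):
    """Re-initialize woodwork on the imputed target with a fractional,
    non-nullable logical type when interpolation may have produced floats."""
    new_ltype = _FRACTIONAL_EQUIVALENTS.get(str(original_ltype), str(original_ltype))
    return ww.init_series(y_imputed, logical_type=new_ltype)
```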
85 changes: 3 additions & 82 deletions evalml/tests/component_tests/test_time_series_imputer.py
@@ -590,7 +590,6 @@
@pytest.mark.parametrize(
"nullable_y_ltype, expected_imputed_y_ltype",
[
("BooleanNullable", Double),
("IntegerNullable", Double),
("AgeNullable", AgeFractional),
],
@@ -638,93 +637,15 @@
assert X.ww.get_subset_schema(
cols_expected_to_stay_the_same,
) == X_imputed.ww.get_subset_schema(cols_expected_to_stay_the_same)
assert {
X_ltypes = {
str(ltype)
for col, ltype in X_imputed.ww.logical_types.items()
if col in cols_expected_to_change
} == expected_X_ltypes

}
assert X_ltypes == expected_X_ltypes
assert isinstance(y_imputed.ww.logical_type, expected_imputed_y_ltype)


@pytest.mark.parametrize(
"categorical_impute_strategy",
["forwards_fill", "backwards_fill"],
)
@pytest.mark.parametrize(
"numeric_impute_strategy",
["forwards_fill", "backwards_fill", "interpolate"],
)
@pytest.mark.parametrize(
"target_impute_strategy",
["forwards_fill", "backwards_fill", "interpolate"],
)
def test_imputer_nullable_handling_noop_for_non_impute_methods(
nullable_type_test_data,
nullable_type_target,
target_impute_strategy,
numeric_impute_strategy,
categorical_impute_strategy,
):
imputer = TimeSeriesImputer(
categorical_impute_strategy=categorical_impute_strategy,
numeric_impute_strategy=numeric_impute_strategy,
target_impute_strategy=target_impute_strategy,
)

X = nullable_type_test_data(has_nans=True)
y = nullable_type_target(ltype="IntegerNullable", has_nans=True)

imputer.fit(X, y)
original_X_schema = X.ww.schema
original_y_schema = y.ww.schema
X_d, y_d = imputer._handle_nullable_types(X, y)

# Confirm that we only change inputs when interpolate is used
if numeric_impute_strategy != "interpolate":
assert X_d.ww.schema == original_X_schema
else:
assert X_d.ww.schema != original_X_schema

if target_impute_strategy != "interpolate":
assert y_d.ww.schema == original_y_schema
else:
assert y_d.ww.schema != original_y_schema


@pytest.mark.parametrize(
"nullable_ltype",
["BooleanNullable", "IntegerNullable", "AgeNullable"],
)
@pytest.mark.parametrize(
"handle_incompatibility",
[
True,
pytest.param(
False,
marks=pytest.mark.xfail(strict=True, raises=ValueError),
),
],
)
def test_time_series_imputer_nullable_type_incompatibility(
nullable_type_target,
handle_incompatibility,
nullable_ltype,
):
"""Testing that the nullable type incompatibility that caused us to add handling for the time series imputer
is still present in pandas' interpolate method. If this test is causing the test suite to fail
because the code below no longer raises the expected ValueError, we should confirm that the nullable
types now work for our use case and remove the nullable type handling logic from TimeSeriesImputer.
"""
nullable_series = nullable_type_target(ltype=nullable_ltype, has_nans=True)
if handle_incompatibility:
imputer = TimeSeriesImputer(target_impute_strategy="interpolate")
imputer.fit(pd.DataFrame(), nullable_series)
_, nullable_series = imputer._handle_nullable_types(None, nullable_series)

nullable_series.interpolate()
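The test removed above existed purely as a canary: its docstring says to drop the nullable-type handling once pandas' `interpolate` stops raising on nullable series. With the minimum pandas now at 2.2.0 that appears to be the case, so both the canary and the handling were removed. A quick, illustrative check of the behavior it guarded:

```python
import pandas as pd

# With pandas >= 2.2 (the new floor in this PR), interpolating a nullable
# integer series is expected to succeed rather than raise the ValueError
# the removed test was watching for.
s = pd.Series([1, None, 4], dtype="Int64")
print(s.interpolate())
```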


@pytest.mark.parametrize(
"nans_present",
[True, False],
evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -21,15 +21,15 @@ networkx==3.2.1
nlp-primitives==2.13.0
numpy==1.26.4
packaging==24.0
pandas==2.0.3
pandas==2.2.2
plotly==5.22.0
pmdarima==2.0.4
pyzmq==26.0.3
scikit-learn==1.4.2
scikit-optimize==0.10.1
scipy==1.11.4
scipy==1.13.0
seaborn==0.13.2
shap==0.44.1
shap==0.45.1
sktime==0.28.1
statsmodels==0.14.2
texttable==1.7.0
4 changes: 2 additions & 2 deletions evalml/tests/dependency_update_check/minimum_requirements.txt
@@ -19,15 +19,15 @@ networkx==2.7
nlp-primitives==2.9.0
numpy==1.22.0
packaging==23.0
pandas==1.5.0
pandas==2.2.0
plotly==5.0.0
pmdarima==1.8.5
pyzmq==20.0.0
scikit-learn==1.3.2
scikit-optimize==0.9.0
scipy==1.5.0
seaborn==0.11.1
shap==0.42.0
shap==0.45.0
sktime==0.21.0
statsmodels==0.12.2
texttable==1.6.2