From 2470d30a6abc9db6610eced139db2010b4d65479 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 17 Nov 2024 10:39:44 +0100 Subject: [PATCH] TST (string dtype): clean-up assorted xfails --- pandas/tests/base/test_conversion.py | 9 ++------- pandas/tests/indexes/multi/test_setops.py | 5 +---- pandas/tests/indexes/test_base.py | 12 +----------- pandas/tests/io/excel/test_readers.py | 3 --- pandas/tests/io/excel/test_writers.py | 5 +---- pandas/tests/io/test_stata.py | 1 - pandas/tests/reshape/test_union_categoricals.py | 9 +++++---- 7 files changed, 10 insertions(+), 34 deletions(-) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 888e8628f8664..e3a821519c638 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gt2 @@ -392,9 +390,6 @@ def test_to_numpy(arr, expected, zero_copy, index_or_series_or_array): assert np.may_share_memory(result_nocopy1, result_nocopy2) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False -) @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize( "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] @@ -406,13 +401,13 @@ def test_to_numpy_copy(arr, as_series, using_infer_string): # no copy by default result = obj.to_numpy() - if using_infer_string and arr.dtype == object: + if using_infer_string and arr.dtype == object and obj.dtype.storage == "pyarrow": assert np.shares_memory(arr, result) is False else: assert np.shares_memory(arr, result) is True result = obj.to_numpy(copy=False) - if using_infer_string and arr.dtype == object: + if using_infer_string and arr.dtype == object and obj.dtype.storage == "pyarrow": assert np.shares_memory(arr, result) is False else: assert np.shares_memory(arr, result) is True diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index e85091aaae608..f7544cf62e5fa 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( CategoricalIndex, @@ -754,13 +752,12 @@ def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype): tm.assert_index_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_union_with_na_when_constructing_dataframe(): # GH43222 series1 = Series( (1,), index=MultiIndex.from_arrays( - [Series([None], dtype="string"), Series([None], dtype="string")] + [Series([None], dtype="str"), Series([None], dtype="str")] ), ) series2 = Series((10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b")))) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 19b46d9b2c15f..06df8902f319c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -8,12 +8,7 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import ( - HAS_PYARROW, - IS64, -) +from pandas.compat import IS64 from pandas.errors import InvalidIndexError import pandas.util._test_decorators as td @@ -823,11 +818,6 @@ def test_isin(self, values, index, expected): expected = np.array(expected, dtype=bool) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, - reason="TODO(infer_string)", - strict=False, - ) def test_isin_nan_common_object( self, nulls_fixture, nulls_fixture2, using_infer_string ): diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 3989e022dbbd2..34824f0a67985 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -17,8 +17,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td import pandas as pd @@ -625,7 +623,6 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): expected = DataFrame(expected) tm.assert_frame_equal(actual, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend(self, read_ext, dtype_backend, engine, tmp_excel): # GH#36712 if read_ext in (".xlsb", ".xls"): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 051aa1f386d92..19fe9855dbb85 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -13,8 +13,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat._optional import import_optional_dependency import pandas.util._test_decorators as td @@ -1387,12 +1385,11 @@ def test_freeze_panes(self, tmp_excel): result = pd.read_excel(tmp_excel, index_col=0) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_path_path_lib(self, engine, ext): df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD")), - index=Index([f"i-{i}" for i in range(30)], dtype=object), + index=Index([f"i-{i}" for i in range(30)]), ) writer = partial(df.to_excel, engine=engine) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 8fa85d13bbdb5..9288b98d79fbe 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1719,7 +1719,6 @@ def test_date_parsing_ignores_format_details(self, column, datapath): formatted = df.loc[0, column + "_fmt"] assert unformatted == formatted - # @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("byteorder", ["little", "big"]) def test_writer_117(self, byteorder, temp_file, using_infer_string): original = DataFrame( diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index 1d5d16f39e648..081feae6fc43f 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.concat import union_categoricals import pandas as pd @@ -124,12 +122,15 @@ def test_union_categoricals_nan(self): exp = Categorical([np.nan, np.nan, np.nan, np.nan]) tm.assert_categorical_equal(res, exp) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("val", [[], ["1"]]) def test_union_categoricals_empty(self, val, request, using_infer_string): # GH 13759 if using_infer_string and val == ["1"]: - request.applymarker(pytest.mark.xfail("object and strings dont match")) + request.applymarker( + pytest.mark.xfail( + reason="TDOD(infer_string) object and strings dont match" + ) + ) res = union_categoricals([Categorical([]), Categorical(val)]) exp = Categorical(val) tm.assert_categorical_equal(res, exp)