From 6296e03ee2a9ca2a8e42606e06d75d146ec1af8c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Nov 2023 17:39:51 -0800 Subject: [PATCH 1/2] TST: de-xfail some pyarrow tests (#55918) * TST: un-xfail pyarrow verbose tests * un-xfail pyarrow tests * de-xfail pyarrow tests * de-xfail pyarrow tests * de-xfail pyarrow tests * de-xfail pyarrow tests * de-xfail pyarrow tests * de-xfail pyarrow test * De-xfail pyarrow tests --- pandas/io/parsers/arrow_parser_wrapper.py | 9 ++- .../io/parser/common/test_common_basic.py | 72 ++++++++++++++++--- pandas/tests/io/parser/common/test_decimal.py | 11 ++- .../io/parser/common/test_file_buffer_url.py | 46 +++++++++--- pandas/tests/io/parser/common/test_ints.py | 24 ++++--- .../tests/io/parser/common/test_iterator.py | 29 ++++++-- .../io/parser/common/test_read_errors.py | 40 ++++++++--- pandas/tests/io/parser/common/test_verbose.py | 16 +++-- .../io/parser/dtypes/test_categorical.py | 18 +++-- .../io/parser/dtypes/test_dtypes_basic.py | 23 +++--- 10 files changed, 226 insertions(+), 62 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 35965c90ee7fb..a1d69deb6a21e 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -13,6 +13,7 @@ ) from pandas.util._exceptions import find_stack_level +from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.inference import is_integer import pandas as pd @@ -203,7 +204,13 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame: # Ignore non-existent columns from dtype mapping # like other parsers do if isinstance(self.dtype, dict): - self.dtype = {k: v for k, v in self.dtype.items() if k in frame.columns} + self.dtype = { + k: pandas_dtype(v) + for k, v in self.dtype.items() + if k in frame.columns + } + else: + self.dtype = pandas_dtype(self.dtype) try: frame = frame.astype(self.dtype) except TypeError as e: diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 3abbd14c20e16..a2ffec45cfc7f 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -119,7 +119,6 @@ def test_read_csv_local(all_parsers, csv1): tm.assert_frame_equal(result, expected) -@xfail_pyarrow def test_1000_sep(all_parsers): parser = all_parsers data = """A|B|C @@ -128,6 +127,12 @@ def test_1000_sep(all_parsers): """ expected = DataFrame({"A": [1, 10], "B": [2334, 13], "C": [5, 10.0]}) + if parser.engine == "pyarrow": + msg = "The 'thousands' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), sep="|", thousands=",") + return + result = parser.read_csv(StringIO(data), sep="|", thousands=",") tm.assert_frame_equal(result, expected) @@ -161,7 +166,6 @@ def test_csv_mixed_type(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow def test_read_csv_low_memory_no_rows_with_index(all_parsers): # see gh-21141 parser = all_parsers @@ -174,6 +178,13 @@ def test_read_csv_low_memory_no_rows_with_index(all_parsers): 2,2,3,4 3,3,4,5 """ + + if parser.engine == "pyarrow": + msg = "The 'nrows' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0) + return + result = parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0) expected = DataFrame(columns=["A", "B", "C"]) tm.assert_frame_equal(result, expected) @@ -212,7 +223,6 @@ def test_read_csv_dataframe(all_parsers, csv1): tm.assert_frame_equal(result, expected) -@xfail_pyarrow @pytest.mark.parametrize("nrows", [3, 3.0]) def test_read_nrows(all_parsers, nrows): # see gh-10476 @@ -230,11 +240,16 @@ def test_read_nrows(all_parsers, nrows): ) parser = all_parsers + if parser.engine == "pyarrow": + msg = "The 'nrows' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), nrows=nrows) + return + result = parser.read_csv(StringIO(data), nrows=nrows) tm.assert_frame_equal(result, expected) -@xfail_pyarrow @pytest.mark.parametrize("nrows", [1.2, "foo", -1]) def test_read_nrows_bad(all_parsers, nrows): data = """index,A,B,C,D @@ -247,6 +262,8 @@ def test_read_nrows_bad(all_parsers, nrows): """ msg = r"'nrows' must be an integer >=0" parser = all_parsers + if parser.engine == "pyarrow": + msg = "The 'nrows' option is not supported with the 'pyarrow' engine" with pytest.raises(ValueError, match=msg): parser.read_csv(StringIO(data), nrows=nrows) @@ -277,7 +294,6 @@ def test_missing_trailing_delimiters(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow def test_skip_initial_space(all_parsers): data = ( '"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' @@ -289,6 +305,18 @@ def test_skip_initial_space(all_parsers): ) parser = all_parsers + if parser.engine == "pyarrow": + msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + names=list(range(33)), + header=None, + na_values=["-9999.0"], + skipinitialspace=True, + ) + return + result = parser.read_csv( StringIO(data), names=list(range(33)), @@ -437,7 +465,6 @@ def test_read_empty_with_usecols(all_parsers, data, kwargs, expected): tm.assert_frame_equal(result, expected) -@xfail_pyarrow @pytest.mark.parametrize( "kwargs,expected", [ @@ -467,6 +494,12 @@ def test_trailing_spaces(all_parsers, kwargs, expected): data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa: E501 parser = all_parsers + if parser.engine == "pyarrow": + msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data.replace(",", " ")), **kwargs) + return + result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs) tm.assert_frame_equal(result, expected) @@ -488,7 +521,6 @@ def test_read_filepath_or_buffer(all_parsers): parser.read_csv(filepath_or_buffer=b"input") -@xfail_pyarrow @pytest.mark.parametrize("delim_whitespace", [True, False]) def test_single_char_leading_whitespace(all_parsers, delim_whitespace): # see gh-9710 @@ -501,6 +533,15 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace): b\n""" expected = DataFrame({"MyColumn": list("abab")}) + + if parser.engine == "pyarrow": + msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace + ) + return + result = parser.read_csv( StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace ) @@ -688,7 +729,6 @@ def test_first_row_bom_unquoted(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow @pytest.mark.parametrize("nrows", range(1, 6)) def test_blank_lines_between_header_and_data_rows(all_parsers, nrows): # GH 28071 @@ -698,6 +738,15 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows): ) csv = "\nheader\n\na,b\n\n\n1,2\n\n3,4" parser = all_parsers + + if parser.engine == "pyarrow": + msg = "The 'nrows' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False + ) + return + df = parser.read_csv(StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False) tm.assert_frame_equal(df, ref[:nrows]) @@ -731,11 +780,16 @@ def test_read_csv_names_not_accepting_sets(all_parsers): parser.read_csv(StringIO(data), names=set("QAZ")) -@xfail_pyarrow def test_read_table_delim_whitespace_default_sep(all_parsers): # GH: 35958 f = StringIO("a b c\n1 -2 -3\n4 5 6") parser = all_parsers + + if parser.engine == "pyarrow": + msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_table(f, delim_whitespace=True) + return result = parser.read_table(f, delim_whitespace=True) expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_decimal.py b/pandas/tests/io/parser/common/test_decimal.py index b8a68c138eeff..4ceca037f589a 100644 --- a/pandas/tests/io/parser/common/test_decimal.py +++ b/pandas/tests/io/parser/common/test_decimal.py @@ -13,10 +13,7 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") - -@xfail_pyarrow @pytest.mark.parametrize( "data,thousands,decimal", [ @@ -42,6 +39,14 @@ def test_1000_sep_with_decimal(all_parsers, data, thousands, decimal): parser = all_parsers expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]}) + if parser.engine == "pyarrow": + msg = "The 'thousands' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), sep="|", thousands=thousands, decimal=decimal + ) + return + result = parser.read_csv( StringIO(data), sep="|", thousands=thousands, decimal=decimal ) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 5d5814e880f8b..7fd86e956b543 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -214,8 +214,14 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg, request): # see gh-10728, gh-10548 parser = all_parsers + if parser.engine == "pyarrow" and "comment" in kwargs: + msg = "The 'comment' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + return + if parser.engine == "pyarrow" and "\r" not in data: - mark = pytest.mark.xfail(reason="The 'comment' option is not supported") + mark = pytest.mark.xfail(reason="Mismatched exception type/message") request.applymarker(mark) if expected is None: @@ -356,7 +362,6 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding): assert not handle.closed -@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_memory_map_compression(all_parsers, compression): """ Support memory map for compressed files. @@ -369,19 +374,32 @@ def test_memory_map_compression(all_parsers, compression): with tm.ensure_clean() as path: expected.to_csv(path, index=False, compression=compression) - tm.assert_frame_equal( - parser.read_csv(path, memory_map=True, compression=compression), - expected, - ) + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(path, memory_map=True, compression=compression) + return + + result = parser.read_csv(path, memory_map=True, compression=compression) + + tm.assert_frame_equal( + result, + expected, + ) -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_context_manager(all_parsers, datapath): # make sure that opened files are closed parser = all_parsers path = datapath("io", "data", "csv", "iris.csv") + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(path, chunksize=1) + return + reader = parser.read_csv(path, chunksize=1) assert not reader.handles.handle.closed try: @@ -392,12 +410,17 @@ def test_context_manager(all_parsers, datapath): assert reader.handles.handle.closed -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_context_manageri_user_provided(all_parsers, datapath): # make sure that user-provided handles are not closed parser = all_parsers with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path: + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(path, chunksize=1) + return + reader = parser.read_csv(path, chunksize=1) assert not reader.handles.handle.closed try: @@ -417,7 +440,6 @@ def test_file_descriptor_leak(all_parsers, using_copy_on_write): parser.read_csv(path) -@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_memory_map(all_parsers, csv_dir_path): mmap_file = os.path.join(csv_dir_path, "test_mmap.csv") parser = all_parsers @@ -426,5 +448,11 @@ def test_memory_map(all_parsers, csv_dir_path): {"a": [1, 2, 3], "b": ["one", "two", "three"], "c": ["I", "II", "III"]} ) + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(mmap_file, memory_map=True) + return + result = parser.read_csv(mmap_file, memory_map=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index 086b43be59823..41bfbb55d818f 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -126,10 +126,8 @@ def test_int64_min_issues(all_parsers): tm.assert_frame_equal(result, expected) -# ValueError: The 'converters' option is not supported with the 'pyarrow' engine -@xfail_pyarrow @pytest.mark.parametrize("conv", [None, np.int64, np.uint64]) -def test_int64_overflow(all_parsers, conv): +def test_int64_overflow(all_parsers, conv, request): data = """ID 00013007854817840016671868 00013007854817840016749251 @@ -143,6 +141,10 @@ def test_int64_overflow(all_parsers, conv): if conv is None: # 13007854817840016671868 > UINT64_MAX, so this # will overflow and return object as the dtype. + if parser.engine == "pyarrow": + mark = pytest.mark.xfail(reason="parses to float64") + request.applymarker(mark) + result = parser.read_csv(StringIO(data)) expected = DataFrame( [ @@ -161,13 +163,19 @@ def test_int64_overflow(all_parsers, conv): # 13007854817840016671868 > UINT64_MAX, so attempts # to cast to either int64 or uint64 will result in # an OverflowError being raised. - msg = ( - "(Python int too large to convert to C long)|" - "(long too big to convert)|" - "(int too big to convert)" + msg = "|".join( + [ + "Python int too large to convert to C long", + "long too big to convert", + "int too big to convert", + ] ) + err = OverflowError + if parser.engine == "pyarrow": + err = ValueError + msg = "The 'converters' option is not supported with the 'pyarrow' engine" - with pytest.raises(OverflowError, match=msg): + with pytest.raises(err, match=msg): parser.read_csv(StringIO(data), converters={"ID": conv}) diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index 26619857bd231..a521c84aa007d 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -15,10 +15,8 @@ pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") -@xfail_pyarrow # ValueError: The 'iterator' option is not supported def test_iterator(all_parsers): # see gh-6607 data = """index,A,B,C,D @@ -33,6 +31,13 @@ def test_iterator(all_parsers): kwargs = {"index_col": 0} expected = parser.read_csv(StringIO(data), **kwargs) + + if parser.engine == "pyarrow": + msg = "The 'iterator' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), iterator=True, **kwargs) + return + with parser.read_csv(StringIO(data), iterator=True, **kwargs) as reader: first_chunk = reader.read(3) tm.assert_frame_equal(first_chunk, expected[:3]) @@ -41,7 +46,6 @@ def test_iterator(all_parsers): tm.assert_frame_equal(last_chunk, expected[3:]) -@xfail_pyarrow # ValueError: The 'iterator' option is not supported def test_iterator2(all_parsers): parser = all_parsers data = """A,B,C @@ -50,6 +54,12 @@ def test_iterator2(all_parsers): baz,7,8,9 """ + if parser.engine == "pyarrow": + msg = "The 'iterator' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), iterator=True) + return + with parser.read_csv(StringIO(data), iterator=True) as reader: result = list(reader) @@ -61,7 +71,6 @@ def test_iterator2(all_parsers): tm.assert_frame_equal(result[0], expected) -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_iterator_stop_on_chunksize(all_parsers): # gh-3967: stopping iteration when chunksize is specified parser = all_parsers @@ -70,6 +79,11 @@ def test_iterator_stop_on_chunksize(all_parsers): bar,4,5,6 baz,7,8,9 """ + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), chunksize=1) + return with parser.read_csv(StringIO(data), chunksize=1) as reader: result = list(reader) @@ -83,7 +97,6 @@ def test_iterator_stop_on_chunksize(all_parsers): tm.assert_frame_equal(concat(result), expected) -@xfail_pyarrow # AssertionError: Regex pattern did not match @pytest.mark.parametrize( "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}] ) @@ -92,6 +105,12 @@ def test_iterator_skipfooter_errors(all_parsers, kwargs): parser = all_parsers data = "a\n1\n2" + if parser.engine == "pyarrow": + msg = ( + "The '(chunksize|iterator)' option is not supported with the " + "'pyarrow' engine" + ) + with pytest.raises(ValueError, match=msg): with parser.read_csv(StringIO(data), skipfooter=1, **kwargs) as _: pass diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 52ddb38192a6b..7e841ed8b4ebd 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -63,7 +63,6 @@ def test_bad_stream_exception(all_parsers, csv_dir_path): parser.read_csv(stream) -@xfail_pyarrow # ValueError: The 'comment' option is not supported def test_malformed(all_parsers): # see gh-6607 parser = all_parsers @@ -74,11 +73,14 @@ def test_malformed(all_parsers): 2,3,4 """ msg = "Expected 3 fields in line 4, saw 5" - with pytest.raises(ParserError, match=msg): + err = ParserError + if parser.engine == "pyarrow": + msg = "The 'comment' option is not supported with the 'pyarrow' engine" + err = ValueError + with pytest.raises(err, match=msg): parser.read_csv(StringIO(data), header=1, comment="#") -@xfail_pyarrow # ValueError: The 'iterator' option is not supported @pytest.mark.parametrize("nrows", [5, 3, None]) def test_malformed_chunks(all_parsers, nrows): data = """ignore @@ -90,6 +92,20 @@ def test_malformed_chunks(all_parsers, nrows): 2,3,4 """ parser = all_parsers + + if parser.engine == "pyarrow": + msg = "The 'iterator' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + header=1, + comment="#", + iterator=True, + chunksize=1, + skiprows=[2], + ) + return + msg = "Expected 3 fields in line 6, saw 5" with parser.read_csv( StringIO(data), header=1, comment="#", iterator=True, chunksize=1, skiprows=[2] @@ -239,19 +255,21 @@ def test_null_byte_char(request, all_parsers): parser.read_csv(StringIO(data), names=names) -# ValueError: the 'pyarrow' engine does not support sep=None with delim_whitespace=False -@xfail_pyarrow @pytest.mark.filterwarnings("always::ResourceWarning") def test_open_file(request, all_parsers): # GH 39024 parser = all_parsers + + msg = "Could not determine delimiter" + err = csv.Error if parser.engine == "c": - request.applymarker( - pytest.mark.xfail( - reason=f"{parser.engine} engine does not support sep=None " - f"with delim_whitespace=False" - ) + msg = "the 'c' engine does not support sep=None with delim_whitespace=False" + err = ValueError + elif parser.engine == "pyarrow": + msg = ( + "the 'pyarrow' engine does not support sep=None with delim_whitespace=False" ) + err = ValueError with tm.ensure_clean() as path: file = Path(path) @@ -259,7 +277,7 @@ def test_open_file(request, all_parsers): with tm.assert_produces_warning(None): # should not trigger a ResourceWarning - with pytest.raises(csv.Error, match="Could not determine delimiter"): + with pytest.raises(err, match=msg): parser.read_csv(file, sep=None, encoding_errors="replace") diff --git a/pandas/tests/io/parser/common/test_verbose.py b/pandas/tests/io/parser/common/test_verbose.py index bcfb9cd4032ad..14deba8b40b22 100644 --- a/pandas/tests/io/parser/common/test_verbose.py +++ b/pandas/tests/io/parser/common/test_verbose.py @@ -6,10 +6,7 @@ import pytest -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") - -@xfail_pyarrow # ValueError: The 'verbose' option is not supported def test_verbose_read(all_parsers, capsys): parser = all_parsers data = """a,b,c,d @@ -22,6 +19,12 @@ def test_verbose_read(all_parsers, capsys): one,1,2,3 two,1,2,3""" + if parser.engine == "pyarrow": + msg = "The 'verbose' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), verbose=True) + return + # Engines are verbose in different ways. parser.read_csv(StringIO(data), verbose=True) captured = capsys.readouterr() @@ -33,7 +36,6 @@ def test_verbose_read(all_parsers, capsys): assert captured.out == "Filled 3 NA values in column a\n" -@xfail_pyarrow # ValueError: The 'verbose' option is not supported def test_verbose_read2(all_parsers, capsys): parser = all_parsers data = """a,b,c,d @@ -46,6 +48,12 @@ def test_verbose_read2(all_parsers, capsys): seven,1,2,3 eight,1,2,3""" + if parser.engine == "pyarrow": + msg = "The 'verbose' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), verbose=True, index_col=0) + return + parser.read_csv(StringIO(data), verbose=True, index_col=0) captured = capsys.readouterr() diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index c7586bd9334ef..b1b35447b60c2 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -146,8 +146,6 @@ def test_categorical_dtype_utf16(all_parsers, csv_dir_path): tm.assert_frame_equal(actual, expected) -# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine -@xfail_pyarrow def test_categorical_dtype_chunksize_infer_categories(all_parsers): # see gh-10153 parser = all_parsers @@ -160,6 +158,13 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers): DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), ] + + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dtype={"b": "category"}, chunksize=2) + return + with parser.read_csv( StringIO(data), dtype={"b": "category"}, chunksize=2 ) as actuals: @@ -167,8 +172,6 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers): tm.assert_frame_equal(actual, expected) -# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine -@xfail_pyarrow def test_categorical_dtype_chunksize_explicit_categories(all_parsers): # see gh-10153 parser = all_parsers @@ -186,6 +189,13 @@ def test_categorical_dtype_chunksize_explicit_categories(all_parsers): ), ] dtype = CategoricalDtype(cats) + + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) + return + with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals: for actual, expected in zip(actuals, expecteds): tm.assert_frame_equal(actual, expected) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 3f3d340ab2e08..32b4b1dedc3cb 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -73,7 +73,6 @@ def test_dtype_per_column(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.usefixtures("pyarrow_xfail") def test_invalid_dtype_per_column(all_parsers): parser = all_parsers data = """\ @@ -87,7 +86,6 @@ def test_invalid_dtype_per_column(all_parsers): parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) -@pytest.mark.usefixtures("pyarrow_xfail") def test_raise_on_passed_int_dtype_with_nas(all_parsers): # see gh-2631 parser = all_parsers @@ -96,22 +94,31 @@ def test_raise_on_passed_int_dtype_with_nas(all_parsers): 2001,,11 2001,106380451,67""" - msg = ( - "Integer column has NA values" - if parser.engine == "c" - else "Unable to convert column DOY" - ) + if parser.engine == "c": + msg = "Integer column has NA values" + elif parser.engine == "pyarrow": + msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine" + else: + msg = "Unable to convert column DOY" + with pytest.raises(ValueError, match=msg): parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) -@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_with_converters(all_parsers): parser = all_parsers data = """a,b 1.1,2.2 1.2,2.3""" + if parser.engine == "pyarrow": + msg = "The 'converters' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} + ) + return + # Dtype spec ignored if converted specified. result = parser.read_csv_check_warnings( ParserWarning, From b2d9ec17c52084ee2b629633c9119c01ea11d387 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 11 Nov 2023 14:52:00 -0500 Subject: [PATCH 2/2] ASV: avoid "H" and "S" freq deprecations (#55921) update ASVs for freq deprecations --- asv_bench/benchmarks/algorithms.py | 8 ++++---- asv_bench/benchmarks/arithmetic.py | 2 +- asv_bench/benchmarks/frame_methods.py | 6 +++--- asv_bench/benchmarks/gil.py | 2 +- asv_bench/benchmarks/groupby.py | 2 +- asv_bench/benchmarks/indexing.py | 4 ++-- asv_bench/benchmarks/inference.py | 4 ++-- asv_bench/benchmarks/io/csv.py | 12 ++++++------ asv_bench/benchmarks/io/excel.py | 2 +- asv_bench/benchmarks/io/hdf.py | 2 +- asv_bench/benchmarks/io/json.py | 10 +++++----- asv_bench/benchmarks/io/pickle.py | 2 +- asv_bench/benchmarks/io/stata.py | 2 +- asv_bench/benchmarks/join_merge.py | 2 +- asv_bench/benchmarks/period.py | 2 +- asv_bench/benchmarks/series_methods.py | 4 ++-- asv_bench/benchmarks/strftime.py | 4 ++-- asv_bench/benchmarks/timeseries.py | 12 ++++++------ 18 files changed, 41 insertions(+), 41 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 192f19c36b47d..6ab8e4f14e979 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -50,9 +50,9 @@ def setup(self, unique, sort, dtype): "float": pd.Index(np.random.randn(N), dtype="float64"), "object_str": string_index, "object": pd.Index(np.arange(N), dtype="object"), - "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N), "datetime64[ns, tz]": pd.date_range( - "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo" ), "Int64": pd.array(np.arange(N), dtype="Int64"), "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"), @@ -93,9 +93,9 @@ def setup(self, unique, keep, dtype): "uint": pd.Index(np.arange(N), dtype="uint64"), "float": pd.Index(np.random.randn(N), dtype="float64"), "string": tm.makeStringIndex(N), - "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N), "datetime64[ns, tz]": pd.date_range( - "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo" ), "timestamp[ms][pyarrow]": pd.Index( np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms")) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 49543c166d047..d70ad144a3455 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -491,7 +491,7 @@ class BinaryOpsMultiIndex: param_names = ["func"] def setup(self, func): - array = date_range("20200101 00:00", "20200102 0:00", freq="S") + array = date_range("20200101 00:00", "20200102 0:00", freq="s") level_0_names = [str(i) for i in range(30)] index = pd.MultiIndex.from_product([level_0_names, array]) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index e56fbf1d8c32f..c4ab73553cf1a 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -439,9 +439,9 @@ def setup(self, inplace, dtype): N, M = 10000, 100 if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"): data = { - "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns]": date_range("2011-01-01", freq="h", periods=N), "datetime64[ns, tz]": date_range( - "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo" ), "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"), } @@ -649,7 +649,7 @@ def time_series_nunique_nan(self): class Duplicated: def setup(self): n = 1 << 20 - t = date_range("2015-01-01", freq="S", periods=(n // 64)) + t = date_range("2015-01-01", freq="s", periods=(n // 64)) xs = np.random.randn(n // 64).round(2) self.df = DataFrame( { diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 4993ffd2c47d0..fb4523f78ccb5 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -212,7 +212,7 @@ def run(dti): def time_datetime_to_period(self): @test_parallel(num_threads=2) def run(dti): - dti.to_period("S") + dti.to_period("s") run(self.dti) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 54c240e84243a..d36d88e7b6b42 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -238,7 +238,7 @@ def time_series_nth(self, dtype): class DateAttributes: def setup(self): - rng = date_range("1/1/2000", "12/31/2005", freq="H") + rng = date_range("1/1/2000", "12/31/2005", freq="h") self.year, self.month, self.day = rng.year, rng.month, rng.day self.ts = Series(np.random.randn(len(rng)), index=rng) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 84d95a23bd446..d8b1bf327294a 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -232,7 +232,7 @@ def setup(self, index): N = 100000 indexes = { "int": Index(np.arange(N), dtype=np.int64), - "datetime": date_range("2011-01-01", freq="S", periods=N), + "datetime": date_range("2011-01-01", freq="s", periods=N), } index = indexes[index] self.s = Series(np.random.rand(N), index=index) @@ -465,7 +465,7 @@ def time_loc_row(self, unique_cols): class AssignTimeseriesIndex: def setup(self): N = 100000 - idx = date_range("1/1/2000", periods=N, freq="H") + idx = date_range("1/1/2000", periods=N, freq="h") self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx) def time_frame_assign_timeseries_index(self): diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 476ff14dcc92a..805b0c807452c 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -164,7 +164,7 @@ def time_unique_date_strings(self, cache, count): class ToDatetimeISO8601: def setup(self): - rng = date_range(start="1/1/2000", periods=20000, freq="H") + rng = date_range(start="1/1/2000", periods=20000, freq="h") self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist() self.strings_tz_space = [ @@ -276,7 +276,7 @@ def time_dup_string_tzoffset_dates(self, cache): # GH 43901 class ToDatetimeInferDatetimeFormat: def setup(self): - rng = date_range(start="1/1/2000", periods=100000, freq="H") + rng = date_range(start="1/1/2000", periods=100000, freq="h") self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() def time_infer_datetime_format(self): diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index c5e3e80571e30..1826291034dee 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -89,7 +89,7 @@ class ToCSVDatetimeIndex(BaseIO): fname = "__test__.csv" def setup(self): - rng = date_range("2000", periods=100_000, freq="S") + rng = date_range("2000", periods=100_000, freq="s") self.data = DataFrame({"a": 1}, index=rng) def time_frame_date_formatting_index(self): @@ -102,7 +102,7 @@ def time_frame_date_no_format_index(self): class ToCSVPeriod(BaseIO): fname = "__test__.csv" - params = ([1000, 10000], ["D", "H"]) + params = ([1000, 10000], ["D", "h"]) param_names = ["nobs", "freq"] def setup(self, nobs, freq): @@ -110,7 +110,7 @@ def setup(self, nobs, freq): self.data = DataFrame(rng) if freq == "D": self.default_fmt = "%Y-%m-%d" - elif freq == "H": + elif freq == "h": self.default_fmt = "%Y-%m-%d %H:00" def time_frame_period_formatting_default(self, nobs, freq): @@ -130,7 +130,7 @@ def time_frame_period_formatting(self, nobs, freq): class ToCSVPeriodIndex(BaseIO): fname = "__test__.csv" - params = ([1000, 10000], ["D", "H"]) + params = ([1000, 10000], ["D", "h"]) param_names = ["nobs", "freq"] def setup(self, nobs, freq): @@ -138,7 +138,7 @@ def setup(self, nobs, freq): self.data = DataFrame({"a": 1}, index=rng) if freq == "D": self.default_fmt = "%Y-%m-%d" - elif freq == "H": + elif freq == "h": self.default_fmt = "%Y-%m-%d %H:00" def time_frame_period_formatting_index(self, nobs, freq): @@ -253,7 +253,7 @@ class ReadCSVConcatDatetime(StringIORewind): iso8601 = "%Y-%m-%d %H:%M:%S" def setup(self): - rng = date_range("1/1/2000", periods=50000, freq="S") + rng = date_range("1/1/2000", periods=50000, freq="s") self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist())) def time_read_csv(self): diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index c77c6b6f5727c..f8d81b0f6a699 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -25,7 +25,7 @@ def _generate_dataframe(): df = DataFrame( np.random.randn(N, C), columns=[f"float{i}" for i in range(C)], - index=date_range("20000101", periods=N, freq="H"), + index=date_range("20000101", periods=N, freq="h"), ) df["object"] = tm.makeStringIndex(N) return df diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py index f3e417e717609..195aaa158e178 100644 --- a/asv_bench/benchmarks/io/hdf.py +++ b/asv_bench/benchmarks/io/hdf.py @@ -122,7 +122,7 @@ def setup(self, format): self.df = DataFrame( np.random.randn(N, C), columns=[f"float{i}" for i in range(C)], - index=date_range("20000101", periods=N, freq="H"), + index=date_range("20000101", periods=N, freq="h"), ) self.df["object"] = tm.makeStringIndex(N) self.df.to_hdf(self.fname, "df", format=format) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index bebf6ee993aba..8a2e3fa87eb37 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -26,7 +26,7 @@ def setup(self, orient, index): N = 100000 indexes = { "int": np.arange(N), - "datetime": date_range("20000101", periods=N, freq="H"), + "datetime": date_range("20000101", periods=N, freq="h"), } df = DataFrame( np.random.randn(N, 5), @@ -48,7 +48,7 @@ def setup(self, index): N = 100000 indexes = { "int": np.arange(N), - "datetime": date_range("20000101", periods=N, freq="H"), + "datetime": date_range("20000101", periods=N, freq="h"), } df = DataFrame( np.random.randn(N, 5), @@ -108,7 +108,7 @@ class ToJSON(BaseIO): def setup(self, orient, frame): N = 10**5 ncols = 5 - index = date_range("20000101", periods=N, freq="H") + index = date_range("20000101", periods=N, freq="h") timedeltas = timedelta_range(start=1, periods=N, freq="s") datetimes = date_range(start=1, periods=N, freq="s") ints = np.random.randint(100000000, size=N) @@ -191,7 +191,7 @@ class ToJSONISO(BaseIO): def setup(self, orient): N = 10**5 - index = date_range("20000101", periods=N, freq="H") + index = date_range("20000101", periods=N, freq="h") timedeltas = timedelta_range(start=1, periods=N, freq="s") datetimes = date_range(start=1, periods=N, freq="s") self.df = DataFrame( @@ -214,7 +214,7 @@ class ToJSONLines(BaseIO): def setup(self): N = 10**5 ncols = 5 - index = date_range("20000101", periods=N, freq="H") + index = date_range("20000101", periods=N, freq="h") timedeltas = timedelta_range(start=1, periods=N, freq="s") datetimes = date_range(start=1, periods=N, freq="s") ints = np.random.randint(100000000, size=N) diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py index c71cdcdcc5c59..54631d9236887 100644 --- a/asv_bench/benchmarks/io/pickle.py +++ b/asv_bench/benchmarks/io/pickle.py @@ -20,7 +20,7 @@ def setup(self): self.df = DataFrame( np.random.randn(N, C), columns=[f"float{i}" for i in range(C)], - index=date_range("20000101", periods=N, freq="H"), + index=date_range("20000101", periods=N, freq="h"), ) self.df["object"] = tm.makeStringIndex(N) self.df.to_pickle(self.fname) diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py index 300b9c778f1f8..750bcf4ccee5c 100644 --- a/asv_bench/benchmarks/io/stata.py +++ b/asv_bench/benchmarks/io/stata.py @@ -23,7 +23,7 @@ def setup(self, convert_dates): self.df = DataFrame( np.random.randn(N, C), columns=[f"float{i}" for i in range(C)], - index=date_range("20000101", periods=N, freq="H"), + index=date_range("20000101", periods=N, freq="h"), ) self.df["object"] = tm.makeStringIndex(self.N) self.df["int8_"] = np.random.randint( diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 04ac47a892a22..23824c2c748df 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -213,7 +213,7 @@ class JoinNonUnique: # GH 6329 def setup(self): date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="min") - daily_dates = date_index.to_period("D").to_timestamp("S", "S") + daily_dates = date_index.to_period("D").to_timestamp("s", "s") self.fracofday = date_index.values - daily_dates.values self.fracofday = self.fracofday.astype("timedelta64[ns]") self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000 diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 501fe198d41d8..ccd86cae06d58 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -45,7 +45,7 @@ def time_from_ints_daily(self, freq, is_offset): class DataFramePeriodColumn: def setup(self): - self.rng = period_range(start="1/1/1990", freq="S", periods=20000) + self.rng = period_range(start="1/1/1990", freq="s", periods=20000) self.df = DataFrame(index=range(len(self.rng))) def time_setitem_period_column(self): diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index f52f7a4bef37a..459d562828f88 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -64,7 +64,7 @@ def setup(self, dtype): N = 10**6 data = { "int": np.random.randint(1, 10, N), - "datetime": date_range("2000-01-01", freq="S", periods=N), + "datetime": date_range("2000-01-01", freq="s", periods=N), } self.s = Series(data[dtype]) if dtype == "datetime": @@ -92,7 +92,7 @@ class Fillna: def setup(self, dtype): N = 10**6 if dtype == "datetime64[ns]": - data = date_range("2000-01-01", freq="S", periods=N) + data = date_range("2000-01-01", freq="s", periods=N) na_value = NaT elif dtype in ("float64", "Float64"): data = np.random.randn(N) diff --git a/asv_bench/benchmarks/strftime.py b/asv_bench/benchmarks/strftime.py index 39cc82e1bdf79..47f25b331ab9b 100644 --- a/asv_bench/benchmarks/strftime.py +++ b/asv_bench/benchmarks/strftime.py @@ -53,7 +53,7 @@ def time_frame_datetime_formatting_custom(self, nobs): class PeriodStrftime: timeout = 1500 - params = ([1000, 10000], ["D", "H"]) + params = ([1000, 10000], ["D", "h"]) param_names = ["nobs", "freq"] def setup(self, nobs, freq): @@ -67,7 +67,7 @@ def setup(self, nobs, freq): self.data.set_index("i", inplace=True) if freq == "D": self.default_fmt = "%Y-%m-%d" - elif freq == "H": + elif freq == "h": self.default_fmt = "%Y-%m-%d %H:00" def time_frame_period_to_str(self, nobs, freq): diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 8c78a9c1723df..8e1deb99a66a4 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -27,7 +27,7 @@ def setup(self, index_type): N = 100000 dtidxes = { "dst": date_range( - start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S" + start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s" ), "repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10), "tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"), @@ -72,13 +72,13 @@ class TzLocalize: def setup(self, tz): dst_rng = date_range( - start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S" + start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s" ) - self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="S") + self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="s") self.index = self.index.append(dst_rng) self.index = self.index.append(dst_rng) self.index = self.index.append( - date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="S") + date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="s") ) def time_infer_dst(self, tz): @@ -90,7 +90,7 @@ class ResetIndex: param_names = "tz" def setup(self, tz): - idx = date_range(start="1/1/2000", periods=1000, freq="H", tz=tz) + idx = date_range(start="1/1/2000", periods=1000, freq="h", tz=tz) self.df = DataFrame(np.random.randn(1000, 2), index=idx) def time_reset_datetimeindex(self, tz): @@ -255,7 +255,7 @@ def time_get_slice(self, monotonic): class Lookup: def setup(self): N = 1500000 - rng = date_range(start="1/1/2000", periods=N, freq="S") + rng = date_range(start="1/1/2000", periods=N, freq="s") self.ts = Series(1, index=rng) self.lookup_val = rng[N // 2]