From 6296e03ee2a9ca2a8e42606e06d75d146ec1af8c Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 10 Nov 2023 17:39:51 -0800
Subject: [PATCH 1/2] TST: de-xfail some pyarrow tests (#55918)

* TST: un-xfail pyarrow verbose tests

* un-xfail pyarrow tests

* de-xfail pyarrow tests

* de-xfail pyarrow tests

* de-xfail pyarrow tests

* de-xfail pyarrow tests

* de-xfail pyarrow tests

* de-xfail pyarrow test

* De-xfail pyarrow tests
---
 pandas/io/parsers/arrow_parser_wrapper.py     |  9 ++-
 .../io/parser/common/test_common_basic.py     | 72 ++++++++++++++++---
 pandas/tests/io/parser/common/test_decimal.py | 11 ++-
 .../io/parser/common/test_file_buffer_url.py  | 46 +++++++++---
 pandas/tests/io/parser/common/test_ints.py    | 24 ++++---
 .../tests/io/parser/common/test_iterator.py   | 29 ++++++--
 .../io/parser/common/test_read_errors.py      | 40 ++++++++---
 pandas/tests/io/parser/common/test_verbose.py | 16 +++--
 .../io/parser/dtypes/test_categorical.py      | 18 +++--
 .../io/parser/dtypes/test_dtypes_basic.py     | 23 +++---
 10 files changed, 226 insertions(+), 62 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 35965c90ee7fb..a1d69deb6a21e 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -13,6 +13,7 @@
 )
 from pandas.util._exceptions import find_stack_level
 
+from pandas.core.dtypes.common import pandas_dtype
 from pandas.core.dtypes.inference import is_integer
 
 import pandas as pd
@@ -203,7 +204,13 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
             # Ignore non-existent columns from dtype mapping
             # like other parsers do
             if isinstance(self.dtype, dict):
-                self.dtype = {k: v for k, v in self.dtype.items() if k in frame.columns}
+                self.dtype = {
+                    k: pandas_dtype(v)
+                    for k, v in self.dtype.items()
+                    if k in frame.columns
+                }
+            else:
+                self.dtype = pandas_dtype(self.dtype)
             try:
                 frame = frame.astype(self.dtype)
             except TypeError as e:
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 3abbd14c20e16..a2ffec45cfc7f 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -119,7 +119,6 @@ def test_read_csv_local(all_parsers, csv1):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 def test_1000_sep(all_parsers):
     parser = all_parsers
     data = """A|B|C
@@ -128,6 +127,12 @@ def test_1000_sep(all_parsers):
 """
     expected = DataFrame({"A": [1, 10], "B": [2334, 13], "C": [5, 10.0]})
 
+    if parser.engine == "pyarrow":
+        msg = "The 'thousands' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), sep="|", thousands=",")
+        return
+
     result = parser.read_csv(StringIO(data), sep="|", thousands=",")
     tm.assert_frame_equal(result, expected)
 
@@ -161,7 +166,6 @@ def test_csv_mixed_type(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 def test_read_csv_low_memory_no_rows_with_index(all_parsers):
     # see gh-21141
     parser = all_parsers
@@ -174,6 +178,13 @@ def test_read_csv_low_memory_no_rows_with_index(all_parsers):
 2,2,3,4
 3,3,4,5
 """
+
+    if parser.engine == "pyarrow":
+        msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0)
+        return
+
     result = parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0)
     expected = DataFrame(columns=["A", "B", "C"])
     tm.assert_frame_equal(result, expected)
@@ -212,7 +223,6 @@ def test_read_csv_dataframe(all_parsers, csv1):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 @pytest.mark.parametrize("nrows", [3, 3.0])
 def test_read_nrows(all_parsers, nrows):
     # see gh-10476
@@ -230,11 +240,16 @@ def test_read_nrows(all_parsers, nrows):
     )
     parser = all_parsers
 
+    if parser.engine == "pyarrow":
+        msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), nrows=nrows)
+        return
+
     result = parser.read_csv(StringIO(data), nrows=nrows)
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 @pytest.mark.parametrize("nrows", [1.2, "foo", -1])
 def test_read_nrows_bad(all_parsers, nrows):
     data = """index,A,B,C,D
@@ -247,6 +262,8 @@ def test_read_nrows_bad(all_parsers, nrows):
 """
     msg = r"'nrows' must be an integer >=0"
     parser = all_parsers
+    if parser.engine == "pyarrow":
+        msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
 
     with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(data), nrows=nrows)
@@ -277,7 +294,6 @@ def test_missing_trailing_delimiters(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 def test_skip_initial_space(all_parsers):
     data = (
         '"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, '
@@ -289,6 +305,18 @@ def test_skip_initial_space(all_parsers):
     )
     parser = all_parsers
 
+    if parser.engine == "pyarrow":
+        msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data),
+                names=list(range(33)),
+                header=None,
+                na_values=["-9999.0"],
+                skipinitialspace=True,
+            )
+        return
+
     result = parser.read_csv(
         StringIO(data),
         names=list(range(33)),
@@ -437,7 +465,6 @@ def test_read_empty_with_usecols(all_parsers, data, kwargs, expected):
         tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 @pytest.mark.parametrize(
     "kwargs,expected",
     [
@@ -467,6 +494,12 @@ def test_trailing_spaces(all_parsers, kwargs, expected):
     data = "A B C  \nrandom line with trailing spaces    \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n   \n5.1,NaN,10.0\n"  # noqa: E501
     parser = all_parsers
 
+    if parser.engine == "pyarrow":
+        msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
+        return
+
     result = parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
     tm.assert_frame_equal(result, expected)
 
@@ -488,7 +521,6 @@ def test_read_filepath_or_buffer(all_parsers):
         parser.read_csv(filepath_or_buffer=b"input")
 
 
-@xfail_pyarrow
 @pytest.mark.parametrize("delim_whitespace", [True, False])
 def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
     # see gh-9710
@@ -501,6 +533,15 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
 b\n"""
 
     expected = DataFrame({"MyColumn": list("abab")})
+
+    if parser.engine == "pyarrow":
+        msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
+            )
+        return
+
     result = parser.read_csv(
         StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
     )
@@ -688,7 +729,6 @@ def test_first_row_bom_unquoted(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 @pytest.mark.parametrize("nrows", range(1, 6))
 def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
     # GH 28071
@@ -698,6 +738,15 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
     )
     csv = "\nheader\n\na,b\n\n\n1,2\n\n3,4"
     parser = all_parsers
+
+    if parser.engine == "pyarrow":
+        msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False
+            )
+        return
+
     df = parser.read_csv(StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False)
     tm.assert_frame_equal(df, ref[:nrows])
 
@@ -731,11 +780,16 @@ def test_read_csv_names_not_accepting_sets(all_parsers):
         parser.read_csv(StringIO(data), names=set("QAZ"))
 
 
-@xfail_pyarrow
 def test_read_table_delim_whitespace_default_sep(all_parsers):
     # GH: 35958
     f = StringIO("a  b  c\n1 -2 -3\n4  5   6")
     parser = all_parsers
+
+    if parser.engine == "pyarrow":
+        msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_table(f, delim_whitespace=True)
+        return
     result = parser.read_table(f, delim_whitespace=True)
     expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]})
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/common/test_decimal.py b/pandas/tests/io/parser/common/test_decimal.py
index b8a68c138eeff..4ceca037f589a 100644
--- a/pandas/tests/io/parser/common/test_decimal.py
+++ b/pandas/tests/io/parser/common/test_decimal.py
@@ -13,10 +13,7 @@
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
 
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
-
-@xfail_pyarrow
 @pytest.mark.parametrize(
     "data,thousands,decimal",
     [
@@ -42,6 +39,14 @@ def test_1000_sep_with_decimal(all_parsers, data, thousands, decimal):
     parser = all_parsers
     expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]})
 
+    if parser.engine == "pyarrow":
+        msg = "The 'thousands' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data), sep="|", thousands=thousands, decimal=decimal
+            )
+        return
+
     result = parser.read_csv(
         StringIO(data), sep="|", thousands=thousands, decimal=decimal
     )
diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index 5d5814e880f8b..7fd86e956b543 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -214,8 +214,14 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
     # see gh-10728, gh-10548
     parser = all_parsers
 
+    if parser.engine == "pyarrow" and "comment" in kwargs:
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **kwargs)
+        return
+
     if parser.engine == "pyarrow" and "\r" not in data:
-        mark = pytest.mark.xfail(reason="The 'comment' option is not supported")
+        mark = pytest.mark.xfail(reason="Mismatched exception type/message")
         request.applymarker(mark)
 
     if expected is None:
@@ -356,7 +362,6 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding):
     assert not handle.closed
 
 
-@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 def test_memory_map_compression(all_parsers, compression):
     """
     Support memory map for compressed files.
@@ -369,19 +374,32 @@ def test_memory_map_compression(all_parsers, compression):
     with tm.ensure_clean() as path:
         expected.to_csv(path, index=False, compression=compression)
 
-        tm.assert_frame_equal(
-            parser.read_csv(path, memory_map=True, compression=compression),
-            expected,
-        )
+        if parser.engine == "pyarrow":
+            msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
+            with pytest.raises(ValueError, match=msg):
+                parser.read_csv(path, memory_map=True, compression=compression)
+            return
+
+        result = parser.read_csv(path, memory_map=True, compression=compression)
+
+    tm.assert_frame_equal(
+        result,
+        expected,
+    )
 
 
-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_context_manager(all_parsers, datapath):
     # make sure that opened files are closed
     parser = all_parsers
 
     path = datapath("io", "data", "csv", "iris.csv")
 
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(path, chunksize=1)
+        return
+
     reader = parser.read_csv(path, chunksize=1)
     assert not reader.handles.handle.closed
     try:
@@ -392,12 +410,17 @@ def test_context_manager(all_parsers, datapath):
         assert reader.handles.handle.closed
 
 
-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_context_manageri_user_provided(all_parsers, datapath):
     # make sure that user-provided handles are not closed
     parser = all_parsers
 
     with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path:
+        if parser.engine == "pyarrow":
+            msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+            with pytest.raises(ValueError, match=msg):
+                parser.read_csv(path, chunksize=1)
+            return
+
         reader = parser.read_csv(path, chunksize=1)
         assert not reader.handles.handle.closed
         try:
@@ -417,7 +440,6 @@ def test_file_descriptor_leak(all_parsers, using_copy_on_write):
             parser.read_csv(path)
 
 
-@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 def test_memory_map(all_parsers, csv_dir_path):
     mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
     parser = all_parsers
@@ -426,5 +448,11 @@ def test_memory_map(all_parsers, csv_dir_path):
         {"a": [1, 2, 3], "b": ["one", "two", "three"], "c": ["I", "II", "III"]}
     )
 
+    if parser.engine == "pyarrow":
+        msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(mmap_file, memory_map=True)
+        return
+
     result = parser.read_csv(mmap_file, memory_map=True)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py
index 086b43be59823..41bfbb55d818f 100644
--- a/pandas/tests/io/parser/common/test_ints.py
+++ b/pandas/tests/io/parser/common/test_ints.py
@@ -126,10 +126,8 @@ def test_int64_min_issues(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-# ValueError: The 'converters' option is not supported with the 'pyarrow' engine
-@xfail_pyarrow
 @pytest.mark.parametrize("conv", [None, np.int64, np.uint64])
-def test_int64_overflow(all_parsers, conv):
+def test_int64_overflow(all_parsers, conv, request):
     data = """ID
 00013007854817840016671868
 00013007854817840016749251
@@ -143,6 +141,10 @@ def test_int64_overflow(all_parsers, conv):
     if conv is None:
         # 13007854817840016671868 > UINT64_MAX, so this
         # will overflow and return object as the dtype.
+        if parser.engine == "pyarrow":
+            mark = pytest.mark.xfail(reason="parses to float64")
+            request.applymarker(mark)
+
         result = parser.read_csv(StringIO(data))
         expected = DataFrame(
             [
@@ -161,13 +163,19 @@ def test_int64_overflow(all_parsers, conv):
         # 13007854817840016671868 > UINT64_MAX, so attempts
         # to cast to either int64 or uint64 will result in
         # an OverflowError being raised.
-        msg = (
-            "(Python int too large to convert to C long)|"
-            "(long too big to convert)|"
-            "(int too big to convert)"
+        msg = "|".join(
+            [
+                "Python int too large to convert to C long",
+                "long too big to convert",
+                "int too big to convert",
+            ]
         )
+        err = OverflowError
+        if parser.engine == "pyarrow":
+            err = ValueError
+            msg = "The 'converters' option is not supported with the 'pyarrow' engine"
 
-        with pytest.raises(OverflowError, match=msg):
+        with pytest.raises(err, match=msg):
             parser.read_csv(StringIO(data), converters={"ID": conv})
 
 
diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
index 26619857bd231..a521c84aa007d 100644
--- a/pandas/tests/io/parser/common/test_iterator.py
+++ b/pandas/tests/io/parser/common/test_iterator.py
@@ -15,10 +15,8 @@
 pytestmark = pytest.mark.filterwarnings(
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
-@xfail_pyarrow  # ValueError: The 'iterator' option is not supported
 def test_iterator(all_parsers):
     # see gh-6607
     data = """index,A,B,C,D
@@ -33,6 +31,13 @@ def test_iterator(all_parsers):
     kwargs = {"index_col": 0}
 
     expected = parser.read_csv(StringIO(data), **kwargs)
+
+    if parser.engine == "pyarrow":
+        msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), iterator=True, **kwargs)
+        return
+
     with parser.read_csv(StringIO(data), iterator=True, **kwargs) as reader:
         first_chunk = reader.read(3)
         tm.assert_frame_equal(first_chunk, expected[:3])
@@ -41,7 +46,6 @@ def test_iterator(all_parsers):
     tm.assert_frame_equal(last_chunk, expected[3:])
 
 
-@xfail_pyarrow  # ValueError: The 'iterator' option is not supported
 def test_iterator2(all_parsers):
     parser = all_parsers
     data = """A,B,C
@@ -50,6 +54,12 @@ def test_iterator2(all_parsers):
 baz,7,8,9
 """
 
+    if parser.engine == "pyarrow":
+        msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), iterator=True)
+        return
+
     with parser.read_csv(StringIO(data), iterator=True) as reader:
         result = list(reader)
 
@@ -61,7 +71,6 @@ def test_iterator2(all_parsers):
     tm.assert_frame_equal(result[0], expected)
 
 
-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_iterator_stop_on_chunksize(all_parsers):
     # gh-3967: stopping iteration when chunksize is specified
     parser = all_parsers
@@ -70,6 +79,11 @@ def test_iterator_stop_on_chunksize(all_parsers):
 bar,4,5,6
 baz,7,8,9
 """
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), chunksize=1)
+        return
 
     with parser.read_csv(StringIO(data), chunksize=1) as reader:
         result = list(reader)
@@ -83,7 +97,6 @@ def test_iterator_stop_on_chunksize(all_parsers):
     tm.assert_frame_equal(concat(result), expected)
 
 
-@xfail_pyarrow  # AssertionError: Regex pattern did not match
 @pytest.mark.parametrize(
     "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
 )
@@ -92,6 +105,12 @@ def test_iterator_skipfooter_errors(all_parsers, kwargs):
     parser = all_parsers
     data = "a\n1\n2"
 
+    if parser.engine == "pyarrow":
+        msg = (
+            "The '(chunksize|iterator)' option is not supported with the "
+            "'pyarrow' engine"
+        )
+
     with pytest.raises(ValueError, match=msg):
         with parser.read_csv(StringIO(data), skipfooter=1, **kwargs) as _:
             pass
diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py
index 52ddb38192a6b..7e841ed8b4ebd 100644
--- a/pandas/tests/io/parser/common/test_read_errors.py
+++ b/pandas/tests/io/parser/common/test_read_errors.py
@@ -63,7 +63,6 @@ def test_bad_stream_exception(all_parsers, csv_dir_path):
             parser.read_csv(stream)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 def test_malformed(all_parsers):
     # see gh-6607
     parser = all_parsers
@@ -74,11 +73,14 @@ def test_malformed(all_parsers):
 2,3,4
 """
     msg = "Expected 3 fields in line 4, saw 5"
-    with pytest.raises(ParserError, match=msg):
+    err = ParserError
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        err = ValueError
+    with pytest.raises(err, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#")
 
 
-@xfail_pyarrow  # ValueError: The 'iterator' option is not supported
 @pytest.mark.parametrize("nrows", [5, 3, None])
 def test_malformed_chunks(all_parsers, nrows):
     data = """ignore
@@ -90,6 +92,20 @@ def test_malformed_chunks(all_parsers, nrows):
 2,3,4
 """
     parser = all_parsers
+
+    if parser.engine == "pyarrow":
+        msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data),
+                header=1,
+                comment="#",
+                iterator=True,
+                chunksize=1,
+                skiprows=[2],
+            )
+        return
+
     msg = "Expected 3 fields in line 6, saw 5"
     with parser.read_csv(
         StringIO(data), header=1, comment="#", iterator=True, chunksize=1, skiprows=[2]
@@ -239,19 +255,21 @@ def test_null_byte_char(request, all_parsers):
             parser.read_csv(StringIO(data), names=names)
 
 
-# ValueError: the 'pyarrow' engine does not support sep=None with delim_whitespace=False
-@xfail_pyarrow
 @pytest.mark.filterwarnings("always::ResourceWarning")
 def test_open_file(request, all_parsers):
     # GH 39024
     parser = all_parsers
+
+    msg = "Could not determine delimiter"
+    err = csv.Error
     if parser.engine == "c":
-        request.applymarker(
-            pytest.mark.xfail(
-                reason=f"{parser.engine} engine does not support sep=None "
-                f"with delim_whitespace=False"
-            )
+        msg = "the 'c' engine does not support sep=None with delim_whitespace=False"
+        err = ValueError
+    elif parser.engine == "pyarrow":
+        msg = (
+            "the 'pyarrow' engine does not support sep=None with delim_whitespace=False"
         )
+        err = ValueError
 
     with tm.ensure_clean() as path:
         file = Path(path)
@@ -259,7 +277,7 @@ def test_open_file(request, all_parsers):
 
         with tm.assert_produces_warning(None):
             # should not trigger a ResourceWarning
-            with pytest.raises(csv.Error, match="Could not determine delimiter"):
+            with pytest.raises(err, match=msg):
                 parser.read_csv(file, sep=None, encoding_errors="replace")
 
 
diff --git a/pandas/tests/io/parser/common/test_verbose.py b/pandas/tests/io/parser/common/test_verbose.py
index bcfb9cd4032ad..14deba8b40b22 100644
--- a/pandas/tests/io/parser/common/test_verbose.py
+++ b/pandas/tests/io/parser/common/test_verbose.py
@@ -6,10 +6,7 @@
 
 import pytest
 
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
-
-@xfail_pyarrow  # ValueError: The 'verbose' option is not supported
 def test_verbose_read(all_parsers, capsys):
     parser = all_parsers
     data = """a,b,c,d
@@ -22,6 +19,12 @@ def test_verbose_read(all_parsers, capsys):
 one,1,2,3
 two,1,2,3"""
 
+    if parser.engine == "pyarrow":
+        msg = "The 'verbose' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), verbose=True)
+        return
+
     # Engines are verbose in different ways.
     parser.read_csv(StringIO(data), verbose=True)
     captured = capsys.readouterr()
@@ -33,7 +36,6 @@ def test_verbose_read(all_parsers, capsys):
         assert captured.out == "Filled 3 NA values in column a\n"
 
 
-@xfail_pyarrow  # ValueError: The 'verbose' option is not supported
 def test_verbose_read2(all_parsers, capsys):
     parser = all_parsers
     data = """a,b,c,d
@@ -46,6 +48,12 @@ def test_verbose_read2(all_parsers, capsys):
 seven,1,2,3
 eight,1,2,3"""
 
+    if parser.engine == "pyarrow":
+        msg = "The 'verbose' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), verbose=True, index_col=0)
+        return
+
     parser.read_csv(StringIO(data), verbose=True, index_col=0)
     captured = capsys.readouterr()
 
diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py
index c7586bd9334ef..b1b35447b60c2 100644
--- a/pandas/tests/io/parser/dtypes/test_categorical.py
+++ b/pandas/tests/io/parser/dtypes/test_categorical.py
@@ -146,8 +146,6 @@ def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
     tm.assert_frame_equal(actual, expected)
 
 
-# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
-@xfail_pyarrow
 def test_categorical_dtype_chunksize_infer_categories(all_parsers):
     # see gh-10153
     parser = all_parsers
@@ -160,6 +158,13 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers):
         DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}),
         DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]),
     ]
+
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), dtype={"b": "category"}, chunksize=2)
+        return
+
     with parser.read_csv(
         StringIO(data), dtype={"b": "category"}, chunksize=2
     ) as actuals:
@@ -167,8 +172,6 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers):
             tm.assert_frame_equal(actual, expected)
 
 
-# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
-@xfail_pyarrow
 def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
     # see gh-10153
     parser = all_parsers
@@ -186,6 +189,13 @@ def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
         ),
     ]
     dtype = CategoricalDtype(cats)
+
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2)
+        return
+
     with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals:
         for actual, expected in zip(actuals, expecteds):
             tm.assert_frame_equal(actual, expected)
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 3f3d340ab2e08..32b4b1dedc3cb 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -73,7 +73,6 @@ def test_dtype_per_column(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.usefixtures("pyarrow_xfail")
 def test_invalid_dtype_per_column(all_parsers):
     parser = all_parsers
     data = """\
@@ -87,7 +86,6 @@ def test_invalid_dtype_per_column(all_parsers):
         parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"})
 
 
-@pytest.mark.usefixtures("pyarrow_xfail")
 def test_raise_on_passed_int_dtype_with_nas(all_parsers):
     # see gh-2631
     parser = all_parsers
@@ -96,22 +94,31 @@ def test_raise_on_passed_int_dtype_with_nas(all_parsers):
 2001,,11
 2001,106380451,67"""
 
-    msg = (
-        "Integer column has NA values"
-        if parser.engine == "c"
-        else "Unable to convert column DOY"
-    )
+    if parser.engine == "c":
+        msg = "Integer column has NA values"
+    elif parser.engine == "pyarrow":
+        msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
+    else:
+        msg = "Unable to convert column DOY"
+
     with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True)
 
 
-@pytest.mark.usefixtures("pyarrow_xfail")
 def test_dtype_with_converters(all_parsers):
     parser = all_parsers
     data = """a,b
 1.1,2.2
 1.2,2.3"""
 
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)}
+            )
+        return
+
     # Dtype spec ignored if converted specified.
     result = parser.read_csv_check_warnings(
         ParserWarning,

From b2d9ec17c52084ee2b629633c9119c01ea11d387 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sat, 11 Nov 2023 14:52:00 -0500
Subject: [PATCH 2/2] ASV: avoid "H" and "S" freq deprecations (#55921)

update ASVs for freq deprecations
---
 asv_bench/benchmarks/algorithms.py     |  8 ++++----
 asv_bench/benchmarks/arithmetic.py     |  2 +-
 asv_bench/benchmarks/frame_methods.py  |  6 +++---
 asv_bench/benchmarks/gil.py            |  2 +-
 asv_bench/benchmarks/groupby.py        |  2 +-
 asv_bench/benchmarks/indexing.py       |  4 ++--
 asv_bench/benchmarks/inference.py      |  4 ++--
 asv_bench/benchmarks/io/csv.py         | 12 ++++++------
 asv_bench/benchmarks/io/excel.py       |  2 +-
 asv_bench/benchmarks/io/hdf.py         |  2 +-
 asv_bench/benchmarks/io/json.py        | 10 +++++-----
 asv_bench/benchmarks/io/pickle.py      |  2 +-
 asv_bench/benchmarks/io/stata.py       |  2 +-
 asv_bench/benchmarks/join_merge.py     |  2 +-
 asv_bench/benchmarks/period.py         |  2 +-
 asv_bench/benchmarks/series_methods.py |  4 ++--
 asv_bench/benchmarks/strftime.py       |  4 ++--
 asv_bench/benchmarks/timeseries.py     | 12 ++++++------
 18 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 192f19c36b47d..6ab8e4f14e979 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -50,9 +50,9 @@ def setup(self, unique, sort, dtype):
             "float": pd.Index(np.random.randn(N), dtype="float64"),
             "object_str": string_index,
             "object": pd.Index(np.arange(N), dtype="object"),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
             "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
             ),
             "Int64": pd.array(np.arange(N), dtype="Int64"),
             "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
@@ -93,9 +93,9 @@ def setup(self, unique, keep, dtype):
             "uint": pd.Index(np.arange(N), dtype="uint64"),
             "float": pd.Index(np.random.randn(N), dtype="float64"),
             "string": tm.makeStringIndex(N),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
             "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
             ),
             "timestamp[ms][pyarrow]": pd.Index(
                 np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))
diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
index 49543c166d047..d70ad144a3455 100644
--- a/asv_bench/benchmarks/arithmetic.py
+++ b/asv_bench/benchmarks/arithmetic.py
@@ -491,7 +491,7 @@ class BinaryOpsMultiIndex:
     param_names = ["func"]
 
     def setup(self, func):
-        array = date_range("20200101 00:00", "20200102 0:00", freq="S")
+        array = date_range("20200101 00:00", "20200102 0:00", freq="s")
         level_0_names = [str(i) for i in range(30)]
 
         index = pd.MultiIndex.from_product([level_0_names, array])
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index e56fbf1d8c32f..c4ab73553cf1a 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -439,9 +439,9 @@ def setup(self, inplace, dtype):
         N, M = 10000, 100
         if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
             data = {
-                "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
+                "datetime64[ns]": date_range("2011-01-01", freq="h", periods=N),
                 "datetime64[ns, tz]": date_range(
-                    "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                    "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
                 ),
                 "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
             }
@@ -649,7 +649,7 @@ def time_series_nunique_nan(self):
 class Duplicated:
     def setup(self):
         n = 1 << 20
-        t = date_range("2015-01-01", freq="S", periods=(n // 64))
+        t = date_range("2015-01-01", freq="s", periods=(n // 64))
         xs = np.random.randn(n // 64).round(2)
         self.df = DataFrame(
             {
diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
index 4993ffd2c47d0..fb4523f78ccb5 100644
--- a/asv_bench/benchmarks/gil.py
+++ b/asv_bench/benchmarks/gil.py
@@ -212,7 +212,7 @@ def run(dti):
     def time_datetime_to_period(self):
         @test_parallel(num_threads=2)
         def run(dti):
-            dti.to_period("S")
+            dti.to_period("s")
 
         run(self.dti)
 
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 54c240e84243a..d36d88e7b6b42 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -238,7 +238,7 @@ def time_series_nth(self, dtype):
 
 class DateAttributes:
     def setup(self):
-        rng = date_range("1/1/2000", "12/31/2005", freq="H")
+        rng = date_range("1/1/2000", "12/31/2005", freq="h")
         self.year, self.month, self.day = rng.year, rng.month, rng.day
         self.ts = Series(np.random.randn(len(rng)), index=rng)
 
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index 84d95a23bd446..d8b1bf327294a 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -232,7 +232,7 @@ def setup(self, index):
         N = 100000
         indexes = {
             "int": Index(np.arange(N), dtype=np.int64),
-            "datetime": date_range("2011-01-01", freq="S", periods=N),
+            "datetime": date_range("2011-01-01", freq="s", periods=N),
         }
         index = indexes[index]
         self.s = Series(np.random.rand(N), index=index)
@@ -465,7 +465,7 @@ def time_loc_row(self, unique_cols):
 class AssignTimeseriesIndex:
     def setup(self):
         N = 100000
-        idx = date_range("1/1/2000", periods=N, freq="H")
+        idx = date_range("1/1/2000", periods=N, freq="h")
         self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx)
 
     def time_frame_assign_timeseries_index(self):
diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index 476ff14dcc92a..805b0c807452c 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -164,7 +164,7 @@ def time_unique_date_strings(self, cache, count):
 
 class ToDatetimeISO8601:
     def setup(self):
-        rng = date_range(start="1/1/2000", periods=20000, freq="H")
+        rng = date_range(start="1/1/2000", periods=20000, freq="h")
         self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
         self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
         self.strings_tz_space = [
@@ -276,7 +276,7 @@ def time_dup_string_tzoffset_dates(self, cache):
 # GH 43901
 class ToDatetimeInferDatetimeFormat:
     def setup(self):
-        rng = date_range(start="1/1/2000", periods=100000, freq="H")
+        rng = date_range(start="1/1/2000", periods=100000, freq="h")
         self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
 
     def time_infer_datetime_format(self):
diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index c5e3e80571e30..1826291034dee 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -89,7 +89,7 @@ class ToCSVDatetimeIndex(BaseIO):
     fname = "__test__.csv"
 
     def setup(self):
-        rng = date_range("2000", periods=100_000, freq="S")
+        rng = date_range("2000", periods=100_000, freq="s")
         self.data = DataFrame({"a": 1}, index=rng)
 
     def time_frame_date_formatting_index(self):
@@ -102,7 +102,7 @@ def time_frame_date_no_format_index(self):
 class ToCSVPeriod(BaseIO):
     fname = "__test__.csv"
 
-    params = ([1000, 10000], ["D", "H"])
+    params = ([1000, 10000], ["D", "h"])
     param_names = ["nobs", "freq"]
 
     def setup(self, nobs, freq):
@@ -110,7 +110,7 @@ def setup(self, nobs, freq):
         self.data = DataFrame(rng)
         if freq == "D":
             self.default_fmt = "%Y-%m-%d"
-        elif freq == "H":
+        elif freq == "h":
             self.default_fmt = "%Y-%m-%d %H:00"
 
     def time_frame_period_formatting_default(self, nobs, freq):
@@ -130,7 +130,7 @@ def time_frame_period_formatting(self, nobs, freq):
 class ToCSVPeriodIndex(BaseIO):
     fname = "__test__.csv"
 
-    params = ([1000, 10000], ["D", "H"])
+    params = ([1000, 10000], ["D", "h"])
     param_names = ["nobs", "freq"]
 
     def setup(self, nobs, freq):
@@ -138,7 +138,7 @@ def setup(self, nobs, freq):
         self.data = DataFrame({"a": 1}, index=rng)
         if freq == "D":
             self.default_fmt = "%Y-%m-%d"
-        elif freq == "H":
+        elif freq == "h":
             self.default_fmt = "%Y-%m-%d %H:00"
 
     def time_frame_period_formatting_index(self, nobs, freq):
@@ -253,7 +253,7 @@ class ReadCSVConcatDatetime(StringIORewind):
     iso8601 = "%Y-%m-%d %H:%M:%S"
 
     def setup(self):
-        rng = date_range("1/1/2000", periods=50000, freq="S")
+        rng = date_range("1/1/2000", periods=50000, freq="s")
         self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist()))
 
     def time_read_csv(self):
diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index c77c6b6f5727c..f8d81b0f6a699 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -25,7 +25,7 @@ def _generate_dataframe():
     df = DataFrame(
         np.random.randn(N, C),
         columns=[f"float{i}" for i in range(C)],
-        index=date_range("20000101", periods=N, freq="H"),
+        index=date_range("20000101", periods=N, freq="h"),
     )
     df["object"] = tm.makeStringIndex(N)
     return df
diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py
index f3e417e717609..195aaa158e178 100644
--- a/asv_bench/benchmarks/io/hdf.py
+++ b/asv_bench/benchmarks/io/hdf.py
@@ -122,7 +122,7 @@ def setup(self, format):
         self.df = DataFrame(
             np.random.randn(N, C),
             columns=[f"float{i}" for i in range(C)],
-            index=date_range("20000101", periods=N, freq="H"),
+            index=date_range("20000101", periods=N, freq="h"),
         )
         self.df["object"] = tm.makeStringIndex(N)
         self.df.to_hdf(self.fname, "df", format=format)
diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
index bebf6ee993aba..8a2e3fa87eb37 100644
--- a/asv_bench/benchmarks/io/json.py
+++ b/asv_bench/benchmarks/io/json.py
@@ -26,7 +26,7 @@ def setup(self, orient, index):
         N = 100000
         indexes = {
             "int": np.arange(N),
-            "datetime": date_range("20000101", periods=N, freq="H"),
+            "datetime": date_range("20000101", periods=N, freq="h"),
         }
         df = DataFrame(
             np.random.randn(N, 5),
@@ -48,7 +48,7 @@ def setup(self, index):
         N = 100000
         indexes = {
             "int": np.arange(N),
-            "datetime": date_range("20000101", periods=N, freq="H"),
+            "datetime": date_range("20000101", periods=N, freq="h"),
         }
         df = DataFrame(
             np.random.randn(N, 5),
@@ -108,7 +108,7 @@ class ToJSON(BaseIO):
     def setup(self, orient, frame):
         N = 10**5
         ncols = 5
-        index = date_range("20000101", periods=N, freq="H")
+        index = date_range("20000101", periods=N, freq="h")
         timedeltas = timedelta_range(start=1, periods=N, freq="s")
         datetimes = date_range(start=1, periods=N, freq="s")
         ints = np.random.randint(100000000, size=N)
@@ -191,7 +191,7 @@ class ToJSONISO(BaseIO):
 
     def setup(self, orient):
         N = 10**5
-        index = date_range("20000101", periods=N, freq="H")
+        index = date_range("20000101", periods=N, freq="h")
         timedeltas = timedelta_range(start=1, periods=N, freq="s")
         datetimes = date_range(start=1, periods=N, freq="s")
         self.df = DataFrame(
@@ -214,7 +214,7 @@ class ToJSONLines(BaseIO):
     def setup(self):
         N = 10**5
         ncols = 5
-        index = date_range("20000101", periods=N, freq="H")
+        index = date_range("20000101", periods=N, freq="h")
         timedeltas = timedelta_range(start=1, periods=N, freq="s")
         datetimes = date_range(start=1, periods=N, freq="s")
         ints = np.random.randint(100000000, size=N)
diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py
index c71cdcdcc5c59..54631d9236887 100644
--- a/asv_bench/benchmarks/io/pickle.py
+++ b/asv_bench/benchmarks/io/pickle.py
@@ -20,7 +20,7 @@ def setup(self):
         self.df = DataFrame(
             np.random.randn(N, C),
             columns=[f"float{i}" for i in range(C)],
-            index=date_range("20000101", periods=N, freq="H"),
+            index=date_range("20000101", periods=N, freq="h"),
         )
         self.df["object"] = tm.makeStringIndex(N)
         self.df.to_pickle(self.fname)
diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py
index 300b9c778f1f8..750bcf4ccee5c 100644
--- a/asv_bench/benchmarks/io/stata.py
+++ b/asv_bench/benchmarks/io/stata.py
@@ -23,7 +23,7 @@ def setup(self, convert_dates):
         self.df = DataFrame(
             np.random.randn(N, C),
             columns=[f"float{i}" for i in range(C)],
-            index=date_range("20000101", periods=N, freq="H"),
+            index=date_range("20000101", periods=N, freq="h"),
         )
         self.df["object"] = tm.makeStringIndex(self.N)
         self.df["int8_"] = np.random.randint(
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index 04ac47a892a22..23824c2c748df 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -213,7 +213,7 @@ class JoinNonUnique:
     # GH 6329
     def setup(self):
         date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="min")
-        daily_dates = date_index.to_period("D").to_timestamp("S", "S")
+        daily_dates = date_index.to_period("D").to_timestamp("s", "s")
         self.fracofday = date_index.values - daily_dates.values
         self.fracofday = self.fracofday.astype("timedelta64[ns]")
         self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000
diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
index 501fe198d41d8..ccd86cae06d58 100644
--- a/asv_bench/benchmarks/period.py
+++ b/asv_bench/benchmarks/period.py
@@ -45,7 +45,7 @@ def time_from_ints_daily(self, freq, is_offset):
 
 class DataFramePeriodColumn:
     def setup(self):
-        self.rng = period_range(start="1/1/1990", freq="S", periods=20000)
+        self.rng = period_range(start="1/1/1990", freq="s", periods=20000)
         self.df = DataFrame(index=range(len(self.rng)))
 
     def time_setitem_period_column(self):
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index f52f7a4bef37a..459d562828f88 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -64,7 +64,7 @@ def setup(self, dtype):
         N = 10**6
         data = {
             "int": np.random.randint(1, 10, N),
-            "datetime": date_range("2000-01-01", freq="S", periods=N),
+            "datetime": date_range("2000-01-01", freq="s", periods=N),
         }
         self.s = Series(data[dtype])
         if dtype == "datetime":
@@ -92,7 +92,7 @@ class Fillna:
     def setup(self, dtype):
         N = 10**6
         if dtype == "datetime64[ns]":
-            data = date_range("2000-01-01", freq="S", periods=N)
+            data = date_range("2000-01-01", freq="s", periods=N)
             na_value = NaT
         elif dtype in ("float64", "Float64"):
             data = np.random.randn(N)
diff --git a/asv_bench/benchmarks/strftime.py b/asv_bench/benchmarks/strftime.py
index 39cc82e1bdf79..47f25b331ab9b 100644
--- a/asv_bench/benchmarks/strftime.py
+++ b/asv_bench/benchmarks/strftime.py
@@ -53,7 +53,7 @@ def time_frame_datetime_formatting_custom(self, nobs):
 
 class PeriodStrftime:
     timeout = 1500
-    params = ([1000, 10000], ["D", "H"])
+    params = ([1000, 10000], ["D", "h"])
     param_names = ["nobs", "freq"]
 
     def setup(self, nobs, freq):
@@ -67,7 +67,7 @@ def setup(self, nobs, freq):
         self.data.set_index("i", inplace=True)
         if freq == "D":
             self.default_fmt = "%Y-%m-%d"
-        elif freq == "H":
+        elif freq == "h":
             self.default_fmt = "%Y-%m-%d %H:00"
 
     def time_frame_period_to_str(self, nobs, freq):
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index 8c78a9c1723df..8e1deb99a66a4 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -27,7 +27,7 @@ def setup(self, index_type):
         N = 100000
         dtidxes = {
             "dst": date_range(
-                start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S"
+                start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s"
             ),
             "repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10),
             "tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"),
@@ -72,13 +72,13 @@ class TzLocalize:
 
     def setup(self, tz):
         dst_rng = date_range(
-            start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S"
+            start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s"
         )
-        self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="S")
+        self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="s")
         self.index = self.index.append(dst_rng)
         self.index = self.index.append(dst_rng)
         self.index = self.index.append(
-            date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="S")
+            date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="s")
         )
 
     def time_infer_dst(self, tz):
@@ -90,7 +90,7 @@ class ResetIndex:
     param_names = "tz"
 
     def setup(self, tz):
-        idx = date_range(start="1/1/2000", periods=1000, freq="H", tz=tz)
+        idx = date_range(start="1/1/2000", periods=1000, freq="h", tz=tz)
         self.df = DataFrame(np.random.randn(1000, 2), index=idx)
 
     def time_reset_datetimeindex(self, tz):
@@ -255,7 +255,7 @@ def time_get_slice(self, monotonic):
 class Lookup:
     def setup(self):
         N = 1500000
-        rng = date_range(start="1/1/2000", periods=N, freq="S")
+        rng = date_range(start="1/1/2000", periods=N, freq="s")
         self.ts = Series(1, index=rng)
         self.lookup_val = rng[N // 2]