From 9e912baec890b1af009c0eac2b344229a9285e36 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 9 Jan 2025 10:38:40 +0800 Subject: [PATCH] clib.converison._to_numpy: Add tests for pandas.Series with datetime dtypes (#3670) --- pygmt/clib/conversion.py | 11 +++ pygmt/tests/test_clib_to_numpy.py | 107 ++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index c54923705dc..52eec0d2479 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -192,6 +192,17 @@ def _to_numpy(data: Any) -> np.ndarray: numpy_dtype = np.float64 data = data.to_numpy(na_value=np.nan) + # Deal with timezone-aware datetime dtypes. + if isinstance(dtype, pd.DatetimeTZDtype): # pandas.DatetimeTZDtype + numpy_dtype = getattr(dtype, "base", None) + elif isinstance(dtype, pd.ArrowDtype) and hasattr(dtype.pyarrow_dtype, "tz"): + # pd.ArrowDtype[pa.Timestamp] + numpy_dtype = getattr(dtype, "numpy_dtype", None) + # TODO(pandas>=2.1): Remove the workaround for pandas<2.1. + if Version(pd.__version__) < Version("2.1"): + # In pandas 2.0, dtype.numpy_type is dtype("O"). + numpy_dtype = np.dtype(f"M8[{dtype.pyarrow_dtype.unit}]") # type: ignore[assignment, attr-defined] + array = np.ascontiguousarray(data, dtype=numpy_dtype) # Check if a np.object_ array can be converted to np.str_. diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 79858bc04d4..31b6c2421e8 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -365,6 +365,113 @@ def test_to_numpy_pandas_date(dtype, expected_dtype): ) +pandas_old_version = pytest.mark.xfail( + condition=Version(pd.__version__) < Version("2.1"), + reason="pandas 2.0 bug reported in https://github.com/pandas-dev/pandas/issues/52705", +) + + +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + # NumPy datetime64 types. Only unit 's'/'ms'/'us'/'ns' are supported. + pytest.param("datetime64[s]", "datetime64[s]", id="datetime64[s]"), + pytest.param("datetime64[ms]", "datetime64[ms]", id="datetime64[ms]"), + pytest.param("datetime64[us]", "datetime64[us]", id="datetime64[us]"), + pytest.param("datetime64[ns]", "datetime64[ns]", id="datetime64[ns]"), + # pandas.DatetimeTZDtype can be given in two ways [tz is required]: + # 1. pandas.DatetimeTZDtype(unit, tz) + # 2. String aliases: "datetime64[unit, tz]" + pytest.param( + "datetime64[s, UTC]", + "datetime64[s]", + id="datetime64[s, tz=UTC]", + marks=pandas_old_version, + ), + pytest.param( + "datetime64[s, America/New_York]", + "datetime64[s]", + id="datetime64[s, tz=America/New_York]", + marks=pandas_old_version, + ), + pytest.param( + "datetime64[s, +07:30]", + "datetime64[s]", + id="datetime64[s, +07:30]", + marks=pandas_old_version, + ), + # PyArrow timestamp types can be given in two ways [tz is optional]: + # 1. pd.ArrowDtype(pyarrow.Timestamp(unit, tz=tz)) + # 2. String aliases: "timestamp[unit, tz][pyarrow]" + pytest.param( + "timestamp[s][pyarrow]", + "datetime64[s]", + id="timestamp[s][pyarrow]", + marks=skip_if_no(package="pyarrow"), + ), + pytest.param( + "timestamp[ms][pyarrow]", + "datetime64[ms]", + id="timestamp[ms][pyarrow]", + marks=[skip_if_no(package="pyarrow"), pandas_old_version], + ), + pytest.param( + "timestamp[us][pyarrow]", + "datetime64[us]", + id="timestamp[us][pyarrow]", + marks=[skip_if_no(package="pyarrow"), pandas_old_version], + ), + pytest.param( + "timestamp[ns][pyarrow]", + "datetime64[ns]", + id="timestamp[ns][pyarrow]", + marks=skip_if_no(package="pyarrow"), + ), + pytest.param( + "timestamp[s, UTC][pyarrow]", + "datetime64[s]", + id="timestamp[s, UTC][pyarrow]", + marks=skip_if_no(package="pyarrow"), + ), + pytest.param( + "timestamp[s, America/New_York][pyarrow]", + "datetime64[s]", + id="timestamp[s, America/New_York][pyarrow]", + marks=skip_if_no(package="pyarrow"), + ), + pytest.param( + "timestamp[s, +08:00][pyarrow]", + "datetime64[s]", + id="timestamp[s, +08:00][pyarrow]", + marks=skip_if_no(package="pyarrow"), + ), + ], +) +def test_to_numpy_pandas_datetime(dtype, expected_dtype): + """ + Test the _to_numpy function with pandas.Series of datetime types. + """ + series = pd.Series( + [pd.Timestamp("2024-01-02T03:04:05"), pd.Timestamp("2024-01-02T03:04:06")], + dtype=dtype, + ) + result = _to_numpy(series) + _check_result(result, np.datetime64) + assert result.dtype == expected_dtype + + # Convert to UTC if the dtype is timezone-aware + if "," in str(dtype): # A hacky way to decide if the dtype is timezone-aware. + # TODO(pandas>=2.1): Simplify the if-else statement. + if Version(pd.__version__) < Version("2.1") and dtype.startswith("timestamp"): + # pandas 2.0 doesn't have the dt.tz_convert method for pyarrow.Timestamp. + series = pd.to_datetime(series, utc=True) + else: + series = series.dt.tz_convert("UTC") + # Remove time zone information and preserve local time. + expected_series = series.dt.tz_localize(tz=None) + npt.assert_array_equal(result, np.array(expected_series, dtype=expected_dtype)) + + ######################################################################################## # Test the _to_numpy function with PyArrow arrays. #