-
Notifications
You must be signed in to change notification settings - Fork 921
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Python bindings for time zone data (TZiF) reader (#12826)
This PR adds bindings to the TZiF reader that was added in the libcudf API in #12805. No tests are being added, as these bindings are for internal use only. In follow-up PRs, I will add a timezone-aware datetime type and timezone-aware operations to the public API, along with tests for those operations.

The bindings can be used as follows:

```python
>>> transition_times, offsets = make_timezone_transition_table("/usr/share/zoneinfo", "America/New_York")
>>> transition_times
<cudf.core.column.datetime.DatetimeColumn object at 0x7f95cd6ac840>
[ 1883-11-18 17:00:00, 1883-11-18 17:00:00, 1918-03-31 07:00:00, 1918-10-27 06:00:00, 1919-03-30 07:00:00, 1919-10-26 06:00:00, 1920-03-28 07:00:00, 1920-10-31 06:00:00, 1921-04-24 07:00:00, 1921-09-25 06:00:00, ... 2365-03-14 07:00:00, 2365-11-07 06:00:00, 2366-03-13 07:00:00, 2366-11-06 06:00:00, 2367-03-12 07:00:00, 2367-11-05 06:00:00, 2368-03-10 07:00:00, 2368-11-03 06:00:00, 2369-03-09 07:00:00, 2369-11-02 06:00:00 ]
dtype: datetime64[s]
>>> offsets
<cudf.core.column.timedelta.TimeDeltaColumn object at 0x7f94e69bad40>
[ -18000, -18000, -14400, -18000, -14400, -18000, -14400, -18000, -14400, -18000, ... -14400, -18000, -14400, -18000, -14400, -18000, -14400, -18000, -14400, -18000 ]
dtype: timedelta64[s]
```

Authors:
- Ashwin Srinath (https://github.com/shwina)
- Vukasin Milovanovic (https://github.com/vuule)

Approvers:
- Bradley Dice (https://github.com/bdice)

URL: #12826
- Loading branch information
Showing
6 changed files
with
166 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
strings, | ||
strings_udf, | ||
text, | ||
timezone, | ||
transpose, | ||
unary, | ||
) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) 2020-2023, NVIDIA CORPORATION. | ||
|
||
from libcpp cimport bool | ||
from libcpp.memory cimport unique_ptr | ||
from libcpp.string cimport string | ||
|
||
from cudf._lib.cpp.libcpp.optional cimport optional | ||
from cudf._lib.cpp.table.table cimport table | ||
|
||
|
||
# Cython declaration of the libcudf function exposed by "cudf/timezone.hpp".
# The whole extern block is marked ``nogil`` so the function can be called
# from inside a ``with nogil:`` section.
cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil:
    # Takes an optional directory (presumably where the TZiF files live --
    # TODO confirm against the libcudf docs) plus a time zone name, and
    # returns ownership of a libcudf table.
    # ``except +`` makes Cython translate any C++ exception thrown by the
    # call into a Python exception.
    unique_ptr[table] make_timezone_transition_table(
        optional[string] tzif_dir,
        string timezone_name
    ) except +
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & | ||
# AFFILIATES. All rights reserved. SPDX-License-Identifier: | ||
# Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from libcpp cimport bool | ||
|
||
|
||
# Cython declarations for ``std::optional`` and its helpers from the C++
# standard header <optional>.
cdef extern from "<optional>" namespace "std" nogil:
    # Tag type used to denote an empty optional.
    cdef cppclass nullopt_t:
        nullopt_t()

    # The ``std::nullopt`` constant.
    cdef nullopt_t nullopt

    cdef cppclass optional[T]:
        ctypedef T value_type

        # Constructors. Those that copy a value or another optional are
        # declared ``except +`` so that C++ exceptions are translated into
        # Python exceptions.
        optional()
        optional(nullopt_t)
        optional(optional&) except +
        optional(T&) except +

        # Observers.
        bool has_value()
        T& value()
        T& value_or[U](U& default_value)

        # Modifiers.
        void swap(optional&)
        void reset()
        T& emplace(...)

        # Dereference, assignment and boolean conversion.
        T& operator*()
        optional& operator=(optional&)
        optional& operator=[U](U&)
        bool operator bool()
        bool operator!()

        # Comparisons against another value.
        bool operator==[U](optional&, U&)
        bool operator!=[U](optional&, U&)
        bool operator<[U](optional&, U&)
        bool operator>[U](optional&, U&)
        bool operator<=[U](optional&, U&)
        bool operator>=[U](optional&, U&)

    # Free function ``std::make_optional``; may raise a translated C++
    # exception.
    optional[T] make_optional[T](...) except +
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Copyright (c) 2023, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.string cimport string | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.cpp.io.timezone cimport ( | ||
make_timezone_transition_table as cpp_make_timezone_transition_table, | ||
) | ||
from cudf._lib.cpp.libcpp.optional cimport make_optional | ||
from cudf._lib.cpp.table.table cimport table | ||
from cudf._lib.utils cimport columns_from_unique_ptr | ||
|
||
|
||
def make_timezone_transition_table(tzdir, tzname):
    """
    Build the transition table for a time zone by calling into libcudf.

    Parameters
    ----------
    tzdir : str
        Directory handed to libcudf as the (optional) TZiF directory,
        e.g. ``"/usr/share/zoneinfo"``.
    tzname : str
        Name of the time zone, e.g. ``"America/New_York"``.

    Returns
    -------
    The columns of the table produced by libcudf, as returned by
    ``columns_from_unique_ptr``.
    """
    # Both arguments are encoded to bytes and copied into C++ strings up
    # front, because no Python objects may be touched once the GIL is
    # released below.
    cdef string c_dir = tzdir.encode()
    cdef string c_name = tzname.encode()
    cdef unique_ptr[table] c_table

    with nogil:
        c_table = move(
            cpp_make_timezone_transition_table(
                make_optional[string](c_dir),
                c_name
            )
        )

    # Hand ownership of the C++ table over to the Python-level columns.
    return columns_from_unique_ptr(move(c_table))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Copyright (c) 2023, NVIDIA CORPORATION. | ||
|
||
import os
import zoneinfo
from functools import lru_cache

# ``cudf._lib.timezone`` defines ``make_timezone_transition_table``; it is
# imported under the name this module already uses so callers keep working.
from cudf._lib.timezone import (
    make_timezone_transition_table as build_timezone_transition_table,
)
from cudf.core.dataframe import DataFrame
|
||
|
||
@lru_cache(maxsize=20)
def get_tz_data(zone_name):
    """
    Look up the transition times and UTC offsets of an IANA time zone.

    The lookup mirrors ``zoneinfo``: the directories in
    ``zoneinfo.TZPATH`` are tried first and the ``tzdata`` package is
    used as a fallback. Up to 20 results are cached per zone name.

    Parameters
    ----------
    zone_name : str
        IANA time zone name

    Returns
    -------
    DataFrame with two columns containing the transition times ("dt")
    and corresponding UTC offsets ("offsets").

    Raises
    ------
    zoneinfo.ZoneInfoNotFoundError
        If the zone is found neither on TZPATH nor in ``tzdata``.
    """
    try:
        # First choice: system time zone data on TZPATH.
        tz_frame = _find_and_read_tzfile_tzpath(zone_name)
    except zoneinfo.ZoneInfoNotFoundError:
        # Not on TZPATH -- fall back to the `tzdata` package.
        tz_frame = _find_and_read_tzfile_tzdata(zone_name)
    return tz_frame
|
||
|
||
def _find_and_read_tzfile_tzpath(zone_name):
    """
    Read ``zone_name`` from the first ``zoneinfo.TZPATH`` directory that
    contains a file with that name.

    Raises
    ------
    zoneinfo.ZoneInfoNotFoundError
        If no directory on TZPATH contains the zone file.
    """
    # Lazily evaluated, so directories after the first hit are never probed.
    candidate_dirs = (
        tz_dir
        for tz_dir in zoneinfo.TZPATH
        if os.path.isfile(os.path.join(tz_dir, zone_name))
    )
    first_hit = next(candidate_dirs, None)
    if first_hit is None:
        raise zoneinfo.ZoneInfoNotFoundError(zone_name)
    return _read_tzfile_as_frame(first_hit, zone_name)
|
||
|
||
def _find_and_read_tzfile_tzdata(zone_name):
    """
    Read ``zone_name`` from the time zone files shipped in the ``tzdata``
    package.

    Raises
    ------
    zoneinfo.ZoneInfoNotFoundError
        If ``tzdata`` is unavailable or the zone cannot be read from it.
    """
    import importlib.resources

    package_base = "tzdata.zoneinfo"

    # The exception handling below is basically vendored from the zoneinfo
    # library. Each of these amounts to "we cannot find this key":
    #
    # ImportError: If the package doesn't exist (e.g. if tzdata is not
    #   installed, or if there's an error in the folder name like
    #   Amrica/New_York)
    # FileNotFoundError: If the resource doesn't exist in the package
    #   (e.g. Europe/Krasnoy)
    # UnicodeEncodeError: If the package or resource name is not UTF-8,
    #   such as keys containing a surrogate character.
    # RuntimeError: Currently raised by the call into libcudf.
    #   TODO: make it so that the call to libcudf raises a
    #   FileNotFoundError instead of a RuntimeError
    key_not_found_errors = (
        ImportError,
        FileNotFoundError,
        UnicodeEncodeError,
        RuntimeError,
    )

    try:
        # Resolving the package directory can itself raise ImportError,
        # so it has to stay inside the ``try``.
        tzdata_dir = str(importlib.resources.files(package_base))
        return _read_tzfile_as_frame(tzdata_dir, zone_name)
    except key_not_found_errors:
        raise zoneinfo.ZoneInfoNotFoundError(zone_name)
|
||
|
||
def _read_tzfile_as_frame(tzdir, zone_name):
    """
    Read the transition table for ``zone_name`` from directory ``tzdir``
    and return it as a DataFrame with columns "dt" and "offsets".
    """
    transition_times, utc_offsets = build_timezone_transition_table(
        tzdir, zone_name
    )
    frame_columns = [transition_times, utc_offsets]
    return DataFrame._from_columns(frame_columns, ["dt", "offsets"])