Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Python bindings for time zone data (TZiF) reader #12826

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ set(cython_sources
string_casting.pyx
strings_udf.pyx
text.pyx
timezone.pyx
transform.pyx
transpose.pyx
types.pyx
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
strings,
strings_udf,
text,
timezone,
transpose,
unary,
)
Expand Down
15 changes: 15 additions & 0 deletions python/cudf/cudf/_lib/cpp/io/timezone.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string

from cudf._lib.cpp.libcpp.optional cimport optional
from cudf._lib.cpp.table.table cimport table


# Cython declaration of the C++ function
# `cudf::make_timezone_transition_table` from the header "cudf/timezone.hpp".
# The `nogil` on the extern block marks everything declared here as callable
# without holding the Python GIL.
cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil:
    # Returns an owning pointer to a libcudf table.
    #
    # tzif_dir:      optional directory argument -- presumably the directory
    #                holding the TZif files; confirm the behavior for an empty
    #                optional against cudf/timezone.hpp.
    # timezone_name: name of the time zone to load.
    #
    # `except +` tells Cython to translate any C++ exception thrown by the
    # call into a Python exception.
    unique_ptr[table] make_timezone_transition_table(
        optional[string] tzif_dir,
        string timezone_name
    ) except +
50 changes: 50 additions & 0 deletions python/cudf/cudf/_lib/cpp/libcpp/optional.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier:
# Apache-2.0
shwina marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from libcpp cimport bool


# Cython declarations for the C++ standard library `std::optional` template
# (header <optional>). Everything declared here is `nogil`, i.e. usable
# without holding the Python GIL.
cdef extern from "<optional>" namespace "std" nogil:
    # Tag type of the `nullopt` constant declared below.
    cdef cppclass nullopt_t:
        nullopt_t()

    # The `std::nullopt` constant.
    cdef nullopt_t nullopt

    cdef cppclass optional[T]:
        ctypedef T value_type

        # Constructors. The ones that take an `optional` or a `T` are
        # declared `except +`, so a C++ exception thrown while constructing
        # is translated into a Python exception.
        optional()
        optional(nullopt_t)
        optional(optional&) except +
        optional(T&) except +

        # Observers.
        bool has_value()
        T& value()
        T& value_or[U](U& default_value)

        # Modifiers.
        void swap(optional&)
        void reset()
        T& emplace(...)

        # Dereference and assignment operators.
        T& operator*()
        optional& operator=(optional&)
        optional& operator=[U](U&)

        # Boolean conversion / negation.
        bool operator bool()
        bool operator!()

        # Comparison operators between an optional and a value of type U.
        bool operator==[U](optional&, U&)
        bool operator!=[U](optional&, U&)
        bool operator<[U](optional&, U&)
        bool operator>[U](optional&, U&)
        bool operator<=[U](optional&, U&)
        bool operator>=[U](optional&, U&)

    # Free function `std::make_optional`; `except +` translates C++
    # exceptions into Python exceptions.
    optional[T] make_optional[T](...) except +
28 changes: 28 additions & 0 deletions python/cudf/cudf/_lib/timezone.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) 2023, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf._lib.cpp.io.timezone cimport (
make_timezone_transition_table as cpp_make_timezone_transition_table,
)
from cudf._lib.cpp.libcpp.optional cimport make_optional
from cudf._lib.cpp.table.table cimport table
from cudf._lib.utils cimport columns_from_unique_ptr


def make_timezone_transition_table(tzdir, tzname):
    """
    Python-callable wrapper around libcudf's
    ``make_timezone_transition_table``.

    Parameters
    ----------
    tzdir
        Directory argument forwarded to libcudf. Must provide ``.encode()``
        (presumably a ``str`` path to the directory holding the TZif
        files -- confirm against the libcudf documentation).
    tzname
        Time zone name forwarded to libcudf. Must provide ``.encode()``
        (presumably a ``str``).

    Returns
    -------
    The result of ``columns_from_unique_ptr`` applied to the table
    produced by libcudf.
    """
    cdef unique_ptr[table] c_result
    # Convert the Python arguments to C++ std::string before releasing the
    # GIL; Python objects must not be touched inside the `nogil` section.
    cdef string c_tzdir = tzdir.encode()
    cdef string c_tzname = tzname.encode()

    # The libcudf call runs with the GIL released.
    with nogil:
        c_result = move(
            cpp_make_timezone_transition_table(
                # The directory is always passed as an engaged optional;
                # this wrapper never passes an empty one.
                make_optional[string](c_tzdir),
                c_tzname
            )
        )

    # Ownership of the C++ table is handed over to the conversion helper.
    return columns_from_unique_ptr(move(c_result))
71 changes: 71 additions & 0 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) 2023, NVIDIA CORPORATION.

import os
import zoneinfo
from functools import lru_cache

# The `cudf._lib.timezone` extension module defines
# `make_timezone_transition_table`; the former `build_...` spelling is kept
# bound as an alias so existing call sites keep working.
from cudf._lib.timezone import (
    make_timezone_transition_table,
    make_timezone_transition_table as build_timezone_transition_table,
)
from cudf.core.dataframe import DataFrame


@lru_cache(maxsize=20)
def get_tz_data(zone_name):
    """
    Return timezone data (transition times and UTC offsets) for the
    given IANA time zone.

    Results are memoized with an LRU cache holding up to 20 zones, so
    repeated lookups of the same zone return the same DataFrame object.
    Callers should therefore treat the result as read-only.

    Parameters
    ----------
    zone_name: str
        IANA time zone name

    Returns
    -------
    DataFrame with two columns containing the transition times ("dt")
    and corresponding UTC offsets ("offsets").

    Raises
    ------
    zoneinfo.ZoneInfoNotFoundError
        If no data for ``zone_name`` can be found, neither in
        ``zoneinfo.TZPATH`` nor in the ``tzdata`` package.
    """
    try:
        # like zoneinfo, we first look in TZPATH
        return _find_and_read_tzfile_tzpath(zone_name)
    except zoneinfo.ZoneInfoNotFoundError:
        # if that fails, we fall back to using `tzdata`
        return _find_and_read_tzfile_tzdata(zone_name)


def _find_and_read_tzfile_tzpath(zone_name):
    """
    Read the data for ``zone_name`` from the first directory in
    ``zoneinfo.TZPATH`` that contains a file with that name.

    Raises
    ------
    zoneinfo.ZoneInfoNotFoundError
        If none of the TZPATH directories contains such a file.
    """
    # Lazily scan TZPATH in order and stop at the first directory that
    # holds a regular file named after the zone.
    tz_dir = next(
        (
            candidate
            for candidate in zoneinfo.TZPATH
            if os.path.isfile(os.path.join(candidate, zone_name))
        ),
        None,
    )
    if tz_dir is None:
        raise zoneinfo.ZoneInfoNotFoundError(zone_name)
    return _read_tzfile_as_frame(tz_dir, zone_name)


def _find_and_read_tzfile_tzdata(zone_name):
    """
    Read the data for ``zone_name`` from the files shipped with the
    ``tzdata`` package.

    Raises
    ------
    zoneinfo.ZoneInfoNotFoundError
        If the zone cannot be located or read from ``tzdata``.
    """
    from importlib import resources

    try:
        tzdata_root = str(resources.files("tzdata.zoneinfo"))
        return _read_tzfile_as_frame(tzdata_root, zone_name)
    # TODO: make it so that the call to libcudf raises a
    # FileNotFoundError instead of a RuntimeError
    except (ImportError, FileNotFoundError, UnicodeEncodeError, RuntimeError):
        # This handler is essentially vendored from the zoneinfo library.
        # Every exception caught here amounts to "we cannot find this
        # key":
        #
        # ImportError: the package does not exist (e.g. tzdata is not
        #   installed, or there is an error in the folder name like
        #   Amrica/New_York)
        # FileNotFoundError: the resource does not exist in the package
        #   (e.g. Europe/Krasnoy)
        # UnicodeEncodeError: the package or resource name is not UTF-8,
        #   such as keys containing a surrogate character.
        # RuntimeError: what the libcudf call currently raises (see the
        #   TODO above).
        raise zoneinfo.ZoneInfoNotFoundError(zone_name)


def _read_tzfile_as_frame(tzdir, zone_name):
    """
    Load the time zone data for ``zone_name`` located under ``tzdir``
    and return it as a DataFrame.

    Parameters
    ----------
    tzdir: str
        Directory that contains the time zone file.
    zone_name: str
        Time zone name, relative to ``tzdir``.

    Returns
    -------
    DataFrame with the columns "dt" and "offsets", built from the two
    columns returned by the libcudf binding.
    """
    # Call the binding under the name the `cudf._lib.timezone` extension
    # module actually defines.
    dt, offsets = make_timezone_transition_table(tzdir, zone_name)
    return DataFrame._from_columns([dt, offsets], ["dt", "offsets"])