diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c71e97b835..b51dc86ae1 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ jobs:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup
with:
- extras: '-E numpy'
+ extras: '-E pandas'
- name: Linting and static code checks
run: pre-commit run --all-files
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4817913764..7c95b5f117 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,13 +17,21 @@ Changes are grouped as follows
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.
+## [7.45.0] - 2024-05-28
+### Added
+- DatapointsAPI now supports `timezone` and new calendar-based granularities like `month`, `quarter` and `year`.
+ These API features are in beta, and the SDK implementation is in alpha, meaning breaking changes can
+ occur without warning. Set the beta header to avoid the warning. Users of `retrieve_dataframe_in_tz` should
+ consider preparing to upgrade as soon as the features reach general availability (GA).
+
## [7.44.1] - 2024-05-24
### Added
-- Missing parameter `timeout` to `client.transformations.preview`.
+- Missing parameter `timeout` to `client.transformations.preview`.
## [7.44.0] - 2024-05-24
### Added
-- New utility function `datetime_to_ms_iso_timestamp` in `cognite.client.utils` to convert a datetime object to a string representing a timestamp in the format expected by the Cognite GraphQL API.
+- New utility function `datetime_to_ms_iso_timestamp` in `cognite.client.utils` to convert a datetime object
+ to a string representing a timestamp in the format expected by the Cognite GraphQL API.
## [7.43.6] - 2024-05-27
### Improved
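To make the new 7.45.0 entry concrete, here is a minimal usage sketch (the time series id is hypothetical; it mirrors the docstring example added further down in this diff):

>>> from cognite.client import CogniteClient
>>> client = CogniteClient()
>>> dps = client.time_series.data.retrieve(
...     id=123,
...     aggregates="sum",
...     granularity="1quarter",
...     timezone="Europe/Oslo")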
diff --git a/cognite/client/_api/datapoint_tasks.py b/cognite/client/_api/datapoint_tasks.py
index bb05c83b5c..4f16d1f96e 100644
--- a/cognite/client/_api/datapoint_tasks.py
+++ b/cognite/client/_api/datapoint_tasks.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import datetime
import math
import numbers
import operator as op
@@ -7,7 +8,6 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
-from datetime import datetime
from functools import cached_property
from itertools import chain
from typing import (
@@ -48,8 +48,12 @@
from cognite.client.utils._auxiliary import is_unlimited
from cognite.client.utils._text import convert_all_keys_to_snake_case, to_snake_case
from cognite.client.utils._time import (
+ ZoneInfo,
align_start_and_end_for_granularity,
+ convert_timezone_to_str,
granularity_to_ms,
+ parse_str_timezone,
+ split_granularity_into_quantity_and_normalized_unit,
split_time_range,
time_ago_to_ms,
timestamp_to_ms,
@@ -92,12 +96,13 @@ class _FullDatapointsQuery:
requested, previously public (before v5).
"""
- start: int | str | datetime | None = None
- end: int | str | datetime | None = None
+ start: int | str | datetime.datetime | None = None
+ end: int | str | datetime.datetime | None = None
id: DatapointsId | None = None
external_id: DatapointsExternalId | None = None
aggregates: Aggregate | str | list[Aggregate | str] | None = None
granularity: str | None = None
+ timezone: str | datetime.timezone | ZoneInfo | None = None
target_unit: str | None = None
target_unit_system: str | None = None
limit: int | None = None
@@ -125,6 +130,7 @@ def top_level_defaults(self) -> dict[str, Any]:
limit=self.limit,
aggregates=self.aggregates,
granularity=self.granularity,
+ timezone=self.timezone,
target_unit=self.target_unit,
target_unit_system=self.target_unit_system,
include_outside_points=self.include_outside_points,
@@ -170,7 +176,7 @@ def _parse(
elif isinstance(query, DatapointsQuery):
if query.identifier.name() != arg_name:
raise ValueError(f"DatapointsQuery passed by {arg_name} is missing required field {arg_name!r}")
- query._set_defaults(self.top_level_defaults)
+ query = DatapointsQuery.from_dict({**self.top_level_defaults, **query.dump()}, id_type=arg_name)
else:
self._raise_on_wrong_ts_identifier_type(query, arg_name, exp_type)
@@ -194,13 +200,21 @@ def validate(self, queries: list[DatapointsQuery], dps_limit_raw: int, dps_limit
# exception 'end not after start' if both are set to the same value.
frozen_time_now = timestamp_to_ms("now")
+ # NOTE: The order of verification checks must be kept due to dependencies:
for query in queries:
query.limit = self._verify_and_convert_limit(query.limit)
query.is_raw_query = self._verify_options_and_categorize_query(query)
- query.max_query_limit = dps_limit_raw if query.is_raw_query else dps_limit_agg
+ query.original_timezone, query.timezone = self._verify_and_convert_timezone(
+ query.timezone, query.is_raw_query
+ )
+ query.granularity, query.is_calendar_query = self._verify_and_convert_granularity(query.granularity)
query.start, query.end = self._verify_time_range(query, frozen_time_now)
- if not query.is_raw_query and isinstance(query.aggregates, str):
- query.aggregates = [query.aggregates]
+ if query.is_raw_query:
+ query.max_query_limit = dps_limit_raw
+ else:
+ query.max_query_limit = dps_limit_agg
+ if isinstance(query.aggregates, str):
+ query.aggregates = [query.aggregates]
return queries
@staticmethod
@@ -239,6 +253,33 @@ def _verify_options_and_categorize_query(query: DatapointsQuery) -> bool:
raise ValueError("'Include status' is not supported for aggregates.")
return False
+ @staticmethod
+ def _verify_and_convert_timezone(
+ tz: str | datetime.timezone | ZoneInfo | None, is_raw_query: bool
+ ) -> tuple[datetime.timezone | ZoneInfo | None, str | None]:
+ if tz is None:
+ return None, None
+ elif isinstance(tz, str):
+ tz = parse_str_timezone(tz) # There...
+ try:
+ api_tz = convert_timezone_to_str(tz) # ...and back again
+ except TypeError:
+ raise ValueError(
+ f"'timezone' not understood, expected one of: [None, str, datetime.timezone, ZoneInfo], got {type(tz)}"
+ )
+ if is_raw_query:
+ # Timezone will only be used for display purposes (or when converting to pandas), so we fetch
+ # as if it didn't exist (concurrently). The API only supports using timezone with aggregate queries.
+ return tz, None
+ return tz, api_tz
+
+ @staticmethod
+ def _verify_and_convert_granularity(granularity: str | None) -> tuple[str | None, bool]:
+ if granularity is None:
+ return None, False
+ quantity, unit = split_granularity_into_quantity_and_normalized_unit(granularity)
+ return f"{quantity}{unit}", unit == "mo"
+
@staticmethod
def _verify_and_convert_limit(limit: int | None) -> int | None:
if is_unlimited(limit):
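For orientation, a rough sketch of how the internal time helpers used above behave, based only on how this diff calls them (the helpers are private and the exact values are assumptions):

>>> from cognite.client.utils._time import (
...     parse_str_timezone,
...     convert_timezone_to_str,
...     split_granularity_into_quantity_and_normalized_unit,
... )
>>> tz = parse_str_timezone("Europe/Oslo")  # str -> tzinfo ("There...")
>>> api_tz = convert_timezone_to_str(tz)  # tzinfo -> str ("...and back again")
>>> quantity, unit = split_granularity_into_quantity_and_normalized_unit("1quarter")
>>> f"{quantity}{unit}", unit == "mo"  # '1quarter' is normalized to '3mo'; 'mo' marks a calendar query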
@@ -254,24 +295,28 @@ def _verify_and_convert_limit(limit: int | None) -> int | None:
f"indicate an unlimited query. Got: {limit} with type: {type(limit)}"
)
+ @staticmethod
def _verify_time_range(
- self,
query: DatapointsQuery,
frozen_time_now: int,
) -> tuple[int, int]:
- start = self._ts_to_ms_frozen_now(query.start, frozen_time_now, default=0) # 1970-01-01
- end = self._ts_to_ms_frozen_now(query.end, frozen_time_now, default=frozen_time_now)
+ start = _FullDatapointsQuery._ts_to_ms_frozen_now(query.start, frozen_time_now, default=0) # 1970-01-01
+ end = _FullDatapointsQuery._ts_to_ms_frozen_now(query.end, frozen_time_now, default=frozen_time_now)
if end <= start:
raise ValueError(
- f"Invalid time range, {end=} ({query.end}) must be later than {start=} ({query.start})"
+ f"Invalid time range, {end=} {f'({query.end!r}) ' if end != query.end else ''}"
+ f"must be later than {start=} {f'({query.start!r}) ' if start != query.start else ''}"
f"(from query: {query.identifier.as_dict(camel_case=False)})"
)
- if not query.is_raw_query:
+ # We align start and end so that we can efficiently parallelize aggregate dps fetching. Queries
+ # using timezone or a calendar granularity (month) are left untouched (and are thus fetched serially):
+ if not (query.is_raw_query or query.use_cursors):
# API rounds aggregate query timestamps in a very particular fashion:
- start, end = align_start_and_end_for_granularity(start, end, query.granularity)
+ start, end = align_start_and_end_for_granularity(start, end, cast(str, query.granularity))
return start, end
- def _ts_to_ms_frozen_now(self, ts: int | str | datetime | None, frozen_time_now: int, default: int) -> int:
+ @staticmethod
+ def _ts_to_ms_frozen_now(ts: int | str | datetime.datetime | None, frozen_time_now: int, default: int) -> int:
# Time 'now' is frozen for all queries in a single call from the user, leading to identical
# results e.g. "4d-ago" and "now"
if ts is None:
@@ -407,7 +452,7 @@ def get_datapoints_from_proto(res: DataPointListItem) -> DatapointsAny:
return cast(DatapointsAny, [])
-def get_ts_info_from_proto(res: DataPointListItem) -> dict[str, int | str | bool]:
+def get_ts_info_from_proto(res: DataPointListItem) -> dict[str, int | str | bool | None]:
# Note: When 'unit_external_id' is returned, regular 'unit' is ditched
return {
"id": res.id,
@@ -470,6 +515,9 @@ def __init__(self, start: int, end: int, parent: BaseTaskOrchestrator) -> None:
if query.include_status is True:
self.static_kwargs["includeStatus"] = query.include_status
+ if query.timezone:
+ self.static_kwargs["timeZone"] = query.timezone
+
@abstractmethod
def get_next_payload_item(self) -> _DatapointsPayloadItem: ...
@@ -501,10 +549,12 @@ def store_partial_result(self, res: DataPointListItem) -> None:
class SerialFetchSubtask(BaseDpsFetchSubtask):
"""Fetches datapoints serially until complete, nice and simple. Stores data in parent"""
- def __init__(self, *, subtask_idx: tuple[float, ...], **kwargs: Any) -> None:
+ def __init__(self, *, subtask_idx: tuple[float, ...], first_cursor: str | None = None, **kwargs: Any) -> None:
super().__init__(**kwargs)
self.subtask_idx = subtask_idx
self.next_start = self.start
+ self.next_cursor = first_cursor
+ self.uses_cursor = self.parent.query.use_cursors
def get_next_payload_item(self) -> _DatapointsPayloadItem:
remaining = self.parent.get_remaining_limit()
@@ -512,11 +562,12 @@ def get_next_payload_item(self) -> _DatapointsPayloadItem:
start=self.next_start,
end=self.end,
limit=min(self.max_query_limit, remaining),
+ cursor=self.next_cursor,
**self.static_kwargs, # type: ignore [typeddict-item]
)
def store_partial_result(self, res: DataPointListItem) -> list[SplittingFetchSubtask] | None:
- if self.parent.ts_info is None:
+ if not self.parent.ts_info:
# In eager mode, first task to complete gets the honor to store ts info:
self.parent._store_ts_info(res)
@@ -526,17 +577,23 @@ def store_partial_result(self, res: DataPointListItem) -> list[SplittingFetchSub
n, last_ts = len(dps), dps[-1].timestamp
self.parent._unpack_and_store(self.subtask_idx, dps)
- self._update_state_for_next_payload(last_ts, n)
- if self._is_task_done(n, res.nextCursor):
+ self._update_state_for_next_payload(res, last_ts, n)
+ if self._is_task_done(n):
self.is_done = True
return None
- def _update_state_for_next_payload(self, last_ts: int, n: int) -> None:
- self.next_start = last_ts + self.parent.offset_next # Move `start` to prepare for next query
+ def _update_state_for_next_payload(self, res: DataPointListItem, last_ts: int, n: int) -> None:
+ self.next_cursor = res.nextCursor
+ if not self.uses_cursor:
+ self.next_start = last_ts + self.parent.offset_next # Move `start` to prepare for next query
self.n_dps_fetched += n # Used to quit limited queries asap
- def _is_task_done(self, n: int, next_cursor: str) -> bool:
- return not next_cursor or n < self.max_query_limit or self.next_start == self.end
+ def _is_task_done(self, n: int) -> bool:
+ return (
+ not self.next_cursor
+ or n < self.max_query_limit
+ or not self.uses_cursor and self.next_start == self.end
+ ) # fmt: skip
class SplittingFetchSubtask(SerialFetchSubtask):
@@ -587,15 +644,15 @@ def _split_self_into_new_subtasks_if_needed(self, last_ts: int) -> list[Splittin
return new_subtasks
-def get_task_orchestrator(is_raw_query: bool, limit: None | int) -> type[BaseTaskOrchestrator]:
- if is_raw_query:
- if limit is None:
+def get_task_orchestrator(query: DatapointsQuery) -> type[BaseTaskOrchestrator]:
+ if query.is_raw_query:
+ if query.limit is None:
return ConcurrentUnlimitedRawTaskOrchestrator
return SerialLimitedRawTaskOrchestrator
else:
- if limit is None:
- return ConcurrentUnlimitedAggTaskOrchestrator
- return SerialLimitedAggTaskOrchestrator
+ if query.limit is not None or query.use_cursors:
+ return SerialLimitedAggTaskOrchestrator
+ return ConcurrentUnlimitedAggTaskOrchestrator
class BaseTaskOrchestrator(ABC):
@@ -610,10 +667,9 @@ def __init__(
self.query = query
self.eager_mode = eager_mode
self.use_numpy = use_numpy
- self.ts_info: dict | None = None
+ self.ts_info: dict[str, Any] = {}
self.subtask_outside_points: OutsideDpsFetchSubtask | None = None
self.raw_dtype_numpy: type[np.object_] | type[np.float64] | None = None
- self.has_limit = self.query.limit is not None
self._is_done = False
self._final_result: Datapoints | DatapointsArray | None = None
@@ -621,22 +677,25 @@ def __init__(
self.dps_data: _DataContainer = defaultdict(list)
self.subtasks: list[BaseDpsFetchSubtask] = []
- if self.query.is_raw_query:
- if self.query.include_status:
+ if query.is_raw_query:
+ if query.include_status:
self.status_code: _DataContainer = defaultdict(list)
self.status_symbol: _DataContainer = defaultdict(list)
- if self.use_numpy and not self.query.ignore_bad_datapoints:
+ if use_numpy and not query.ignore_bad_datapoints:
self.null_timestamps: set[int] = set()
# When running large queries (i.e. not "eager"), all time series have a first batch fetched before
# further subtasks are created. This gives us e.g. outside points for free (if asked for) and ts info:
- if not self.eager_mode:
+ if eager_mode:
+ self.first_cursor = None
+ else:
assert first_dps_batch is not None and first_limit is not None
+ self.first_cursor = first_dps_batch.nextCursor
self._extract_first_dps_batch(first_dps_batch, first_limit)
@property
def is_done(self) -> bool:
- if self.ts_info is None:
+ if not self.ts_info:
return False
elif self._is_done:
return True
@@ -650,13 +709,6 @@ def is_done(self) -> bool:
def is_done(self, value: bool) -> None:
self._is_done = value
- @property
- def ts_info_dct(self) -> dict[str, Any]:
- # This is mostly for mypy to avoid 'cast' x 10000, but also a nice check to make sure
- # we have the required ts info before returning a result dps object.
- assert self.ts_info is not None
- return self.ts_info
-
@property
def start_ts_first_batch(self) -> int:
ts = self.ts_data[FIRST_IDX][0][0]
@@ -682,20 +734,26 @@ def _extract_first_dps_batch(self, first_dps_batch: DataPointListItem, first_lim
self._store_first_batch(dps, first_limit)
def _store_ts_info(self, res: DataPointListItem) -> None:
- self.ts_info = get_ts_info_from_proto(res)
- self.ts_info["granularity"] = self.query.granularity
+ self.ts_info.update(get_ts_info_from_proto(res))
+ self.ts_info["timezone"] = self.query.original_timezone
+ self.ts_info["granularity"] = self.query.original_granularity # show '1quarter', not '3mo'
if self.use_numpy:
self.raw_dtype_numpy = decide_numpy_dtype_from_is_string(res.isString)
def _store_first_batch(self, dps: DatapointsAny, first_limit: int) -> None:
- # Set `start` for the first subtask:
- self.first_start = dps[-1].timestamp + self.offset_next
+ # Set `start` for the first subtask; since we have a cursor, this is only (really)
+ # needed for time domain splitting:
+ self.first_start = dps[-1].timestamp
+ if not self.query.use_cursors:
+ self.first_start += self.offset_next
self._unpack_and_store(FIRST_IDX, dps)
# Are we done after first batch?
- if self.first_start == self.query.end or len(dps) < first_limit:
+ if not self.first_cursor or len(dps) < first_limit:
+ self._is_done = True
+ elif not self.query.use_cursors and self.first_start == self.query.end:
self._is_done = True
- elif self.has_limit and len(dps) <= self.query.finite_limit <= first_limit:
+ elif self.query.limit is not None and len(dps) <= self.query.limit <= first_limit: # TODO: len == limit??
self._is_done = True
def _clear_data_containers(self) -> None:
@@ -743,15 +801,19 @@ def _unpack_and_store(self, idx: tuple[float, ...], dps: DatapointsAny) -> None:
class SerialTaskOrchestratorMixin(BaseTaskOrchestrator):
- def get_remaining_limit(self) -> int:
+ def get_remaining_limit(self) -> float:
assert len(self.subtasks) == 1
- return self.query.finite_limit - self.n_dps_first_batch - self.subtasks[0].n_dps_fetched
+ if self.query.limit is None:
+ return math.inf
+ return self.query.limit - self.n_dps_first_batch - self.subtasks[0].n_dps_fetched
def split_into_subtasks(self, max_workers: int, n_tot_queries: int) -> list[BaseDpsFetchSubtask]:
# For serial fetching, a single task suffice
start = self.query.start if self.eager_mode else self.first_start
subtasks: list[BaseDpsFetchSubtask] = [
- SerialFetchSubtask(start=start, end=self.query.end, parent=self, subtask_idx=FIRST_IDX)
+ SerialFetchSubtask(
+ start=start, end=self.query.end, parent=self, subtask_idx=FIRST_IDX, first_cursor=self.first_cursor
+ )
]
self.subtasks.extend(subtasks)
self._maybe_queue_outside_dps_subtask(subtasks)
@@ -778,13 +840,13 @@ def _create_empty_result(self) -> Datapoints | DatapointsArray:
if not self.use_numpy:
if self.query.include_status:
status_cols.update(status_code=[], status_symbol=[])
- return Datapoints(**self.ts_info_dct, timestamp=[], value=[], **status_cols)
+ return Datapoints(**self.ts_info, timestamp=[], value=[], **status_cols)
if self.query.include_status:
status_cols.update(status_code=np.array([], dtype=np.int32), status_symbol=np.array([], dtype=np.object_))
return DatapointsArray._load_from_arrays(
{
- **self.ts_info_dct,
+ **self.ts_info,
"timestamp": np.array([], dtype=np.int64),
"value": np.array([], dtype=self.raw_dtype_numpy),
**status_cols,
@@ -811,7 +873,7 @@ def _get_result(self) -> Datapoints | DatapointsArray:
status_columns["null_timestamps"] = self.null_timestamps
return DatapointsArray._load_from_arrays(
{
- **self.ts_info_dct,
+ **self.ts_info,
"timestamp": create_array_from_dps_container(self.ts_data),
"value": create_array_from_dps_container(self.dps_data),
**status_columns,
@@ -823,7 +885,7 @@ def _get_result(self) -> Datapoints | DatapointsArray:
status_symbol=create_list_from_dps_container(self.status_symbol),
)
return Datapoints(
- **self.ts_info_dct,
+ **self.ts_info,
timestamp=create_list_from_dps_container(self.ts_data),
value=create_list_from_dps_container(self.dps_data),
**status_columns,
@@ -965,7 +1027,7 @@ def __init__(self, *, query: DatapointsQuery, use_numpy: bool, **kwargs: Any) ->
@cached_property
def offset_next(self) -> int:
- return granularity_to_ms(self.query.granularity)
+ return granularity_to_ms(cast(str, self.query.granularity))
def _set_aggregate_vars(self, aggs_camel_case: list[str], use_numpy: bool) -> None:
# Developer note here: If you ask for datapoints to be returned in JSON, you get `count` as an integer.
@@ -994,10 +1056,10 @@ def _create_empty_result(self) -> Datapoints | DatapointsArray:
arr_dct.update({agg: np.array([], dtype=np.float64) for agg in self.float_aggs})
if self.int_aggs:
arr_dct.update({agg: np.array([], dtype=np.int64) for agg in self.int_aggs})
- return DatapointsArray._load_from_arrays({**self.ts_info_dct, **arr_dct})
+ return DatapointsArray._load_from_arrays({**self.ts_info, **arr_dct})
lst_dct: dict[str, list] = {agg: [] for agg in self.all_aggregates}
- return Datapoints(timestamp=[], **self.ts_info_dct, **convert_all_keys_to_snake_case(lst_dct))
+ return Datapoints(timestamp=[], **self.ts_info, **convert_all_keys_to_snake_case(lst_dct))
def _get_result(self) -> Datapoints | DatapointsArray:
if not self.ts_data or self.query.limit == 0:
@@ -1015,7 +1077,7 @@ def _get_result(self) -> Datapoints | DatapointsArray:
arr_dct[agg] = np.nan_to_num(arr_dct[agg], copy=False, nan=0.0, posinf=np.inf, neginf=-np.inf).astype(
np.int64
)
- return DatapointsArray._load_from_arrays({**self.ts_info_dct, **arr_dct})
+ return DatapointsArray._load_from_arrays({**self.ts_info, **arr_dct})
lst_dct = {"timestamp": create_list_from_dps_container(self.ts_data)}
if self.single_agg:
@@ -1026,7 +1088,7 @@ def _get_result(self) -> Datapoints | DatapointsArray:
for agg in self.int_aggs:
# Need to do an extra NaN-aware int-conversion because protobuf (as opposed to json) returns double:
lst_dct[agg] = list(map(ensure_int, lst_dct[agg]))
- return Datapoints(**self.ts_info_dct, **convert_all_keys_to_snake_case(lst_dct))
+ return Datapoints(**self.ts_info, **convert_all_keys_to_snake_case(lst_dct))
def _unpack_and_store(self, idx: tuple[float, ...], dps: AggregateDatapoints) -> None: # type: ignore [override]
if self.use_numpy:
diff --git a/cognite/client/_api/datapoints.py b/cognite/client/_api/datapoints.py
index 2493521346..6f7da9b2a2 100644
--- a/cognite/client/_api/datapoints.py
+++ b/cognite/client/_api/datapoints.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import datetime
import functools
import heapq
import itertools
@@ -7,7 +8,6 @@
import time
from abc import ABC, abstractmethod
from collections import defaultdict
-from datetime import datetime
from itertools import chain
from operator import itemgetter
from typing import (
@@ -56,9 +56,11 @@
unpack_items_in_payload,
)
from cognite.client.utils._concurrency import ConcurrencySettings, execute_tasks
+from cognite.client.utils._experimental import FeaturePreviewWarning
from cognite.client.utils._identifier import Identifier, IdentifierSequence, IdentifierSequenceCore
from cognite.client.utils._importing import import_as_completed, local_import
from cognite.client.utils._time import (
+ ZoneInfo,
align_large_granularity,
pandas_date_range_tz,
timestamp_to_ms,
@@ -92,13 +94,20 @@ def select_dps_fetch_strategy(dps_client: DatapointsAPI, full_query: _FullDatapo
full_query.validate(all_queries, dps_limit_raw=dps_client._DPS_LIMIT_RAW, dps_limit_agg=dps_client._DPS_LIMIT_AGG)
agg_queries, raw_queries = split_queries_into_raw_and_aggs(all_queries)
+ # If timezone or calendar-based aggregates are requested, use beta:
+ api_subversion = None
+ if any(query.use_cursors for query in all_queries):
+ if "beta" not in dps_client._api_subversion:
+ api_subversion = dps_client._api_subversion + "-beta"
+ dps_client._timezone_calendar_aggs_warning.warn() # We only warn when we auto-use 'beta'
+
# Running mode is decided based on how many time series are requested VS. number of workers:
if len(all_queries) <= (max_workers := dps_client._config.max_workers):
# Start shooting requests from the hip immediately:
- return EagerDpsFetcher(dps_client, all_queries, agg_queries, raw_queries, max_workers)
+ return EagerDpsFetcher(dps_client, all_queries, agg_queries, raw_queries, max_workers, api_subversion)
# Fetch a smaller, chunked batch of dps from all time series - which allows us to do some rudimentary
# guesstimation of dps density - then chunk away:
- return ChunkingDpsFetcher(dps_client, all_queries, agg_queries, raw_queries, max_workers)
+ return ChunkingDpsFetcher(dps_client, all_queries, agg_queries, raw_queries, max_workers, api_subversion)
def split_queries_into_raw_and_aggs(all_queries: _TSQueryList) -> tuple[_TSQueryList, _TSQueryList]:
@@ -116,12 +125,14 @@ def __init__(
agg_queries: _TSQueryList,
raw_queries: _TSQueryList,
max_workers: int,
+ api_subversion: str | None,
) -> None:
self.dps_client = dps_client
self.all_queries = all_queries
self.agg_queries = agg_queries
self.raw_queries = raw_queries
self.max_workers = max_workers
+ self.api_subversion = api_subversion
self.n_queries = len(all_queries)
def fetch_all_datapoints(self) -> DatapointsList:
@@ -146,6 +157,7 @@ def _request_datapoints(self, payload: _DatapointsPayload) -> Sequence[DataPoint
url_path=f"{self.dps_client._RESOURCE_PATH}/list",
accept="application/protobuf",
timeout=self.dps_client._config.timeout,
+ api_subversion=self.api_subversion,
).content
)
return res.items
@@ -464,7 +476,7 @@ def _update_queries_with_new_chunking_limit(
# no chunking), and which are not (...and may be grouped - and how "tightly"):
for query, ts_task in remaining_tasks.items():
est_limit = self._decide_individual_query_limit(query, ts_task, self.dps_client._FETCH_TS_LIMIT)
- query.override_max_query_limit(est_limit)
+ query.max_query_limit = est_limit
return list(remaining_tasks.values())
@@ -519,6 +531,9 @@ def __init__(self, config: ClientConfig, api_version: str | None, cognite_client
self._DPS_INSERT_LIMIT = 100_000
self._RETRIEVE_LATEST_LIMIT = 100
self._POST_DPS_OBJECTS_LIMIT = 10_000
+ self._timezone_calendar_aggs_warning = FeaturePreviewWarning(
+ "beta", "alpha", feature_name="Timezone & Calender-based aggregations"
+ )
def retrieve(
self,
@@ -529,10 +544,11 @@ def retrieve(
| DatapointsQuery
| dict[str, Any]
| SequenceNotStr[str | DatapointsQuery | dict[str, Any]] = None,
- start: int | str | datetime | None = None,
- end: int | str | datetime | None = None,
+ start: int | str | datetime.datetime | None = None,
+ end: int | str | datetime.datetime | None = None,
aggregates: Aggregate | str | list[Aggregate | str] | None = None,
granularity: str | None = None,
+ timezone: str | datetime.timezone | ZoneInfo | None = None,
target_unit: str | None = None,
target_unit_system: str | None = None,
limit: int | None = None,
@@ -551,6 +567,7 @@ def retrieve(
2. Unlimited queries (``limit=None``) are most performant as they are always fetched in parallel, for any number of requested time series.
3. Limited queries, (e.g. ``limit=500_000``) are much less performant, at least for large limits, as each individual time series is fetched serially (we can't predict where on the timeline the datapoints are). Thus parallelisation is only used when asking for multiple "limited" time series.
4. Try to avoid specifying `start` and `end` to be very far from the actual data: If you have data from 2000 to 2015, don't use start=0 (1970).
+ 5. Using ``timezone`` and/or calendar granularities like month/quarter/year in aggregate queries comes at a performance penalty.
Time series support status codes like Good, Uncertain and Bad. You can read more in the Cognite Data Fusion developer documentation on
`status codes. `_
@@ -558,10 +575,18 @@ def retrieve(
Args:
id (None | int | DatapointsQuery | dict[str, Any] | Sequence[int | DatapointsQuery | dict[str, Any]]): Id, dict (with id) or (mixed) sequence of these. See examples below.
external_id (None | str | DatapointsQuery | dict[str, Any] | SequenceNotStr[str | DatapointsQuery | dict[str, Any]]): External id, dict (with external id) or (mixed) sequence of these. See examples below.
- start (int | str | datetime | None): Inclusive start. Default: 1970-01-01 UTC.
- end (int | str | datetime | None): Exclusive end. Default: "now"
- aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Default: None (raw datapoints returned)
- granularity (str | None): The granularity to fetch aggregates at. e.g. '15s', '2h', '10d'. Default: None.
+ start (int | str | datetime.datetime | None): Inclusive start. Default: 1970-01-01 UTC.
+ end (int | str | datetime.datetime | None): Exclusive end. Default: "now"
+ aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Available options: ``average``, ``continuous_variance``, ``count``, ``count_bad``, ``count_good``,
+ ``count_uncertain``, ``discrete_variance``, ``duration_bad``, ``duration_good``, ``duration_uncertain``, ``interpolation``, ``max``, ``min``, ``step_interpolation``, ``sum`` and ``total_variation``.
+ Default: None (raw datapoints returned)
+ granularity (str | None): The granularity to fetch aggregates at. Can be given as an abbreviation or spelled out for clarity: ``s/second(s)``, ``m/minute(s)``, ``h/hour(s)``, ``d/day(s)``, ``w/week(s)``, ``mo/month(s)``,
+ ``q/quarter(s)``, or ``y/year(s)``. Examples: ``30s``, ``5m``, ``1day``, ``2weeks``. Default: None.
+ timezone (str | datetime.timezone | ZoneInfo | None): For raw datapoints, which timezone to use when displaying (will not affect what is retrieved).
+ For aggregates, which timezone to align to for granularity 'hour' and longer. Align to the start of the hour, day or month. For timezones of type Region/Location,
+ like 'Europe/Oslo', pass a string or ``ZoneInfo`` instance. The aggregate duration will then vary, typically due to daylight saving time. You can also use a fixed offset
+ from UTC by passing a string like '+04:00', 'UTC-7' or 'UTC-02:30' or an instance of ``datetime.timezone``. Note: Historical timezones with second offset are not
+ supported, and timezones with minute offsets (e.g. UTC+05:30 or Asia/Kolkata) may take longer to execute.
target_unit (str | None): The unit_external_id of the datapoints returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
target_unit_system (str | None): The unit system of the datapoints returned. Cannot be used with target_unit.
limit (int | None): Maximum number of datapoints to return for each time series. Default: None (no limit)
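Following the ``timezone`` parameter description above, a short sketch of the accepted forms (the id is hypothetical and a ``client`` instance is assumed):

>>> from datetime import timezone, timedelta
>>> from cognite.client.utils import ZoneInfo
>>> client.time_series.data.retrieve(
...     id=42, aggregates="average", granularity="1d", timezone="Europe/Oslo")
>>> client.time_series.data.retrieve(
...     id=42, aggregates="average", granularity="1d", timezone=ZoneInfo("Europe/Oslo"))
>>> client.time_series.data.retrieve(
...     id=42, aggregates="average", granularity="1d", timezone=timezone(timedelta(hours=4)))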
@@ -596,7 +621,7 @@ def retrieve(
In the two code examples above, we have a `dps` object (an instance of ``Datapoints``), and a `dps_lst` object (an instance of ``DatapointsList``).
On `dps`, which in this case contains raw datapoints, you may access the underlying data directly by using the `.value` attribute. This works for
- both numeric and string (raw) datapoints, but not aggregates - they must be accessed by their respective names, because you're allowed to fetch up
+ both numeric and string (raw) datapoints, but not aggregates - they must be accessed by their respective names, because you're allowed to fetch
all available aggregates simultaneously, and they are stored on the same object:
>>> raw_data = dps.value
@@ -614,8 +639,8 @@ def retrieve(
... pass # do something!
All parameters can be individually set if you use and pass ``DatapointsQuery`` objects (even ``ignore_unknown_ids``, contrary to the API).
- If you also pass top-level parameters, these will be overruled by the individual parameters (where both exist). You are free to
- mix any kind of ids and external ids: Single identifiers, single DatapointsQuery objects and (mixed) lists of these.
+ If you also pass top-level parameters, these will be overruled by the individual parameters (where both exist), so think of the top-level parameters as defaults.
+ You are free to mix any kind of ids and external ids: Single identifiers, single DatapointsQuery objects and (mixed) lists of these.
Let's say you want different aggregates and end-times for a few time series (when only fetching a single aggregate, you may pass
the string directly for convenience):
@@ -623,7 +648,7 @@ def retrieve(
>>> from cognite.client.data_classes import DatapointsQuery
>>> dps_lst = client.time_series.data.retrieve(
... id=[
- ... DatapointsQuery(id=42, end="1d-ago", aggregates= "average"),
+ ... DatapointsQuery(id=42, end="1d-ago", aggregates="average"),
... DatapointsQuery(id=69, end="2d-ago", aggregates=["average"]),
... DatapointsQuery(id=96, end="3d-ago", aggregates=["min", "max", "count"]),
... ],
@@ -631,9 +656,20 @@ def retrieve(
... start="5d-ago",
... granularity="1h")
+ Certain aggregates are very useful when they follow the calendar, for example electricity consumption per day, week, month
+ or year. You may request such calendar-based aggregates in a specific timezone to make them even more useful: daylight saving time (DST)
+ will be taken care of automatically and the datapoints will be aligned to the timezone. Note: Calendar granularities and timezone
+ can be used independently. To get monthly local aggregates in Oslo, Norway, you can do:
+
+ >>> dps = client.time_series.data.retrieve(
+ ... id=123,
+ ... aggregates="sum",
+ ... granularity="1month",
+ ... timezone="Europe/Oslo")
+
When requesting multiple time series, an easy way to get the datapoints of a specific one is to use the `.get` method
on the returned ``DatapointsList`` object, then specify if you want `id` or `external_id`. Note: If you fetch a time series
- by using `id`, you can still access it with its `external_id` (and the opposite way around), if you know it::
+ by using `id`, you can still access it with its `external_id` (and the opposite way around), if you know it:
>>> from datetime import datetime, timezone
>>> utc = timezone.utc
@@ -663,7 +699,7 @@ def retrieve(
>>> ts_44 = dps_lst.get(id=44) # Single ``Datapoints`` object
>>> ts_lst = dps_lst.get(external_id=sensor_xid) # List of ``len(periods)`` ``Datapoints`` objects
- The API has an endpoint to "retrieve latest (before)", but not "after". Luckily, we can emulate that behaviour easily.
+ The API has a :py:meth:`~DatapointsAPI.retrieve_latest` endpoint, i.e. "latest before", but not "latest after". Luckily, we can emulate that behaviour easily.
Let's say we have a very dense time series and do not want to fetch all of the available raw data (or fetch less precise
aggregate data), just to get the very first datapoint of every month (from e.g. the year 2000 through 2010)::
@@ -676,7 +712,7 @@ def retrieve(
... limit=1)
To get *all* historic and future datapoints for a time series, e.g. to do a backup, you may want to import the two integer
- constants: `MIN_TIMESTAMP_MS` and `MAX_TIMESTAMP_MS`, to make sure you do not miss any. Performance warning: This pattern of
+ constants: ``MIN_TIMESTAMP_MS`` and ``MAX_TIMESTAMP_MS``, to make sure you do not miss any. **Performance warning**: This pattern of
fetching datapoints from the entire valid time domain is slower and shouldn't be used for regular "day-to-day" queries:
>>> from cognite.client.utils import MIN_TIMESTAMP_MS, MAX_TIMESTAMP_MS
@@ -685,30 +721,9 @@ def retrieve(
... start=MIN_TIMESTAMP_MS,
... end=MAX_TIMESTAMP_MS + 1) # end is exclusive
- Another example here is just to showcase the great flexibility of the `retrieve` endpoint, with a very custom query:
-
- >>> ts1 = 1337
- >>> ts2 = DatapointsQuery(
- ... id=42,
- ... start=-12345, # Overrides `start` arg. below
- ... end="1h-ago",
- ... limit=1000, # Overrides `limit` arg. below
- ... include_outside_points=True,
- ... )
- >>> ts3 = DatapointsQuery(
- ... id=11,
- ... end="1h-ago",
- ... aggregates="max",
- ... granularity="42h",
- ... include_outside_points=False,
- ... ignore_unknown_ids=True, # Overrides `ignore_unknown_ids` arg. below
- ... )
- >>> dps_lst = client.time_series.data.retrieve(
- ... id=[ts1, ts2, ts3], start="2w-ago", limit=None, ignore_unknown_ids=False)
-
If you have a time series with 'unit_external_id' set, you can use the 'target_unit' parameter to convert the datapoints
to the desired unit. In the example below, we are converting temperature readings from a sensor measured and stored in Celsius,
- to Fahrenheit (we're assuming that the time series has e.g. ``unit_external_id='temperature:deg_c'``):
+ to Fahrenheit (we're assuming that the time series has e.g. ``unit_external_id="temperature:deg_c"``):
>>> client.time_series.data.retrieve(
... id=42, start="2w-ago", target_unit="temperature:deg_f")
@@ -741,6 +756,7 @@ def retrieve(
external_id=external_id,
aggregates=aggregates,
granularity=granularity,
+ timezone=timezone,
target_unit=target_unit,
target_unit_system=target_unit_system,
limit=limit,
@@ -768,10 +784,11 @@ def retrieve_arrays(
| DatapointsQuery
| dict[str, Any]
| SequenceNotStr[str | DatapointsQuery | dict[str, Any]] = None,
- start: int | str | datetime | None = None,
- end: int | str | datetime | None = None,
+ start: int | str | datetime.datetime | None = None,
+ end: int | str | datetime.datetime | None = None,
aggregates: Aggregate | str | list[Aggregate | str] | None = None,
granularity: str | None = None,
+ timezone: str | datetime.timezone | ZoneInfo | None = None,
target_unit: str | None = None,
target_unit_system: str | None = None,
limit: int | None = None,
@@ -792,10 +809,18 @@ def retrieve_arrays(
Args:
id (None | int | DatapointsQuery | dict[str, Any] | Sequence[int | DatapointsQuery | dict[str, Any]]): Id, dict (with id) or (mixed) sequence of these. See examples below.
external_id (None | str | DatapointsQuery | dict[str, Any] | SequenceNotStr[str | DatapointsQuery | dict[str, Any]]): External id, dict (with external id) or (mixed) sequence of these. See examples below.
- start (int | str | datetime | None): Inclusive start. Default: 1970-01-01 UTC.
- end (int | str | datetime | None): Exclusive end. Default: "now"
- aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Default: None (raw datapoints returned)
- granularity (str | None): The granularity to fetch aggregates at. e.g. '15s', '2h', '10d'. Default: None.
+ start (int | str | datetime.datetime | None): Inclusive start. Default: 1970-01-01 UTC.
+ end (int | str | datetime.datetime | None): Exclusive end. Default: "now"
+ aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Available options: ``average``, ``continuous_variance``, ``count``, ``count_bad``, ``count_good``,
+ ``count_uncertain``, ``discrete_variance``, ``duration_bad``, ``duration_good``, ``duration_uncertain``, ``interpolation``, ``max``, ``min``, ``step_interpolation``, ``sum`` and ``total_variation``.
+ Default: None (raw datapoints returned)
+ granularity (str | None): The granularity to fetch aggregates at. Can be given as an abbreviation or spelled out for clarity: ``s/second(s)``, ``m/minute(s)``, ``h/hour(s)``, ``d/day(s)``, ``w/week(s)``, ``mo/month(s)``,
+ ``q/quarter(s)``, or ``y/year(s)``. Examples: ``30s``, ``5m``, ``1day``, ``2weeks``. Default: None.
+ timezone (str | datetime.timezone | ZoneInfo | None): For raw datapoints, which timezone to use when displaying (will not affect what is retrieved).
+ For aggregates, which timezone to align to for granularity 'hour' and longer. Align to the start of the hour, day or month. For timezones of type Region/Location,
+ like 'Europe/Oslo', pass a string or ``ZoneInfo`` instance. The aggregate duration will then vary, typically due to daylight saving time. You can also use a fixed offset
+ from UTC by passing a string like '+04:00', 'UTC-7' or 'UTC-02:30' or an instance of ``datetime.timezone``. Note: Historical timezones with second offset are not
+ supported, and timezones with minute offsets (e.g. UTC+05:30 or Asia/Kolkata) may take longer to execute.
target_unit (str | None): The unit_external_id of the datapoints returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
target_unit_system (str | None): The unit system of the datapoints returned. Cannot be used with target_unit.
limit (int | None): Maximum number of datapoints to return for each time series. Default: None (no limit)
@@ -867,6 +892,7 @@ def retrieve_arrays(
external_id=external_id,
aggregates=aggregates,
granularity=granularity,
+ timezone=timezone,
target_unit=target_unit,
target_unit_system=target_unit_system,
limit=limit,
@@ -894,10 +920,11 @@ def retrieve_dataframe(
| DatapointsQuery
| dict[str, Any]
| SequenceNotStr[str | DatapointsQuery | dict[str, Any]] = None,
- start: int | str | datetime | None = None,
- end: int | str | datetime | None = None,
+ start: int | str | datetime.datetime | None = None,
+ end: int | str | datetime.datetime | None = None,
aggregates: Aggregate | str | list[Aggregate | str] | None = None,
granularity: str | None = None,
+ timezone: str | datetime.timezone | ZoneInfo | None = None,
target_unit: str | None = None,
target_unit_system: str | None = None,
limit: int | None = None,
@@ -917,15 +944,23 @@ def retrieve_dataframe(
`status codes. `_
Note:
- If you have duplicated time series in your query, the dataframe columns will also contain duplicates.
+ For many more usage examples, check out the :py:meth:`~DatapointsAPI.retrieve` method which accepts exactly the same arguments.
Args:
id (None | int | DatapointsQuery | dict[str, Any] | Sequence[int | DatapointsQuery | dict[str, Any]]): Id, dict (with id) or (mixed) sequence of these. See examples below.
external_id (None | str | DatapointsQuery | dict[str, Any] | SequenceNotStr[str | DatapointsQuery | dict[str, Any]]): External id, dict (with external id) or (mixed) sequence of these. See examples below.
- start (int | str | datetime | None): Inclusive start. Default: 1970-01-01 UTC.
- end (int | str | datetime | None): Exclusive end. Default: "now"
- aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Default: None (raw datapoints returned)
- granularity (str | None): The granularity to fetch aggregates at. e.g. '15s', '2h', '10d'. Default: None.
+ start (int | str | datetime.datetime | None): Inclusive start. Default: 1970-01-01 UTC.
+ end (int | str | datetime.datetime | None): Exclusive end. Default: "now"
+ aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Available options: ``average``, ``continuous_variance``, ``count``, ``count_bad``, ``count_good``,
+ ``count_uncertain``, ``discrete_variance``, ``duration_bad``, ``duration_good``, ``duration_uncertain``, ``interpolation``, ``max``, ``min``, ``step_interpolation``, ``sum`` and ``total_variation``.
+ Default: None (raw datapoints returned)
+ granularity (str | None): The granularity to fetch aggregates at. Can be given as an abbreviation or spelled out for clarity: ``s/second(s)``, ``m/minute(s)``, ``h/hour(s)``, ``d/day(s)``, ``w/week(s)``, ``mo/month(s)``,
+ ``q/quarter(s)``, or ``y/year(s)``. Examples: ``30s``, ``5m``, ``1day``, ``2weeks``. Default: None.
+ timezone (str | datetime.timezone | ZoneInfo | None): For raw datapoints, which timezone to use when displaying (will not affect what is retrieved).
+ For aggregates, which timezone to align to for granularity 'hour' and longer. Align to the start of the hour, day or month. For timezones of type Region/Location,
+ like 'Europe/Oslo', pass a string or ``ZoneInfo`` instance. The aggregate duration will then vary, typically due to daylight saving time. You can also use a fixed offset
+ from UTC by passing a string like '+04:00', 'UTC-7' or 'UTC-02:30' or an instance of ``datetime.timezone``. Note: Historical timezones with second offset are not
+ supported, and timezones with minute offsets (e.g. UTC+05:30 or Asia/Kolkata) may take longer to execute.
target_unit (str | None): The unit_external_id of the datapoints returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
target_unit_system (str | None): The unit system of the datapoints returned. Cannot be used with target_unit.
limit (int | None): Maximum number of datapoints to return for each time series. Default: None (no limit)
@@ -943,6 +978,8 @@ def retrieve_dataframe(
pd.DataFrame: A pandas DataFrame containing the requested time series. The ordering of columns is ids first, then external_ids. For time series with multiple aggregates, they will be sorted in alphabetical order ("average" before "max").
Warning:
+ If you have duplicated time series in your query, the dataframe columns will also contain duplicates.
+
When retrieving raw datapoints with ``ignore_bad_datapoints=False``, bad datapoints with the value NaN can not be distinguished from those
missing a value (due to being stored in a numpy array); all will become NaNs in the dataframe.
@@ -1002,6 +1039,7 @@ def retrieve_dataframe(
external_id=external_id,
aggregates=aggregates,
granularity=granularity,
+ timezone=timezone,
target_unit=target_unit,
target_unit_system=target_unit_system,
limit=limit,
@@ -1015,25 +1053,26 @@ def retrieve_dataframe(
if not uniform_index:
return fetcher.fetch_all_datapoints_numpy().to_pandas(
- column_names, include_aggregate_name, include_granularity_name
+ column_names, include_aggregate_name, include_granularity_name, include_status=include_status
)
# Uniform index requires extra validation and processing:
+ uses_tz_or_calendar_gran = any(q.use_cursors for q in fetcher.all_queries)
grans_given = {q.granularity for q in fetcher.all_queries}
is_limited = any(q.limit is not None for q in fetcher.all_queries)
- if fetcher.raw_queries or len(grans_given) > 1 or is_limited:
+ if fetcher.raw_queries or len(grans_given) > 1 or is_limited or uses_tz_or_calendar_gran:
raise ValueError(
"Cannot return a uniform index when asking for aggregates with multiple granularities "
- f"({grans_given or []}) OR when (partly) querying raw datapoints OR when a finite limit is used."
+ f"({grans_given or []}) OR when (partly) querying raw datapoints OR when a finite limit is used "
+ "OR when timezone is used OR when a calendar granularity is used (e.g. month/quarter/year)"
)
-
df = fetcher.fetch_all_datapoints_numpy().to_pandas(
- column_names, include_aggregate_name, include_granularity_name
+ column_names, include_aggregate_name, include_granularity_name, include_status=include_status
)
start = pd.Timestamp(min(q.start_ms for q in fetcher.agg_queries), unit="ms")
end = pd.Timestamp(max(q.end_ms for q in fetcher.agg_queries), unit="ms")
(granularity,) = grans_given
# Pandas understand "Cognite granularities" except `m` (minutes) which we must translate:
- freq = granularity.replace("m", "min")
+ freq = cast(str, granularity).replace("m", "min")
return df.reindex(pd.date_range(start=start, end=end, freq=freq, inclusive="left"))
def retrieve_dataframe_in_tz(
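As a small aside on the frequency translation above (pandas does not accept the Cognite ``m`` alias for minutes, so it is translated to ``min``), a quick sketch with made-up values:

>>> import pandas as pd
>>> granularity = "5m"
>>> freq = granularity.replace("m", "min")  # '5min'
>>> pd.date_range(start=pd.Timestamp(0, unit="ms"), periods=3, freq=freq)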
@@ -1041,9 +1080,9 @@ def retrieve_dataframe_in_tz(
*,
id: int | Sequence[int] | None = None,
external_id: str | SequenceNotStr[str] | None = None,
- start: datetime,
- end: datetime,
- aggregates: Aggregate | str | Sequence[Aggregate | str] | None = None,
+ start: datetime.datetime,
+ end: datetime.datetime,
+ aggregates: Aggregate | str | list[Aggregate | str] | None = None,
granularity: str | None = None,
target_unit: str | None = None,
target_unit_system: str | None = None,
@@ -1056,7 +1095,7 @@ def retrieve_dataframe_in_tz(
include_granularity_name: bool = False,
column_names: Literal["id", "external_id"] = "external_id",
) -> pd.DataFrame:
- """Get datapoints directly in a pandas dataframe in the same time zone as ``start`` and ``end``.
+ """Get datapoints directly in a pandas dataframe in the same timezone as ``start`` and ``end``.
This is a convenience method extending the Time Series API capabilities to make timezone-aware datapoints
fetching easy with daylight saving time (DST) transitions taken care of automatically. It builds on top
@@ -1078,7 +1117,7 @@ def retrieve_dataframe_in_tz(
Warning:
The datapoints queries are translated into several sub-queries using a multiple of hours. This means that
- time zones that are not a whole hour offset from UTC are not supported. The same is true for time zones that
+ timezones that are not a whole hour offset from UTC are not supported. The same is true for timezones that
observe DST with an offset from standard time that is not a multiple of 1 hour.
It also sets an upper limit on the maximum granularity setting (around 11 years).
@@ -1086,23 +1125,26 @@ def retrieve_dataframe_in_tz(
Args:
id (int | Sequence[int] | None): ID or list of IDs.
external_id (str | SequenceNotStr[str] | None): External ID or list of External IDs.
- start (datetime): Inclusive start, must be time zone aware.
- end (datetime): Exclusive end, must be time zone aware and have the same time zone as start.
- aggregates (Aggregate | str | Sequence[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Default: None (raw datapoints returned)
- granularity (str | None): The granularity to fetch aggregates at, supported are: second, minute, hour, day, week, month, quarter and year. Default: None.
+ start (datetime.datetime): Inclusive start, must be timezone aware.
+ end (datetime.datetime): Exclusive end, must be timezone aware and have the same timezone as start.
+ aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Available options: ``average``, ``continuous_variance``, ``count``, ``count_bad``, ``count_good``,
+ ``count_uncertain``, ``discrete_variance``, ``duration_bad``, ``duration_good``, ``duration_uncertain``, ``interpolation``, ``max``, ``min``, ``step_interpolation``, ``sum`` and ``total_variation``.
+ Default: None (raw datapoints returned)
+ granularity (str | None): The granularity to fetch aggregates at. Can be given as an abbreviation or spelled out for clarity: ``s/second(s)``, ``m/minute(s)``, ``h/hour(s)``, ``d/day(s)``, ``w/week(s)``, ``mo/month(s)``,
+ ``q/quarter(s)``, or ``y/year(s)``. Examples: ``30s``, ``5m``, ``1day``, ``2weeks``. Default: None.
target_unit (str | None): The unit_external_id of the datapoints returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
target_unit_system (str | None): The unit system of the datapoints returned. Cannot be used with target_unit.
ignore_unknown_ids (bool): Whether to ignore missing time series rather than raising an exception. Default: False
include_status (bool): Also return the status code, an integer, for each datapoint in the response. Only relevant for raw datapoint queries, not aggregates.
ignore_bad_datapoints (bool): Treat datapoints with a bad status code as if they do not exist. If set to false, raw queries will include bad datapoints in the response, and aggregates will in general omit the time period between a bad datapoint and the next good datapoint. Also, the period between a bad datapoint and the previous good datapoint will be considered constant. Default: True.
treat_uncertain_as_bad (bool): Treat datapoints with uncertain status codes as bad. If false, treat datapoints with uncertain status codes as good. Used for both raw queries and aggregates. Default: True.
- uniform_index (bool): If querying aggregates, specifying `uniform_index=True` will return a dataframe with an index with constant spacing between timestamps decided by granularity all the way from `start` to `end` (missing values will be NaNs). Default: False
+ uniform_index (bool): If querying aggregates with a non-calendar granularity, specifying ``uniform_index=True`` will return a dataframe with an index with constant spacing between timestamps decided by granularity all the way from `start` to `end` (missing values will be NaNs). Default: False
include_aggregate_name (bool): Include 'aggregate' in the column name, e.g. `my-ts|average`. Ignored for raw time series. Default: True
include_granularity_name (bool): Include 'granularity' in the column name, e.g. `my-ts|12h`. Added after 'aggregate' when present. Ignored for raw time series. Default: False
column_names (Literal["id", "external_id"]): Use either ids or external ids as column names. Time series missing external id will use id as backup. Default: "external_id"
Returns:
- pd.DataFrame: A pandas DataFrame containing the requested time series with a DatetimeIndex localized in the given time zone.
+ pd.DataFrame: A pandas DataFrame containing the requested time series with a DatetimeIndex localized in the given timezone.
Warning:
When retrieving raw datapoints with ``ignore_bad_datapoints=False``, bad datapoints with the value NaN can not be distinguished from those
@@ -1110,11 +1152,12 @@ def retrieve_dataframe_in_tz(
Examples:
- Get a pandas dataframe in the time zone of Oslo, Norway:
+ Get a pandas dataframe in the timezone of Oslo, Norway:
>>> from cognite.client import CogniteClient
>>> # In Python >=3.9 you may import directly from `zoneinfo`
>>> from cognite.client.utils import ZoneInfo
+ >>> from datetime import datetime
>>> client = CogniteClient()
>>> df = client.time_series.data.retrieve_dataframe_in_tz(
... id=12345,
@@ -1125,12 +1168,8 @@ def retrieve_dataframe_in_tz(
... column_names="id")
Get a pandas dataframe with the sum and continuous variance of the time series with external id "foo" and "bar",
- for each quarter from 2020 to 2022 in the time zone of New York, United States:
+ for each quarter from 2020 to 2022 in the timezone of New York, United States:
- >>> from cognite.client import CogniteClient
- >>> # In Python >=3.9 you may import directly from `zoneinfo`
- >>> from cognite.client.utils import ZoneInfo
- >>> client = CogniteClient()
>>> df = client.time_series.data.retrieve_dataframe_in_tz(
... external_id=["foo", "bar"],
... aggregates=["sum", "continuous_variance"],
@@ -1220,7 +1259,7 @@ def retrieve_latest(
self,
id: int | LatestDatapointQuery | list[int | LatestDatapointQuery] | None = None,
external_id: str | LatestDatapointQuery | list[str | LatestDatapointQuery] | None = None,
- before: None | int | str | datetime = None,
+ before: None | int | str | datetime.datetime = None,
target_unit: str | None = None,
target_unit_system: str | None = None,
include_status: bool = False,
@@ -1236,7 +1275,7 @@ def retrieve_latest(
Args:
id (int | LatestDatapointQuery | list[int | LatestDatapointQuery] | None): Id or list of ids.
external_id (str | LatestDatapointQuery | list[str | LatestDatapointQuery] | None): External id or list of external ids.
- before (None | int | str | datetime): (Union[int, str, datetime]): Get latest datapoint before this time. Not used when passing 'LatestDatapointQuery'.
+ before (None | int | str | datetime.datetime): Get latest datapoint before this time. Not used when passing 'LatestDatapointQuery'.
target_unit (str | None): The unit_external_id of the datapoint returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
target_unit_system (str | None): The unit system of the datapoint returned. Cannot be used with target_unit.
include_status (bool): Also return the status code, an integer, for each datapoint in the response.
@@ -1320,8 +1359,8 @@ def insert(
self,
datapoints: Datapoints
| DatapointsArray
- | Sequence[dict[str, int | float | str | datetime]]
- | Sequence[tuple[int | float | datetime, int | float | str]],
+ | Sequence[dict[str, int | float | str | datetime.datetime]]
+ | Sequence[tuple[int | float | datetime.datetime, int | float | str]],
id: int | None = None,
external_id: str | None = None,
) -> None:
@@ -1334,7 +1373,7 @@ def insert(
`status codes. `_
Args:
- datapoints (Datapoints | DatapointsArray | Sequence[dict[str, int | float | str | datetime]] | Sequence[tuple[int | float | datetime, int | float | str]]): The datapoints you wish to insert. Can either be a list of tuples, a list of dictionaries, a Datapoints object or a DatapointsArray object. See examples below.
+ datapoints (Datapoints | DatapointsArray | Sequence[dict[str, int | float | str | datetime.datetime]] | Sequence[tuple[int | float | datetime.datetime, int | float | str]]): The datapoints you wish to insert. Can either be a list of tuples, a list of dictionaries, a Datapoints object or a DatapointsArray object. See examples below.
id (int | None): Id of time series to insert datapoints into.
external_id (str | None): External id of time series to insert datapoint into.
@@ -1468,16 +1507,16 @@ def insert_multiple(self, datapoints: list[dict[str, str | int | list | Datapoin
def delete_range(
self,
- start: int | str | datetime,
- end: int | str | datetime,
+ start: int | str | datetime.datetime,
+ end: int | str | datetime.datetime,
id: int | None = None,
external_id: str | None = None,
) -> None:
"""Delete a range of datapoints from a time series.
Args:
- start (int | str | datetime): Inclusive start of delete range
- end (int | str | datetime): Exclusive end of delete range
+ start (int | str | datetime.datetime): Inclusive start of delete range
+ end (int | str | datetime.datetime): Exclusive end of delete range
id (int | None): Id of time series to delete data from
external_id (str | None): External id of time series to delete data from
@@ -1582,7 +1621,7 @@ def insert_dataframe(self, df: pd.DataFrame, external_id_headers: bool = True, d
class _InsertDatapoint(NamedTuple):
- ts: int | datetime
+ ts: int | datetime.datetime
value: str | float
status_code: int | None = None
status_symbol: str | None = None
@@ -1761,7 +1800,7 @@ def __init__(
self,
id: None | int | LatestDatapointQuery | list[int | LatestDatapointQuery],
external_id: None | str | LatestDatapointQuery | list[str | LatestDatapointQuery],
- before: None | int | str | datetime,
+ before: None | int | str | datetime.datetime,
target_unit: None | str,
target_unit_system: None | str,
include_status: bool,
@@ -1777,7 +1816,7 @@ def __init__(
self.default_ignore_bad_datapoints = ignore_bad_datapoints
self.default_treat_uncertain_as_bad = treat_uncertain_as_bad
- self.settings_before: dict[tuple[str, int], None | int | str | datetime] = {}
+ self.settings_before: dict[tuple[str, int], None | int | str | datetime.datetime] = {}
self.settings_target_unit: dict[tuple[str, int], None | str] = {}
self.settings_target_unit_system: dict[tuple[str, int], None | str] = {}
self.settings_include_status: dict[tuple[str, int], bool | None] = {}
diff --git a/cognite/client/_version.py b/cognite/client/_version.py
index 7f93f0c66e..41b4fdc504 100644
--- a/cognite/client/_version.py
+++ b/cognite/client/_version.py
@@ -1,4 +1,4 @@
from __future__ import annotations
-__version__ = "7.44.1"
+__version__ = "7.45.0"
__api_subversion__ = "20230101"
diff --git a/cognite/client/data_classes/datapoints.py b/cognite/client/data_classes/datapoints.py
index d194c07b1d..aeb8cb69aa 100644
--- a/cognite/client/data_classes/datapoints.py
+++ b/cognite/client/data_classes/datapoints.py
@@ -1,11 +1,12 @@
from __future__ import annotations
+import contextlib
+import datetime
import json
import typing
import warnings
from collections import defaultdict
from dataclasses import InitVar, dataclass, fields
-from datetime import datetime
from enum import IntEnum
from functools import cached_property
from typing import (
@@ -17,6 +18,7 @@
Literal,
Sequence,
TypedDict,
+ cast,
overload,
)
@@ -29,7 +31,8 @@
from cognite.client.utils._identifier import Identifier
from cognite.client.utils._importing import local_import
from cognite.client.utils._pandas_helpers import (
- concat_dataframes_with_nullable_int_cols,
+ concat_dps_dataframe_list,
+ convert_tz_for_pandas,
notebook_display_with_fallback,
)
from cognite.client.utils._text import (
@@ -38,7 +41,12 @@
to_camel_case,
to_snake_case,
)
-from cognite.client.utils._time import convert_and_isoformat_time_attrs
+from cognite.client.utils._time import (
+ ZoneInfo,
+ convert_and_isoformat_timestamp,
+ convert_timezone_to_str,
+ parse_str_timezone,
+)
from cognite.client.utils.useful_types import SequenceNotStr
if NUMPY_IS_AVAILABLE:
@@ -115,6 +123,7 @@ class _DatapointsPayloadItem(TypedDict, total=False):
end: int
aggregates: list[str] | None
granularity: str | None
+ timeZone: str | None
targetUnit: str | None
targetUnitSystem: str | None
limit: int
@@ -122,6 +131,7 @@ class _DatapointsPayloadItem(TypedDict, total=False):
includeStatus: bool
ignoreBadDataPoints: bool
treatUncertainAsBad: bool
+ cursor: str | None
class _DatapointsPayload(_DatapointsPayloadItem):
@@ -142,6 +152,7 @@ class DatapointsQuery:
"end",
"aggregates",
"granularity",
+ "timezone",
"target_unit",
"target_unit_system",
"limit",
@@ -154,12 +165,13 @@ class DatapointsQuery:
)
id: InitVar[int | None] = None
external_id: InitVar[str | None] = None
- start: int | str | datetime = _NOT_SET # type: ignore [assignment]
- end: int | str | datetime = _NOT_SET # type: ignore [assignment]
- aggregates: Aggregate | list[Aggregate] = _NOT_SET # type: ignore [assignment]
- granularity: str = _NOT_SET # type: ignore [assignment]
- target_unit: str = _NOT_SET # type: ignore [assignment]
- target_unit_system: str = _NOT_SET # type: ignore [assignment]
+ start: int | str | datetime.datetime = _NOT_SET # type: ignore [assignment]
+ end: int | str | datetime.datetime = _NOT_SET # type: ignore [assignment]
+ aggregates: Aggregate | list[Aggregate] | None = _NOT_SET # type: ignore [assignment]
+ granularity: str | None = _NOT_SET # type: ignore [assignment]
+ timezone: str | datetime.timezone | ZoneInfo | None = _NOT_SET # type: ignore [assignment]
+ target_unit: str | None = _NOT_SET # type: ignore [assignment]
+ target_unit_system: str | None = _NOT_SET # type: ignore [assignment]
limit: int | None = _NOT_SET # type: ignore [assignment]
include_outside_points: bool = _NOT_SET # type: ignore [assignment]
ignore_unknown_ids: bool = _NOT_SET # type: ignore [assignment]
@@ -170,6 +182,8 @@ class DatapointsQuery:
def __post_init__(self, id: int | None, external_id: str | None) -> None:
# Ensure user have just specified one of id/xid:
self._identifier = Identifier.of_either(id, external_id)
+ # Store the original, possibly custom, granularity (we support more formats than the API, so a translation may be needed)
+ self._original_granularity = self.granularity
def __eq__(self, other: object) -> bool:
# Note: Instances representing identical queries should -not- compare equal as this would mean we
@@ -181,12 +195,6 @@ def __eq__(self, other: object) -> bool:
def __hash__(self) -> int:
return hash(id(self)) # See note on __eq__
- def _set_defaults(self, defaults: dict[str, Any]) -> None:
- # Used to merge in default values for any non-set parameter
- for fld in fields(self):
- if getattr(self, fld.name) is _NOT_SET:
- setattr(self, fld.name, defaults[fld.name])
-
@classmethod
# TODO: Remove in next major version (require use of DatapointsQuery directly)
def from_dict(cls, dct: dict[str, Any], id_type: Literal["id", "external_id"]) -> Self:
@@ -207,6 +215,18 @@ def from_dict(cls, dct: dict[str, Any], id_type: Literal["id", "external_id"]) -
def identifier(self) -> Identifier:
return self._identifier
+ @property
+ def original_granularity(self) -> str | None:
+ return self._original_granularity
+
+ @property
+ def original_timezone(self) -> datetime.timezone | ZoneInfo | None:
+ return self._original_timezone
+
+ @original_timezone.setter
+ def original_timezone(self, tz: datetime.timezone | ZoneInfo) -> None:
+ self._original_timezone = tz
+
@cached_property
def aggs_camel_case(self) -> list[str]:
return list(map(to_camel_case, self.aggregates or []))
@@ -221,11 +241,6 @@ def end_ms(self) -> int:
assert isinstance(self.end, int)
return self.end
- @property
- def finite_limit(self) -> int:
- assert isinstance(self.limit, int)
- return self.limit
-
@property
def is_raw_query(self) -> bool:
return self._is_raw_query
@@ -244,6 +259,19 @@ def is_missing(self, value: bool) -> None:
assert isinstance(value, bool)
self._is_missing = value
+ @property
+ def is_calendar_query(self) -> bool:
+ return self._is_calendar_query
+
+ @is_calendar_query.setter
+ def is_calendar_query(self, value: bool) -> None:
+ assert isinstance(value, bool)
+ self._is_calendar_query = value
+
+ @cached_property
+ def use_cursors(self) -> bool:
+ return bool(self.timezone or self.is_calendar_query)
+
@property
def max_query_limit(self) -> int:
return self._max_query_limit
@@ -259,10 +287,6 @@ def capped_limit(self) -> int:
return self.max_query_limit
return min(self.limit, self.max_query_limit)
- def override_max_query_limit(self, new_limit: int) -> None:
- assert isinstance(new_limit, int)
- self.max_query_limit = new_limit
-
def __repr__(self) -> str:
return json.dumps(self.dump(), indent=4)
@@ -277,7 +301,7 @@ def dump(self) -> dict[str, Any]:
def task_orchestrator(self) -> type[BaseTaskOrchestrator]:
from cognite.client._api.datapoint_tasks import get_task_orchestrator
- return get_task_orchestrator(self.is_raw_query, self.limit)
+ return get_task_orchestrator(self)
def to_payload_item(self) -> _DatapointsPayloadItem:
payload = _DatapointsPayloadItem(
@@ -295,7 +319,8 @@ def to_payload_item(self) -> _DatapointsPayloadItem:
payload["ignoreBadDataPoints"] = self.ignore_bad_datapoints
if self.treat_uncertain_as_bad is False:
payload["treatUncertainAsBad"] = self.treat_uncertain_as_bad
-
+ if self.timezone:
+ payload["timeZone"] = self.timezone
if self.is_raw_query:
if self.include_outside_points is True:
payload["includeOutsidePoints"] = self.include_outside_points
@@ -326,7 +351,7 @@ class LatestDatapointQuery:
id: InitVar[int | None] = None
external_id: InitVar[str | None] = None
- before: None | int | str | datetime = None
+ before: None | int | str | datetime.datetime = None
target_unit: str | None = None
target_unit_system: str | None = None
include_status: bool | None = None
@@ -366,6 +391,7 @@ class Datapoint(CogniteResource):
duration_uncertain (int | None): The duration the aggregate is defined and marked as uncertain (measured in milliseconds).
status_code (int | None): The status code for the raw datapoint.
status_symbol (str | None): The status symbol for the raw datapoint.
+ timezone (datetime.timezone | ZoneInfo | None): The timezone to use when displaying the datapoint.
"""
def __init__(
@@ -390,6 +416,7 @@ def __init__(
duration_uncertain: int | None = None,
status_code: int | None = None,
status_symbol: str | None = None,
+ timezone: datetime.timezone | ZoneInfo | None = None,
) -> None:
self.timestamp = timestamp
self.value = value
@@ -411,6 +438,12 @@ def __init__(
self.duration_uncertain = duration_uncertain
self.status_code = status_code
self.status_symbol = status_symbol
+ self.timezone = timezone
+
+ def __str__(self) -> str:
+ item = self.dump(camel_case=False)
+ item["timestamp"] = convert_and_isoformat_timestamp(cast(int, self.timestamp), self.timezone)
+ return _json.dumps(item, indent=4)
def to_pandas(self, camel_case: bool = False) -> pandas.DataFrame: # type: ignore[override]
"""Convert the datapoint into a pandas DataFrame.
@@ -425,12 +458,27 @@ def to_pandas(self, camel_case: bool = False) -> pandas.DataFrame: # type: igno
dumped = self.dump(camel_case=camel_case)
timestamp = dumped.pop("timestamp")
+ tz = convert_tz_for_pandas(self.timezone)
+ return pd.DataFrame(dumped, index=[pd.Timestamp(timestamp, unit="ms", tz=tz)])
- return pd.DataFrame(dumped, index=[pd.Timestamp(timestamp, unit="ms")])
+ @classmethod
+ def _load(cls, resource: dict[str, Any], cognite_client: CogniteClient | None = None) -> Self:
+ instance = super()._load(resource, cognite_client=cognite_client)
+ if isinstance(instance.timezone, str):
+ with contextlib.suppress(ValueError): # Don't fail load if invalid
+ instance.timezone = parse_str_timezone(instance.timezone)
+ return instance
- def dump(self, camel_case: bool = True) -> dict[str, Any]:
+ def dump(self, camel_case: bool = True, include_timezone: bool = True) -> dict[str, Any]:
+ dumped = super().dump(camel_case=camel_case)
# Keep value even if None (bad status codes support missing):
- return {"value": self.value, **super().dump(camel_case=camel_case)}
+ dumped["value"] = self.value # TODO: What if Datapoint represents one or more aggregates?
+ if include_timezone:
+ if self.timezone is not None:
+ dumped["timezone"] = convert_timezone_to_str(self.timezone)
+ else:
+ dumped.pop("timezone", None)
+ return dumped
class DatapointsArray(CogniteResource):
@@ -466,6 +514,7 @@ def __init__(
status_code: NumpyUInt32Array | None = None,
status_symbol: NumpyObjArray | None = None,
null_timestamps: set[int] | None = None,
+ timezone: datetime.timezone | ZoneInfo | None = None,
) -> None:
self.id = id
self.external_id = external_id
@@ -495,6 +544,7 @@ def __init__(
self.status_code = status_code
self.status_symbol = status_symbol
self.null_timestamps = null_timestamps
+ self.timezone = timezone
@property
def _ts_info(self) -> dict[str, Any]:
@@ -506,6 +556,7 @@ def _ts_info(self) -> dict[str, Any]:
"unit": self.unit,
"unit_external_id": self.unit_external_id,
"granularity": self.granularity,
+ "timezone": None if self.timezone is None else convert_timezone_to_str(self.timezone),
}
@classmethod
@@ -515,8 +566,7 @@ def _load_from_arrays(
cognite_client: CogniteClient | None = None,
) -> DatapointsArray:
assert isinstance(dps_dct["timestamp"], np.ndarray) # mypy love
- # Since pandas always uses nanoseconds for datetime, we stick with the same
- # (also future-proofs the SDK; ns may be coming!):
+ # We store timestamps using nanosecond resolution to future-proof the SDK in case nanosecond support is ever added:
dps_dct["timestamp"] = dps_dct["timestamp"].astype("datetime64[ms]").astype("datetime64[ns]")
return cls(**convert_all_keys_to_snake_case(dps_dct))
@@ -547,6 +597,10 @@ def _load(
array_by_attr["statusCode"] = np.array([s["code"] for s in status], dtype=np.uint32)
array_by_attr["statusSymbol"] = np.array([s["symbol"] for s in status], dtype=np.object_)
+ timezone = dps_dct.get("timezone")
+ if isinstance(timezone, str):
+ with contextlib.suppress(ValueError): # Don't fail load if invalid
+ timezone = parse_str_timezone(timezone)
return cls(
id=dps_dct.get("id"),
external_id=dps_dct.get("externalId"),
@@ -576,6 +630,7 @@ def _load(
status_code=array_by_attr.get("statusCode"),
status_symbol=array_by_attr.get("statusSymbol"),
null_timestamps=set(dps_dct["nullTimestamps"]) if "nullTimestamps" in dps_dct else None,
+ timezone=timezone, # type: ignore [arg-type]
)
@classmethod
@@ -626,12 +681,11 @@ def __getitem__(self, item: int | slice) -> Datapoint | DatapointsArray:
data: dict[str, float | str | None] = {
attr: numpy_dtype_fix(arr[item]) for attr, arr in zip(attrs[1:], arrays[1:])
}
-
if self.status_code is not None:
data.update(status_code=self.status_code[item], status_symbol=self.status_symbol[item]) # type: ignore [index]
if self.null_timestamps and timestamp in self.null_timestamps:
data["value"] = None
- return Datapoint(timestamp=timestamp, **data) # type: ignore [arg-type]
+ return Datapoint(timestamp=timestamp, **data, timezone=self.timezone) # type: ignore [arg-type]
def _slice(self, part: slice) -> DatapointsArray:
data: dict[str, Any] = {attr: arr[part] for attr, arr in zip(*self._data_fields())}
@@ -657,7 +711,7 @@ def __iter__(self) -> Iterator[Datapoint]:
"Iterating through a DatapointsArray is very inefficient. Tip: Access the arrays directly and use "
"vectorised numpy ops on those. E.g. `dps.average` for the 'average' aggregate, `dps.value` for the "
"raw datapoints or `dps.timestamp` for the timestamps. You may also convert to a pandas DataFrame using "
- "`dps.to_pandas()`.",
+ "`dps.to_pandas()`. In the next major version, iteration will no longer be possible.",
UserWarning,
)
attrs, arrays = self._data_fields()
@@ -670,7 +724,7 @@ def __iter__(self) -> Iterator[Datapoint]:
if self.null_timestamps and timestamp in self.null_timestamps:
data["value"] = None
- yield Datapoint(timestamp=timestamp, **data) # type: ignore [arg-type]
+ yield Datapoint(timestamp=timestamp, **data, timezone=self.timezone) # type: ignore [arg-type]
def _data_fields(self) -> tuple[list[str], list[npt.NDArray]]:
# Note: Does not return status-related fields
@@ -698,12 +752,23 @@ def dump(self, camel_case: bool = True, convert_timestamps: bool = False) -> dic
else:
# Note: numpy does not have a strftime method to get the exact format we want (hence the datetime detour)
# and for some weird reason .astype(datetime) directly from dt64 returns native integer... whatwhyy
- arrays[0] = arrays[0].astype("datetime64[ms]").astype(datetime).astype(str)
+ if self.timezone is None:
+ arrays[0] = arrays[0].astype("datetime64[ms]").astype(datetime.datetime).astype(str)
+ else:
+ arrays[0] = np.array(
+ [
+ convert_and_isoformat_timestamp(ts, self.timezone)
+ for ts in arrays[0].astype("datetime64[ms]").astype(np.int64).tolist()
+ ],
+ dtype=str,
+ )
if camel_case:
attrs = list(map(to_camel_case, attrs))
dumped = self._ts_info
+ if self.timezone is not None:
+ dumped["timezone"] = str(self.timezone)
datapoints = [dict(zip(attrs, map(numpy_dtype_fix, row))) for row in zip(*arrays)]
if self.status_code is not None or self.status_symbol is not None:
@@ -769,6 +834,10 @@ def to_pandas( # type: ignore [override]
else:
raise ValueError("Argument `column_names` must be either 'external_id' or 'id'")
+ idx, tz = self.timestamp, self.timezone
+ if tz is not None:
+ idx = pd.to_datetime(idx, utc=True).tz_convert(convert_tz_for_pandas(tz))
+
if self.value is not None:
raw_columns: dict[str, npt.NDArray] = {identifier: self.value}
if include_status:
@@ -776,7 +845,7 @@ def to_pandas( # type: ignore [override]
raw_columns[f"{identifier}|status_code"] = self.status_code
if self.status_symbol is not None:
raw_columns[f"{identifier}|status_symbol"] = self.status_symbol
- return pd.DataFrame(raw_columns, index=self.timestamp, copy=False)
+ return pd.DataFrame(raw_columns, index=idx, copy=False)
(_, *agg_names), (_, *arrays) = self._data_fields()
aggregate_columns = [
@@ -785,7 +854,7 @@ def to_pandas( # type: ignore [override]
]
# Since columns might contain duplicates, we can't instantiate from dict as only the
# last key (array/column) would be kept:
- (df := pd.DataFrame(dict(enumerate(arrays)), index=self.timestamp, copy=False)).columns = aggregate_columns
+ (df := pd.DataFrame(dict(enumerate(arrays)), index=idx, copy=False)).columns = aggregate_columns
return df
@@ -821,6 +890,7 @@ class Datapoints(CogniteResource):
status_code (list[int] | None): The status codes for the raw datapoints.
status_symbol (list[str] | None): The status symbols for the raw datapoints.
error (list[None | str] | None): Human readable strings with description of what went wrong (returned by synthetic datapoints queries).
+ timezone (datetime.timezone | ZoneInfo | None): The timezone to use when displaying the datapoints.
"""
def __init__(
@@ -853,6 +923,7 @@ def __init__(
status_code: list[int] | None = None,
status_symbol: list[str] | None = None,
error: list[None | str] | None = None,
+ timezone: datetime.timezone | ZoneInfo | None = None,
) -> None:
self.id = id
self.external_id = external_id
@@ -882,13 +953,15 @@ def __init__(
self.status_code = status_code
self.status_symbol = status_symbol
self.error = error
+ self.timezone = timezone
self.__datapoint_objects: list[Datapoint] | None = None
def __str__(self) -> str:
- item = self.dump()
- item["datapoints"] = convert_and_isoformat_time_attrs(item["datapoints"])
- return _json.dumps(item, indent=4)
+ dumped = self.dump()
+ for dct in dumped["datapoints"]:
+ dct["timestamp"] = convert_and_isoformat_timestamp(dct["timestamp"], self.timezone)
+ return _json.dumps(dumped, indent=4)
def __len__(self) -> int:
return len(self.timestamp)
@@ -910,7 +983,7 @@ def __getitem__(self, item: slice) -> Datapoints: ...
def __getitem__(self, item: int | slice) -> Datapoint | Datapoints:
if isinstance(item, slice):
return self._slice(item)
- dp_args = {}
+ dp_args: dict[str, Any] = {"timezone": self.timezone}
for attr, values in self._get_non_empty_data_fields():
dp_args[attr] = values[item]
@@ -939,7 +1012,9 @@ def dump(self, camel_case: bool = True) -> dict[str, Any]:
"unit": self.unit,
"unit_external_id": self.unit_external_id,
}
- datapoints = [dp.dump(camel_case=camel_case) for dp in self.__get_datapoint_objects()]
+ if self.timezone is not None:
+ dumped["timezone"] = convert_timezone_to_str(self.timezone)
+ datapoints = [dp.dump(camel_case=camel_case, include_timezone=False) for dp in self.__get_datapoint_objects()]
if self.status_code is not None or self.status_symbol is not None:
if (
self.status_code is None
@@ -1021,7 +1096,10 @@ def to_pandas( # type: ignore [override]
else:
data_lists.append(data.astype("float64"))
- idx = pd.to_datetime(self.timestamp, unit="ms")
+ if (tz := self.timezone) is None:
+ idx = pd.to_datetime(self.timestamp, unit="ms")
+ else:
+ idx = pd.to_datetime(self.timestamp, unit="ms", utc=True).tz_convert(convert_tz_for_pandas(tz))
(df := pd.DataFrame(dict(enumerate(data_lists)), index=idx)).columns = field_names
return df
@@ -1070,6 +1148,8 @@ def _load( # type: ignore [override]
for row in dps_object["datapoints"]:
for attr, value in row.items():
data_lists[attr].append(value)
+ if (timezone := dps_object.get("timezone")) is not None:
+ instance.timezone = parse_str_timezone(timezone)
if (status := data_lists.pop("status", None)) is not None:
data_lists["status_code"] = [s["code"] for s in status]
data_lists["status_symbol"] = [s["symbol"] for s in status]
@@ -1080,6 +1160,7 @@ def _load( # type: ignore [override]
return instance
def _extend(self, other_dps: Datapoints) -> None:
+ # TODO: Only used by synthetic time series API, consider removing in a refactoring.
if self.id is None and self.external_id is None:
self.id = other_dps.id
self.external_id = other_dps.external_id
@@ -1087,6 +1168,7 @@ def _extend(self, other_dps: Datapoints) -> None:
self.is_step = other_dps.is_step
self.unit = other_dps.unit
self.unit_external_id = other_dps.unit_external_id
+ self.timezone = other_dps.timezone
for attr, other_value in other_dps._get_non_empty_data_fields(get_empty_lists=True):
value = getattr(self, attr)
@@ -1109,6 +1191,7 @@ def _get_non_empty_data_fields(
"granularity",
"status_code",
"status_symbol",
+ "timezone",
}
for attr, value in self.__dict__.copy().items():
if attr not in skip_attrs and attr[0] != "_" and (attr != "error" or get_error):
@@ -1123,14 +1206,14 @@ def __get_datapoint_objects(self) -> list[Datapoint]:
fields = self._get_non_empty_data_fields(get_error=False)
new_dps_objects = []
for i in range(len(self)):
- dp_args = {}
+ dp_args: dict[str, Any] = {"timezone": self.timezone}
for attr, value in fields:
dp_args[attr] = value[i]
- if self.status_code is not None:
- dp_args.update(
- statusCode=self.status_code[i],
- statusSymbol=self.status_symbol[i], # type: ignore [index]
- )
+ if self.status_code is not None:
+ dp_args.update(
+ statusCode=self.status_code[i],
+ statusSymbol=self.status_symbol[i], # type: ignore [index]
+ )
new_dps_objects.append(Datapoint.load(dp_args))
self.__datapoint_objects = new_dps_objects
return self.__datapoint_objects
@@ -1144,6 +1227,7 @@ def _slice(self, slice: slice) -> Datapoints:
unit=self.unit,
unit_external_id=self.unit_external_id,
granularity=self.granularity,
+ timezone=self.timezone,
)
for attr, value in self._get_non_empty_data_fields():
setattr(truncated_datapoints, attr, value[slice])
@@ -1257,20 +1341,13 @@ def to_pandas( # type: ignore [override]
Returns:
pandas.DataFrame: The datapoints as a pandas DataFrame.
"""
- pd = local_import("pandas")
- dfs = [
- dps.to_pandas(
- column_names=column_names,
- include_aggregate_name=include_aggregate_name,
- include_granularity_name=include_granularity_name,
- include_status=include_status,
- )
- for dps in self
- ]
- if not dfs:
- return pd.DataFrame(index=pd.to_datetime([]))
-
- return concat_dataframes_with_nullable_int_cols(dfs)
+ return concat_dps_dataframe_list(
+ self,
+ column_names=column_names,
+ include_aggregate_name=include_aggregate_name,
+ include_granularity_name=include_granularity_name,
+ include_status=include_status,
+ )
def dump(self, camel_case: bool = True, convert_timestamps: bool = False) -> list[dict[str, Any]]:
"""Dump the instance into a json serializable Python data type.
@@ -1326,10 +1403,11 @@ def get( # type: ignore [override]
return super().get(id, external_id)
def __str__(self) -> str:
- item = self.dump()
- for i in item:
- i["datapoints"] = convert_and_isoformat_time_attrs(i["datapoints"])
- return _json.dumps(item, indent=4)
+ dumped = self.dump()
+ for dps, item in zip(self, dumped):
+ for dct in item["datapoints"]:
+ dct["timestamp"] = convert_and_isoformat_timestamp(dct["timestamp"], dps.timezone)
+ return _json.dumps(dumped, indent=4)
def to_pandas( # type: ignore [override]
self,
@@ -1349,17 +1427,10 @@ def to_pandas( # type: ignore [override]
Returns:
pandas.DataFrame: The datapoints list as a pandas DataFrame.
"""
- pd = local_import("pandas")
- dfs = [
- dps.to_pandas(
- column_names=column_names,
- include_aggregate_name=include_aggregate_name,
- include_granularity_name=include_granularity_name,
- include_status=include_status,
- )
- for dps in self
- ]
- if not dfs:
- return pd.DataFrame(index=pd.to_datetime([]))
-
- return concat_dataframes_with_nullable_int_cols(dfs)
+ return concat_dps_dataframe_list(
+ self,
+ column_names=column_names,
+ include_aggregate_name=include_aggregate_name,
+ include_granularity_name=include_granularity_name,
+ include_status=include_status,
+ )
diff --git a/cognite/client/utils/__init__.py b/cognite/client/utils/__init__.py
index 343d4d6b5e..a864e87bfd 100644
--- a/cognite/client/utils/__init__.py
+++ b/cognite/client/utils/__init__.py
@@ -1,24 +1,15 @@
from __future__ import annotations
-import contextlib
-import sys
-
from cognite.client.utils._time import (
MAX_TIMESTAMP_MS,
MIN_TIMESTAMP_MS,
+ ZoneInfo,
datetime_to_ms,
datetime_to_ms_iso_timestamp,
ms_to_datetime,
timestamp_to_ms,
)
-# Needed for doctest to pass.
-with contextlib.suppress(ImportError):
- if sys.version_info >= (3, 9):
- from zoneinfo import ZoneInfo
- else:
- from backports.zoneinfo import ZoneInfo
-
__all__ = [
"ZoneInfo",
"MAX_TIMESTAMP_MS",
diff --git a/cognite/client/utils/_importing.py b/cognite/client/utils/_importing.py
index a2c9ace42f..4d2b4aae4a 100644
--- a/cognite/client/utils/_importing.py
+++ b/cognite/client/utils/_importing.py
@@ -7,6 +7,7 @@
if TYPE_CHECKING:
from concurrent.futures import Future
+
_T = TypeVar("_T")
diff --git a/cognite/client/utils/_pandas_helpers.py b/cognite/client/utils/_pandas_helpers.py
index b326503169..52f2a88f7e 100644
--- a/cognite/client/utils/_pandas_helpers.py
+++ b/cognite/client/utils/_pandas_helpers.py
@@ -2,19 +2,21 @@
import re
import warnings
+from datetime import timezone
from inspect import signature
from itertools import chain
from numbers import Integral
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Any, Literal, Sequence
from cognite.client.exceptions import CogniteImportError
from cognite.client.utils._importing import local_import
from cognite.client.utils._text import to_camel_case
-from cognite.client.utils._time import TIME_ATTRIBUTES
+from cognite.client.utils._time import TIME_ATTRIBUTES, ZoneInfo
if TYPE_CHECKING:
import pandas as pd
+ from cognite.client.data_classes import DatapointsArrayList, DatapointsList
from cognite.client.data_classes._base import T_CogniteResource, T_CogniteResourceList
@@ -43,6 +45,48 @@ def pandas_major_version() -> int:
return int(__version__.split(".")[0])
+def convert_tz_for_pandas(tz: str | timezone | ZoneInfo | None) -> str | timezone | None:
+ if tz is None or isinstance(tz, (str, timezone)):
+ return tz
+ if isinstance(tz, ZoneInfo):
+ # pandas is not happy about ZoneInfo :shrug:
+ if tz.key is not None:
+ return tz.key
+ raise ValueError("timezone of type ZoneInfo does not have the required 'key' attribute set")
+ raise ValueError(f"'timezone' not understood, expected one of: [None, datetime.timezone, ZoneInfo], got {type(tz)}")
+
+
+def concat_dps_dataframe_list(
+ dps_lst: DatapointsList | DatapointsArrayList,
+ column_names: Literal["id", "external_id"],
+ include_aggregate_name: bool,
+ include_granularity_name: bool,
+ include_status: bool,
+) -> pd.DataFrame:
+ pd = local_import("pandas")
+ dfs = [
+ dps.to_pandas(
+ column_names=column_names,
+ include_aggregate_name=include_aggregate_name,
+ include_granularity_name=include_granularity_name,
+ include_status=include_status,
+ )
+ for dps in dps_lst
+ ]
+ if not dfs:
+ return pd.DataFrame(index=pd.to_datetime([]))
+ timezones = {dps.timezone for dps in dps_lst} - {None}
+ # If attempting to join naive & aware, pandas will raise (so we don't need to):
+ # TypeError: Cannot join tz-naive with tz-aware DatetimeIndex
+ if len(timezones) > 1:
+ warnings.warn(
+ f"When concatenating datapoints localized to different timezones ({sorted(map(str, timezones))}), the "
+ "final dataframe index (timestamps) will be a union of the UTC converted timestamps.",
+ UserWarning,
+ )
+ return concat_dataframes_with_nullable_int_cols(dfs)
+
+
def notebook_display_with_fallback(inst: T_CogniteResource | T_CogniteResourceList, **kwargs: Any) -> str:
if "camel_case" in signature(inst.to_pandas).parameters:
# Default of False enforced (when accepted by method):
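The helper above normalizes the accepted timezone types into something pandas is happy with; a small sketch of its behaviour as implemented (IANA lookups assume the tz database is available):

    from datetime import timedelta, timezone

    from cognite.client.utils import ZoneInfo
    from cognite.client.utils._pandas_helpers import convert_tz_for_pandas

    convert_tz_for_pandas(None)                          # -> None (yields a tz-naive index downstream)
    convert_tz_for_pandas("Europe/Oslo")                 # -> "Europe/Oslo" (strings pass through)
    convert_tz_for_pandas(timezone(timedelta(hours=2)))  # -> fixed-offset timezone, passed through
    convert_tz_for_pandas(ZoneInfo("Asia/Tokyo"))        # -> "Asia/Tokyo" (ZoneInfo replaced by its key)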
diff --git a/cognite/client/utils/_time.py b/cognite/client/utils/_time.py
index 7595d5f250..232ea39dd1 100644
--- a/cognite/client/utils/_time.py
+++ b/cognite/client/utils/_time.py
@@ -9,22 +9,22 @@
from abc import ABC, abstractmethod
from contextlib import suppress
from datetime import datetime, timedelta, timezone
-from typing import TYPE_CHECKING, overload
+from typing import TYPE_CHECKING, cast, overload
from cognite.client.utils._importing import local_import
from cognite.client.utils._text import to_camel_case
+if sys.version_info >= (3, 9):
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+else:
+ from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
if TYPE_CHECKING:
from datetime import tzinfo
import pandas
- if sys.version_info >= (3, 9):
- from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
- else:
- from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError
-
-
+UTC = ZoneInfo("UTC") # type: ignore [abstract]
UNIT_IN_MS_WITHOUT_WEEK = {"s": 1000, "m": 60000, "h": 3600000, "d": 86400000}
UNIT_IN_MS = {**UNIT_IN_MS_WITHOUT_WEEK, "w": 604800000}
VARIABLE_LENGTH_UNITS = {"month", "quarter", "year"}
@@ -32,36 +32,88 @@
GRANULARITY_IN_TIMEDELTA_UNIT = {"w": "weeks", "d": "days", "h": "hours", "m": "minutes", "s": "seconds"}
MIN_TIMESTAMP_MS = -2208988800000 # 1900-01-01 00:00:00.000
MAX_TIMESTAMP_MS = 4102444799999 # 2099-12-31 23:59:59.999
+_GRANULARITY_UNIT_LOOKUP: dict[str, str] = {
+ "s": "s",
+ "sec": "s",
+ "second": "s",
+ "seconds": "s",
+ "t": "m",
+ "m": "m",
+ "min": "m",
+ "minute": "m",
+ "minutes": "m",
+ "h": "h",
+ "hour": "h",
+ "hours": "h",
+ "d": "d",
+ "day": "d",
+ "days": "d",
+ "w": "w",
+ "week": "w",
+ "weeks": "w",
+ "mo": "month",
+ "month": "month",
+ "months": "month",
+ "q": "quarter",
+ "quarter": "quarter",
+ "quarters": "quarter",
+ "y": "year",
+ "year": "year",
+ "years": "year",
+}
+_GRANULARITY_CONVERSION = {
+ "s": (1, "s"),
+ "m": (1, "m"),
+ "h": (1, "h"),
+ "d": (1, "d"),
+ "w": (7, "d"),
+ "month": (1, "mo"),
+ "quarter": (3, "mo"),
+ "year": (12, "mo"),
+}
-def import_zoneinfo() -> type[ZoneInfo]:
- try:
- if sys.version_info >= (3, 9):
- from zoneinfo import ZoneInfo
- else:
- from backports.zoneinfo import ZoneInfo
- return ZoneInfo
-
- except ImportError as e:
- from cognite.client.exceptions import CogniteImportError
-
- raise CogniteImportError(
- "ZoneInfo is part of the standard library starting with Python >=3.9. In earlier versions "
- "you need to install a backport. This is done automatically for you when installing with the pandas "
- "group: 'cognite-sdk[pandas]', or with poetry: 'poetry install -E pandas'"
- ) from e
-
-
-def _import_zoneinfo_not_found_error() -> type[ZoneInfoNotFoundError]:
- if sys.version_info >= (3, 9):
- from zoneinfo import ZoneInfoNotFoundError
- else:
- from backports.zoneinfo import ZoneInfoNotFoundError
- return ZoneInfoNotFoundError
+def parse_str_timezone_offset(tz: str) -> timezone:
+ """
+ This function attempts to accept and convert all valid fixed-offset timezone input that the API
+ supports for the datapoints endpoints. The backend uses the native Java class TimeZone, with some
+ added restrictions on ambiguous names/IDs.
+ """
+ prefix, tz = "", tz.replace(" ", "")
+ if match := re.match("^(UTC?|GMT)?", tz):
+ tz = tz.replace(prefix := match.group(), "")
+ if prefix and not tz:
+ return timezone.utc
+ elif re.match(r"^(-|\+)\d\d?$", tz) and abs(hours_offset := int(tz)) <= 18:
+ return timezone(timedelta(hours=hours_offset))
+ return cast(timezone, datetime.strptime(tz, "%z").tzinfo)
-def get_utc_zoneinfo() -> ZoneInfo:
- return import_zoneinfo()("UTC")
+def parse_str_timezone(tz: str) -> timezone | ZoneInfo:
+ try:
+ return ZoneInfo(tz) # type: ignore [abstract]
+ except ZoneInfoNotFoundError:
+ try:
+ return parse_str_timezone_offset(tz)
+ except ValueError:
+ raise ValueError(
+ f"Unable to parse string timezone {tz!r}, expected an UTC offset like UTC-02, UTC+01:30, +0400 "
+ "or an IANA timezone on the format Region/Location like Europe/Oslo, Asia/Tokyo or America/Los_Angeles"
+ )
+
+
+def convert_timezone_to_str(tz: timezone | ZoneInfo) -> str:
+ if isinstance(tz, timezone):
+ # Built-in timezones can only represent fixed UTC offsets (i.e. we do not allow arbitrary
+ # tzinfo subclasses). We could use str(tz), but if the user passed a name, that name would be
+ # returned instead, so we first extract the UTC offset:
+ return str(timezone(tz.utcoffset(None)))
+ elif isinstance(tz, ZoneInfo):
+ if tz.key is not None:
+ return tz.key
+ else:
+ raise ValueError("timezone of type ZoneInfo does not have the required 'key' attribute set")
+ raise TypeError(f"timezone must be datetime.timezone or zoneinfo.ZoneInfo, not {type(tz)}")
def datetime_to_ms(dt: datetime) -> int:
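A sketch of how the new parsing helpers behave, based on the implementations above (IANA lookups assume the tz database is available):

    from cognite.client.utils._time import convert_timezone_to_str, parse_str_timezone

    parse_str_timezone("Europe/Oslo")  # -> ZoneInfo("Europe/Oslo")
    parse_str_timezone("UTC-02")       # -> datetime.timezone at a fixed -2 hour offset
    parse_str_timezone("+0400")        # -> datetime.timezone at a fixed +4 hour offset
    convert_timezone_to_str(parse_str_timezone("Europe/Oslo"))  # -> "Europe/Oslo"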
@@ -118,8 +170,21 @@ def datetime_to_ms_iso_timestamp(dt: datetime) -> str:
if dt.tzinfo is None:
dt = dt.astimezone()
return dt.isoformat(timespec="milliseconds")
- else:
- raise TypeError(f"Expected datetime object, got {type(dt)}")
+ raise TypeError(f"Expected datetime object, got {type(dt)}")
+
+
+def split_granularity_into_quantity_and_normalized_unit(granularity: str) -> tuple[int, str]:
+ """A normalized unit is any unit accepted by the API"""
+ if match := re.match(r"(\d+)(.*)", granularity):
+ quantity, unit = match.groups()
+ # We accept a whole range of different formats like s, sec, second
+ if normalized_unit := _GRANULARITY_UNIT_LOOKUP.get(unit):
+ multiplier, normalized_unit = _GRANULARITY_CONVERSION[normalized_unit]
+ return int(quantity) * multiplier, normalized_unit
+ raise ValueError(
+ f"Invalid granularity format: `{granularity}`. Must be on format , e.g. 5m, 3h, 1d, or 2w. "
+ "Tip: Unit can be spelled out for clarity, e.g. week(s), month(s), quarter(s), or year(s)."
+ )
def time_string_to_ms(pattern: str, string: str, unit_in_ms: dict[str, int]) -> int | None:
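The lookup and conversion tables above translate the extended (calendar-aware) granularity spellings into API-accepted units; a few examples of the resulting (quantity, unit) pairs:

    from cognite.client.utils._time import split_granularity_into_quantity_and_normalized_unit

    split_granularity_into_quantity_and_normalized_unit("120s")      # -> (120, "s")
    split_granularity_into_quantity_and_normalized_unit("2weeks")    # -> (14, "d")
    split_granularity_into_quantity_and_normalized_unit("1quarter")  # -> (3, "mo")
    split_granularity_into_quantity_and_normalized_unit("3y")        # -> (36, "mo")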
@@ -206,6 +271,14 @@ def timestamp_to_ms(timestamp: int | float | str | datetime) -> int:
TIME_ATTRIBUTES |= set(map(to_camel_case, TIME_ATTRIBUTES))
+def convert_and_isoformat_timestamp(ts: int, tz: timezone | ZoneInfo | None) -> str:
+ """Used in datapoints classes that are fetched with a 'timezone'"""
+ dt = ms_to_datetime(ts)
+ if tz is not None:
+ dt = dt.astimezone(tz)
+ return dt.isoformat(sep=" ", timespec="milliseconds")
+
+
def _convert_and_isoformat_time_attrs_in_dict(item: dict) -> dict:
for k in TIME_ATTRIBUTES.intersection(item):
try:
@@ -474,36 +547,6 @@ def split_time_range(start: int, end: int, n_splits: int, granularity_in_ms: int
return [*(start + delta_ms * i for i in range(n_splits)), end]
-_GRANULARITY_UNIT_LOOKUP: dict[str, str] = {
- "s": "s",
- "sec": "s",
- "second": "s",
- "seconds": "s",
- "t": "m",
- "m": "m",
- "min": "m",
- "minute": "m",
- "minutes": "m",
- "h": "h",
- "hour": "h",
- "hours": "h",
- "d": "d",
- "day": "d",
- "days": "d",
- "w": "w",
- "week": "w",
- "weeks": "w",
- "month": "month",
- "months": "month",
- "q": "quarter",
- "quarter": "quarter",
- "quarters": "quarter",
- "y": "year",
- "year": "year",
- "years": "year",
-}
-
-
def get_granularity_multiplier_and_unit(granularity: str) -> tuple[int, str]:
if granularity and granularity[0].isdigit():
_, number, unit = re.split(r"(\d+)", granularity)
@@ -541,11 +584,10 @@ def _to_fixed_utc_intervals_variable_unit_length(
) -> list[dict[str, datetime | str]]:
freq = to_pandas_freq(f"{multiplier}{unit}", start)
index = pandas_date_range_tz(start, end, freq)
- utc = get_utc_zoneinfo()
return [
{
- "start": start.to_pydatetime().astimezone(utc),
- "end": end.to_pydatetime().astimezone(utc),
+ "start": start.to_pydatetime().astimezone(UTC),
+ "end": end.to_pydatetime().astimezone(UTC),
"granularity": f"{_check_max_granularity_limit((end - start) // timedelta(hours=1), granularity)}h",
}
for start, end in zip(index[:-1], index[1:])
@@ -562,7 +604,6 @@ def _to_fixed_utc_intervals_fixed_unit_length(
transition_raw = index[(utc_offsets != utc_offsets.shift(-1)) | (utc_offsets != utc_offsets.shift(1))]
transitions = []
- utc = get_utc_zoneinfo()
freq = multiplier * GRANULARITY_IN_HOURS[unit]
hour, zero = pd.Timedelta(hours=1), pd.Timedelta(0)
for t_start, t_end in zip(transition_raw[:-1], transition_raw[1:]):
@@ -579,8 +620,8 @@ def _to_fixed_utc_intervals_fixed_unit_length(
transitions.append(
{
- "start": t_start.to_pydatetime().astimezone(utc),
- "end": t_end.to_pydatetime().astimezone(utc),
+ "start": t_start.to_pydatetime().astimezone(UTC),
+ "end": t_end.to_pydatetime().astimezone(UTC),
"granularity": f"{_check_max_granularity_limit(freq + dst_adjustment, granularity)}h",
}
)
@@ -589,14 +630,14 @@ def _to_fixed_utc_intervals_fixed_unit_length(
def pandas_date_range_tz(start: datetime, end: datetime, freq: str, inclusive: str = "both") -> pandas.DatetimeIndex:
"""
- Pandas date_range struggles with time zone aware datetimes.
+ Pandas date_range struggles with timezone-aware datetimes.
This function overcomes that limitation.
Assumes that start and end have the same timezone.
"""
pd = local_import("pandas")
- # There is a bug in date_range which makes it fail to handle ambiguous timestamps when you use time zone aware
- # datetimes. This is a workaround by passing the time zone as an argument to the function.
+ # There is a bug in date_range which makes it fail to handle ambiguous timestamps when you use timezone-aware
+ # datetimes. As a workaround, we pass the timezone as a separate argument to the function.
# In addition, pandas struggle with ZoneInfo objects, so we convert them to string so that pandas can use its own
# tzdata implementation.
@@ -622,7 +663,7 @@ def _timezones_are_equal(start_tz: tzinfo, end_tz: tzinfo) -> bool:
-except- when given something concrete like pytz.UTC or ZoneInfo(...).
To make sure we don't raise something silly like 'UTC != UTC', we convert both to ZoneInfo for comparison
- via str(). This is safe as all return the lookup key (for the IANA time zone database).
+ via str(). This is safe as all return the lookup key (for the IANA timezone database).
Note:
We do not consider timezones with different keys, but equal fixed offsets from UTC to be equal. An example
@@ -630,10 +671,9 @@ def _timezones_are_equal(start_tz: tzinfo, end_tz: tzinfo) -> bool:
"""
if start_tz is end_tz:
return True
- ZoneInfo, ZoneInfoNotFoundError = import_zoneinfo(), _import_zoneinfo_not_found_error()
with suppress(ValueError, ZoneInfoNotFoundError):
# ValueError is raised for non-conforming keys (ZoneInfoNotFoundError is self-explanatory)
- if ZoneInfo(str(start_tz)) is ZoneInfo(str(end_tz)):
+ if ZoneInfo(str(start_tz)) is ZoneInfo(str(end_tz)): # type: ignore [abstract]
return True
return False
@@ -648,13 +688,12 @@ def validate_timezone(start: datetime, end: datetime) -> ZoneInfo:
if not _timezones_are_equal(start_tz, end_tz):
raise ValueError(f"'start' and 'end' represent different timezones: '{start_tz}' and '{end_tz}'.")
- ZoneInfo = import_zoneinfo()
if isinstance(start_tz, ZoneInfo):
return start_tz
pd = local_import("pandas")
if isinstance(start, pd.Timestamp):
- return ZoneInfo(str(start_tz))
+ return ZoneInfo(str(start_tz)) # type: ignore [abstract]
raise ValueError("Only tz-aware pandas.Timestamp and datetime (must be using ZoneInfo) are supported.")
@@ -678,3 +717,6 @@ def to_pandas_freq(granularity: str, start: datetime) -> str:
floored = QuarterAligner.floor(start)
unit += {1: "-JAN", 4: "-APR", 7: "-JUL", 10: "-OCT"}[floored.month]
return f"{multiplier}{unit}"
+
+
+__all__ = ["ZoneInfo", "ZoneInfoNotFoundError"] # Fix: Module does not explicitly export attribute "ZoneInfo
diff --git a/poetry.lock b/poetry.lock
index 5a012a7f42..f6a097d51d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -89,24 +89,24 @@ files = [
[[package]]
name = "backports-tarfile"
-version = "1.1.1"
+version = "1.2.0"
description = "Backport of CPython tarfile module"
optional = false
python-versions = ">=3.8"
files = [
- {file = "backports.tarfile-1.1.1-py3-none-any.whl", hash = "sha256:73e0179647803d3726d82e76089d01d8549ceca9bace469953fcb4d97cf2d417"},
- {file = "backports_tarfile-1.1.1.tar.gz", hash = "sha256:9c2ef9696cb73374f7164e17fc761389393ca76777036f5aad42e8b93fcd8009"},
+ {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
+ {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
]
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["jaraco.test", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"]
+testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"]
[[package]]
name = "backports-zoneinfo"
version = "0.2.1"
description = "Backport of the standard library zoneinfo module"
-optional = true
+optional = false
python-versions = ">=3.6"
files = [
{file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"},
@@ -127,9 +127,6 @@ files = [
{file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"},
]
-[package.dependencies]
-tzdata = {version = "*", optional = true, markers = "extra == \"tzdata\""}
-
[package.extras]
tzdata = ["tzdata"]
@@ -379,63 +376,63 @@ files = [
[[package]]
name = "coverage"
-version = "7.5.1"
+version = "7.5.3"
description = "Code coverage measurement for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "coverage-7.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0884920835a033b78d1c73b6d3bbcda8161a900f38a488829a83982925f6c2e"},
- {file = "coverage-7.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:39afcd3d4339329c5f58de48a52f6e4e50f6578dd6099961cf22228feb25f38f"},
- {file = "coverage-7.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b0ceee8147444347da6a66be737c9d78f3353b0681715b668b72e79203e4a"},
- {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a9ca3f2fae0088c3c71d743d85404cec8df9be818a005ea065495bedc33da35"},
- {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd215c0c7d7aab005221608a3c2b46f58c0285a819565887ee0b718c052aa4e"},
- {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4bf0655ab60d754491004a5efd7f9cccefcc1081a74c9ef2da4735d6ee4a6223"},
- {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:61c4bf1ba021817de12b813338c9be9f0ad5b1e781b9b340a6d29fc13e7c1b5e"},
- {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db66fc317a046556a96b453a58eced5024af4582a8dbdc0c23ca4dbc0d5b3146"},
- {file = "coverage-7.5.1-cp310-cp310-win32.whl", hash = "sha256:b016ea6b959d3b9556cb401c55a37547135a587db0115635a443b2ce8f1c7228"},
- {file = "coverage-7.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:df4e745a81c110e7446b1cc8131bf986157770fa405fe90e15e850aaf7619bc8"},
- {file = "coverage-7.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:796a79f63eca8814ca3317a1ea443645c9ff0d18b188de470ed7ccd45ae79428"},
- {file = "coverage-7.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fc84a37bfd98db31beae3c2748811a3fa72bf2007ff7902f68746d9757f3746"},
- {file = "coverage-7.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6175d1a0559986c6ee3f7fccfc4a90ecd12ba0a383dcc2da30c2b9918d67d8a3"},
- {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fc81d5878cd6274ce971e0a3a18a8803c3fe25457165314271cf78e3aae3aa2"},
- {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:556cf1a7cbc8028cb60e1ff0be806be2eded2daf8129b8811c63e2b9a6c43bca"},
- {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9981706d300c18d8b220995ad22627647be11a4276721c10911e0e9fa44c83e8"},
- {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d7fed867ee50edf1a0b4a11e8e5d0895150e572af1cd6d315d557758bfa9c057"},
- {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef48e2707fb320c8f139424a596f5b69955a85b178f15af261bab871873bb987"},
- {file = "coverage-7.5.1-cp311-cp311-win32.whl", hash = "sha256:9314d5678dcc665330df5b69c1e726a0e49b27df0461c08ca12674bcc19ef136"},
- {file = "coverage-7.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fa567e99765fe98f4e7d7394ce623e794d7cabb170f2ca2ac5a4174437e90dd"},
- {file = "coverage-7.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b6cf3764c030e5338e7f61f95bd21147963cf6aa16e09d2f74f1fa52013c1206"},
- {file = "coverage-7.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ec92012fefebee89a6b9c79bc39051a6cb3891d562b9270ab10ecfdadbc0c34"},
- {file = "coverage-7.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16db7f26000a07efcf6aea00316f6ac57e7d9a96501e990a36f40c965ec7a95d"},
- {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beccf7b8a10b09c4ae543582c1319c6df47d78fd732f854ac68d518ee1fb97fa"},
- {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8748731ad392d736cc9ccac03c9845b13bb07d020a33423fa5b3a36521ac6e4e"},
- {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7352b9161b33fd0b643ccd1f21f3a3908daaddf414f1c6cb9d3a2fd618bf2572"},
- {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a588d39e0925f6a2bff87154752481273cdb1736270642aeb3635cb9b4cad07"},
- {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:68f962d9b72ce69ea8621f57551b2fa9c70509af757ee3b8105d4f51b92b41a7"},
- {file = "coverage-7.5.1-cp312-cp312-win32.whl", hash = "sha256:f152cbf5b88aaeb836127d920dd0f5e7edff5a66f10c079157306c4343d86c19"},
- {file = "coverage-7.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:5a5740d1fb60ddf268a3811bcd353de34eb56dc24e8f52a7f05ee513b2d4f596"},
- {file = "coverage-7.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e2213def81a50519d7cc56ed643c9e93e0247f5bbe0d1247d15fa520814a7cd7"},
- {file = "coverage-7.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5037f8fcc2a95b1f0e80585bd9d1ec31068a9bcb157d9750a172836e98bc7a90"},
- {file = "coverage-7.5.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3721c2c9e4c4953a41a26c14f4cef64330392a6d2d675c8b1db3b645e31f0e"},
- {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca498687ca46a62ae590253fba634a1fe9836bc56f626852fb2720f334c9e4e5"},
- {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cdcbc320b14c3e5877ee79e649677cb7d89ef588852e9583e6b24c2e5072661"},
- {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:57e0204b5b745594e5bc14b9b50006da722827f0b8c776949f1135677e88d0b8"},
- {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fe7502616b67b234482c3ce276ff26f39ffe88adca2acf0261df4b8454668b4"},
- {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9e78295f4144f9dacfed4f92935fbe1780021247c2fabf73a819b17f0ccfff8d"},
- {file = "coverage-7.5.1-cp38-cp38-win32.whl", hash = "sha256:1434e088b41594baa71188a17533083eabf5609e8e72f16ce8c186001e6b8c41"},
- {file = "coverage-7.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:0646599e9b139988b63704d704af8e8df7fa4cbc4a1f33df69d97f36cb0a38de"},
- {file = "coverage-7.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4cc37def103a2725bc672f84bd939a6fe4522310503207aae4d56351644682f1"},
- {file = "coverage-7.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc0b4d8bfeabd25ea75e94632f5b6e047eef8adaed0c2161ada1e922e7f7cece"},
- {file = "coverage-7.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d0a0f5e06881ecedfe6f3dd2f56dcb057b6dbeb3327fd32d4b12854df36bf26"},
- {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9735317685ba6ec7e3754798c8871c2f49aa5e687cc794a0b1d284b2389d1bd5"},
- {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d21918e9ef11edf36764b93101e2ae8cc82aa5efdc7c5a4e9c6c35a48496d601"},
- {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c3e757949f268364b96ca894b4c342b41dc6f8f8b66c37878aacef5930db61be"},
- {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:79afb6197e2f7f60c4824dd4b2d4c2ec5801ceb6ba9ce5d2c3080e5660d51a4f"},
- {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1d0d98d95dd18fe29dc66808e1accf59f037d5716f86a501fc0256455219668"},
- {file = "coverage-7.5.1-cp39-cp39-win32.whl", hash = "sha256:1cc0fe9b0b3a8364093c53b0b4c0c2dd4bb23acbec4c9240b5f284095ccf7981"},
- {file = "coverage-7.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:dde0070c40ea8bb3641e811c1cfbf18e265d024deff6de52c5950677a8fb1e0f"},
- {file = "coverage-7.5.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:6537e7c10cc47c595828b8a8be04c72144725c383c4702703ff4e42e44577312"},
- {file = "coverage-7.5.1.tar.gz", hash = "sha256:54de9ef3a9da981f7af93eafde4ede199e0846cd819eb27c88e2b712aae9708c"},
+ {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"},
+ {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"},
+ {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"},
+ {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"},
+ {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"},
+ {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"},
+ {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"},
+ {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"},
+ {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"},
+ {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"},
+ {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"},
+ {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"},
+ {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"},
+ {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"},
+ {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"},
+ {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"},
+ {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"},
+ {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"},
+ {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"},
+ {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"},
+ {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"},
+ {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"},
+ {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"},
+ {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"},
+ {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"},
+ {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"},
+ {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"},
+ {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"},
+ {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"},
+ {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"},
+ {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"},
+ {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"},
+ {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"},
+ {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"},
+ {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"},
+ {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"},
+ {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"},
+ {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"},
+ {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"},
+ {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"},
+ {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"},
+ {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"},
+ {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"},
+ {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"},
+ {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"},
+ {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"},
+ {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"},
+ {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"},
+ {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"},
+ {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"},
+ {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"},
+ {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"},
]
[package.dependencies]
@@ -1228,18 +1225,15 @@ files = [
[[package]]
name = "nodeenv"
-version = "1.8.0"
+version = "1.9.0"
description = "Node.js virtual environment builder"
optional = false
-python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
- {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"},
- {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"},
+ {file = "nodeenv-1.9.0-py2.py3-none-any.whl", hash = "sha256:508ecec98f9f3330b636d4448c0f1a56fc68017c68f1e7857ebc52acf0eb879a"},
+ {file = "nodeenv-1.9.0.tar.gz", hash = "sha256:07f144e90dae547bf0d4ee8da0ee42664a42a04e02ed68e06324348dafe4bdb1"},
]
-[package.dependencies]
-setuptools = "*"
-
[[package]]
name = "numpy"
version = "1.24.4"
@@ -1626,13 +1620,13 @@ virtualenv = ">=20.10.0"
[[package]]
name = "prompt-toolkit"
-version = "3.0.43"
+version = "3.0.45"
description = "Library for building powerful interactive command lines in Python"
optional = false
python-versions = ">=3.7.0"
files = [
- {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"},
- {file = "prompt_toolkit-3.0.43.tar.gz", hash = "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d"},
+ {file = "prompt_toolkit-3.0.45-py3-none-any.whl", hash = "sha256:a29b89160e494e3ea8622b09fa5897610b437884dcdcd054fdc1308883326c2a"},
+ {file = "prompt_toolkit-3.0.45.tar.gz", hash = "sha256:07c60ee4ab7b7e90824b61afa840c8f5aad2d46b3e2e10acc33d8ecc94a49089"},
]
[package.dependencies]
@@ -1640,22 +1634,22 @@ wcwidth = "*"
[[package]]
name = "protobuf"
-version = "5.26.1"
+version = "5.27.0"
description = ""
optional = false
python-versions = ">=3.8"
files = [
- {file = "protobuf-5.26.1-cp310-abi3-win32.whl", hash = "sha256:3c388ea6ddfe735f8cf69e3f7dc7611e73107b60bdfcf5d0f024c3ccd3794e23"},
- {file = "protobuf-5.26.1-cp310-abi3-win_amd64.whl", hash = "sha256:e6039957449cb918f331d32ffafa8eb9255769c96aa0560d9a5bf0b4e00a2a33"},
- {file = "protobuf-5.26.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:38aa5f535721d5bb99861166c445c4105c4e285c765fbb2ac10f116e32dcd46d"},
- {file = "protobuf-5.26.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:fbfe61e7ee8c1860855696e3ac6cfd1b01af5498facc6834fcc345c9684fb2ca"},
- {file = "protobuf-5.26.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:f7417703f841167e5a27d48be13389d52ad705ec09eade63dfc3180a959215d7"},
- {file = "protobuf-5.26.1-cp38-cp38-win32.whl", hash = "sha256:d693d2504ca96750d92d9de8a103102dd648fda04540495535f0fec7577ed8fc"},
- {file = "protobuf-5.26.1-cp38-cp38-win_amd64.whl", hash = "sha256:9b557c317ebe6836835ec4ef74ec3e994ad0894ea424314ad3552bc6e8835b4e"},
- {file = "protobuf-5.26.1-cp39-cp39-win32.whl", hash = "sha256:b9ba3ca83c2e31219ffbeb9d76b63aad35a3eb1544170c55336993d7a18ae72c"},
- {file = "protobuf-5.26.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ee014c2c87582e101d6b54260af03b6596728505c79f17c8586e7523aaa8f8c"},
- {file = "protobuf-5.26.1-py3-none-any.whl", hash = "sha256:da612f2720c0183417194eeaa2523215c4fcc1a1949772dc65f05047e08d5932"},
- {file = "protobuf-5.26.1.tar.gz", hash = "sha256:8ca2a1d97c290ec7b16e4e5dff2e5ae150cc1582f55b5ab300d45cb0dfa90e51"},
+ {file = "protobuf-5.27.0-cp310-abi3-win32.whl", hash = "sha256:2f83bf341d925650d550b8932b71763321d782529ac0eaf278f5242f513cc04e"},
+ {file = "protobuf-5.27.0-cp310-abi3-win_amd64.whl", hash = "sha256:b276e3f477ea1eebff3c2e1515136cfcff5ac14519c45f9b4aa2f6a87ea627c4"},
+ {file = "protobuf-5.27.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:744489f77c29174328d32f8921566fb0f7080a2f064c5137b9d6f4b790f9e0c1"},
+ {file = "protobuf-5.27.0-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:f51f33d305e18646f03acfdb343aac15b8115235af98bc9f844bf9446573827b"},
+ {file = "protobuf-5.27.0-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:56937f97ae0dcf4e220ff2abb1456c51a334144c9960b23597f044ce99c29c89"},
+ {file = "protobuf-5.27.0-cp38-cp38-win32.whl", hash = "sha256:a17f4d664ea868102feaa30a674542255f9f4bf835d943d588440d1f49a3ed15"},
+ {file = "protobuf-5.27.0-cp38-cp38-win_amd64.whl", hash = "sha256:aabbbcf794fbb4c692ff14ce06780a66d04758435717107c387f12fb477bf0d8"},
+ {file = "protobuf-5.27.0-cp39-cp39-win32.whl", hash = "sha256:587be23f1212da7a14a6c65fd61995f8ef35779d4aea9e36aad81f5f3b80aec5"},
+ {file = "protobuf-5.27.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cb65fc8fba680b27cf7a07678084c6e68ee13cab7cace734954c25a43da6d0f"},
+ {file = "protobuf-5.27.0-py3-none-any.whl", hash = "sha256:673ad60f1536b394b4fa0bcd3146a4130fcad85bfe3b60eaa86d6a0ace0fa374"},
+ {file = "protobuf-5.27.0.tar.gz", hash = "sha256:07f2b9a15255e3cf3f137d884af7972407b556a7a220912b252f26dc3121e6bf"},
]
[[package]]
@@ -2031,7 +2025,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
- {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -2087,13 +2080,13 @@ md = ["cmarkgfm (>=0.8.0)"]
[[package]]
name = "requests"
-version = "2.31.0"
+version = "2.32.3"
description = "Python HTTP for Humans."
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
- {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
+ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
+ {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
]
[package.dependencies]
@@ -2205,22 +2198,6 @@ files = [
cryptography = ">=2.0"
jeepney = ">=0.6"
-[[package]]
-name = "setuptools"
-version = "69.5.1"
-description = "Easily download, build, install, upgrade, and uninstall Python packages"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"},
- {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"},
-]
-
-[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
-
[[package]]
name = "shapely"
version = "2.0.4"
@@ -2478,17 +2455,17 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
[[package]]
name = "sympy"
-version = "1.12"
+version = "1.12.1"
description = "Computer algebra system (CAS) in Python"
optional = true
python-versions = ">=3.8"
files = [
- {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
- {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
+ {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"},
+ {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"},
]
[package.dependencies]
-mpmath = ">=0.19"
+mpmath = ">=1.1.0,<1.4.0"
[[package]]
name = "toml"
@@ -2584,13 +2561,13 @@ files = [
[[package]]
name = "types-requests"
-version = "2.31.0.20240406"
+version = "2.32.0.20240523"
description = "Typing stubs for requests"
optional = false
python-versions = ">=3.8"
files = [
- {file = "types-requests-2.31.0.20240406.tar.gz", hash = "sha256:4428df33c5503945c74b3f42e82b181e86ec7b724620419a2966e2de604ce1a1"},
- {file = "types_requests-2.31.0.20240406-py3-none-any.whl", hash = "sha256:6216cdac377c6b9a040ac1c0404f7284bd13199c0e1bb235f4324627e8898cf5"},
+ {file = "types-requests-2.32.0.20240523.tar.gz", hash = "sha256:26b8a6de32d9f561192b9942b41c0ab2d8010df5677ca8aa146289d11d505f57"},
+ {file = "types_requests-2.32.0.20240523-py3-none-any.whl", hash = "sha256:f19ed0e2daa74302069bbbbf9e82902854ffa780bc790742a810a9aaa52f65ec"},
]
[package.dependencies]
@@ -2609,20 +2586,20 @@ files = [
[[package]]
name = "typing-extensions"
-version = "4.11.0"
+version = "4.12.0"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
- {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"},
- {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"},
+ {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"},
+ {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"},
]
[[package]]
name = "tzdata"
version = "2024.1"
description = "Provider of IANA time zone data"
-optional = true
+optional = false
python-versions = ">=2"
files = [
{file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
@@ -2679,13 +2656,13 @@ files = [
[[package]]
name = "zipp"
-version = "3.18.2"
+version = "3.19.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.8"
files = [
- {file = "zipp-3.18.2-py3-none-any.whl", hash = "sha256:dce197b859eb796242b0622af1b8beb0a722d52aa2f57133ead08edd5bf5374e"},
- {file = "zipp-3.18.2.tar.gz", hash = "sha256:6278d9ddbcfb1f1089a88fde84481528b07b0e10474e09dcfe53dad4069fa059"},
+ {file = "zipp-3.19.0-py3-none-any.whl", hash = "sha256:96dc6ad62f1441bcaccef23b274ec471518daf4fbbc580341204936a5a3dddec"},
+ {file = "zipp-3.19.0.tar.gz", hash = "sha256:952df858fb3164426c976d9338d3961e8e8b3758e2e059e0f754b8c4262625ee"},
]
[package.extras]
@@ -2693,11 +2670,11 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
[extras]
-all = ["PyYAML", "backports-zoneinfo", "geopandas", "geopandas", "numpy", "numpy", "numpy", "pandas", "pandas", "pip", "shapely", "sympy"]
+all = ["PyYAML", "geopandas", "geopandas", "numpy", "numpy", "numpy", "pandas", "pandas", "pip", "shapely", "sympy"]
functions = ["pip"]
geo = ["geopandas", "geopandas", "shapely"]
numpy = ["numpy", "numpy", "numpy"]
-pandas = ["backports-zoneinfo", "pandas", "pandas"]
+pandas = ["pandas", "pandas"]
pyodide = ["pyodide-http"]
sympy = ["sympy"]
yaml = ["PyYAML"]
@@ -2705,4 +2682,4 @@ yaml = ["PyYAML"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
-content-hash = "3ab83a7107afd761f71669a19aa7ce420cf07e5b16844634d595f07a08290252"
+content-hash = "d1af07445f013565e6cdd2b6513b9fc0a1d4df6109f9933a47f3d423f2b10412"
diff --git a/pyproject.toml b/pyproject.toml
index cd263d34e3..656c3ea8c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
name = "cognite-sdk"
-version = "7.44.1"
+version = "7.45.0"
description = "Cognite Python SDK"
readme = "README.md"
documentation = "https://cognite-sdk-python.readthedocs-hosted.com"
@@ -30,37 +30,37 @@ msal = "^1"
protobuf = ">=4"
pip = ">=20.0.0" # make optional once poetry doesn't auto-remove it on "simple install"
typing_extensions = ">= 4"
+backports-zoneinfo = { version = ">=0.2.1", python = "<3.9" }
+# Windows does not ship the IANA time zone database and needs tzdata
+tzdata = { version = ">=2024.1", markers = "platform_system == 'Windows'" }
numpy = [
- {version = ">=1.20, <1.25", python = "~3.8", optional = true},
- {version = "^1.25", python = ">=3.9, <3.12", optional = true},
- {version = "^1.26", python = "^3.12", optional = true},
+ { version = ">=1.20, <1.25", python = "~3.8", optional = true },
+ { version = "^1.25", python = ">=3.9, <3.12", optional = true },
+ { version = "^1.26", python = "^3.12", optional = true },
]
sympy = { version = "*", optional = true }
pandas = [
{ version = ">=1.4, <2.1", python = "~3.8", optional = true },
{ version = ">=2.1", python = ">=3.9", optional = true },
]
-backports-zoneinfo = { version = ">=0.2.1", python = "<3.9", extras = ["tzdata"], optional = true}
-# Windows does not have a ANSI database and need tzdata
-tzdata = {version = ">=2023.3", markers = "sys_platform == 'win32'", optional = true }
geopandas = [
{ version = ">=0.10, <0.14", python = "~3.8", optional = true },
{ version = ">=0.14", python = ">=3.9", optional = true },
]
shapely = { version = ">=1.7.0", optional = true }
-pyodide-http = {version = "^0.2.1", optional = true }
-graphlib-backport = {version = "^1.0.0", python = "<3.9"}
-PyYAML = {version = "^6.0", optional = true}
+pyodide-http = { version = "^0.2.1", optional = true }
+graphlib-backport = { version = "^1.0.0", python = "<3.9" }
+PyYAML = { version = "^6.0", optional = true }
[tool.poetry.extras]
-pandas = ["pandas", "backports-zoneinfo"]
+pandas = ["pandas"]
numpy = ["numpy"]
geo = ["geopandas", "shapely"]
sympy = ["sympy"]
functions = ["pip"]
yaml = ["PyYAML"]
pyodide = ["pyodide-http"] # keep pyodide related dependencies outside of 'all'
-all = ["numpy", "pandas", "geopandas", "shapely", "sympy", "pip", "backports-zoneinfo", "PyYAML"]
+all = ["numpy", "pandas", "geopandas", "shapely", "sympy", "pip", "PyYAML"]
[tool.poetry.group.dev.dependencies]
docutils = "==0.15.2"
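The pyproject.toml hunk above promotes the timezone dependencies from the `pandas` extra to core requirements: `backports-zoneinfo` for Python < 3.9 and `tzdata` on Windows. A minimal sketch of the import shim this enables, assuming the SDK re-exports `ZoneInfo` from `cognite.client.utils._time` as the tests below import it; the exact module contents are not shown in this diff:

```python
# Sketch of a ZoneInfo re-export with a backport fallback (assumed, not the
# SDK's actual implementation).
import sys

if sys.version_info >= (3, 9):
    from zoneinfo import ZoneInfo
else:  # Python 3.8 relies on the backports-zoneinfo package
    from backports.zoneinfo import ZoneInfo  # type: ignore[no-redef]

# On Windows the IANA database is not shipped with the OS, so the tzdata
# package (now a non-optional dependency there) provides the zone files.
oslo = ZoneInfo("Europe/Oslo")
```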
diff --git a/tests/tests_integration/test_api/test_datapoints.py b/tests/tests_integration/test_api/test_datapoints.py
index b05df9dadf..2c9ff342b7 100644
--- a/tests/tests_integration/test_api/test_datapoints.py
+++ b/tests/tests_integration/test_api/test_datapoints.py
@@ -14,13 +14,13 @@
import unittest
from contextlib import nullcontext as does_not_raise
from datetime import datetime, timezone
-from typing import Literal
+from typing import Callable, Literal
from unittest.mock import patch
import numpy as np
import pandas as pd
import pytest
-from numpy.testing import assert_allclose
+from numpy.testing import assert_allclose, assert_equal
from cognite.client import CogniteClient
from cognite.client.data_classes import (
@@ -42,6 +42,7 @@
MAX_TIMESTAMP_MS,
MIN_TIMESTAMP_MS,
UNIT_IN_MS,
+ ZoneInfo,
align_start_and_end_for_granularity,
granularity_to_ms,
timestamp_to_ms,
@@ -57,12 +58,6 @@
set_max_workers,
)
-try:
- from zoneinfo import ZoneInfo
-except ImportError:
- from backports.zoneinfo import ZoneInfo
-
-
DATAPOINTS_API = "cognite.client._api.datapoints.{}"
WEEK_MS = UNIT_IN_MS["w"]
DAY_MS = UNIT_IN_MS["d"]
@@ -176,9 +171,15 @@ def retrieve_endpoints(cognite_client):
]
-def ts_to_ms(ts):
+@pytest.fixture
+def all_retrieve_endpoints(cognite_client, retrieve_endpoints):
+ # retrieve_dataframe is just a wrapper around retrieve_arrays
+ return [*retrieve_endpoints, cognite_client.time_series.data.retrieve_dataframe]
+
+
+def ts_to_ms(ts, tz=None):
assert isinstance(ts, str)
- return pd.Timestamp(ts).value // int(1e6)
+ return pd.Timestamp(ts, tz=tz).value // int(1e6)
def convert_any_ts_to_integer(ts):
@@ -347,6 +348,40 @@ def timeseries_degree_c_minus40_0_100(cognite_client: CogniteClient) -> TimeSeri
return created_timeseries
+@pytest.fixture
+def dps_queries_dst_transitions(all_test_time_series):
+ ts1, ts2 = all_test_time_series[113], all_test_time_series[119]
+ oslo = "Europe/Oslo"
+ return [
+ # DST from winter to summer:
+ DatapointsQuery(
+ id=ts1.id,
+ start=ts_to_ms("1991-03-31 00:20:05.912", tz=oslo),
+ end=ts_to_ms("1991-03-31 03:28:51.903", tz=oslo),
+ timezone=ZoneInfo(oslo),
+ ),
+ # DST from summer to winter:
+ DatapointsQuery(
+ id=ts1.id,
+ start=ts_to_ms("1991-09-29 01:02:37.950", tz=oslo),
+ end=ts_to_ms("1991-09-29 04:12:02.558", tz=oslo),
+ timezone=ZoneInfo(oslo),
+ ),
+ DatapointsQuery(
+ id=ts2.id,
+ start=ts_to_ms("2023-03-26", tz=oslo),
+ end=ts_to_ms("2023-03-26 05:00:00", tz=oslo),
+ timezone=ZoneInfo(oslo),
+ ),
+ DatapointsQuery(
+ id=ts2.id,
+ start=ts_to_ms("2023-10-29 01:00:00", tz=oslo),
+ end=ts_to_ms("2023-10-29 03:00:00.001", tz=oslo),
+ timezone=ZoneInfo(oslo),
+ ),
+ ]
+
+
class TestRetrieveRawDatapointsAPI:
"""Note: Since `retrieve` and `retrieve_arrays` endpoints should give identical results,
except for the data container types, all tests run both endpoints except those targeting a specific bug
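For context on the `dps_queries_dst_transitions` fixture above: start and end are converted to epoch milliseconds using Oslo wall-clock time, while the same zone is also passed as `timezone` on the query. A small sketch of that pattern, where the id is a placeholder and a plain dict stands in for `DatapointsQuery`:

```python
import pandas as pd
from zoneinfo import ZoneInfo

def ts_to_ms(ts, tz=None):
    # pd.Timestamp(...).value is nanoseconds since epoch; floor-divide down to ms
    return pd.Timestamp(ts, tz=tz).value // int(1e6)

oslo = "Europe/Oslo"
query = dict(  # stands in for DatapointsQuery(...)
    id=123,                                        # placeholder time series id
    start=ts_to_ms("2023-03-26", tz=oslo),         # local midnight, UTC+01:00
    end=ts_to_ms("2023-03-26 05:00:00", tz=oslo),  # after the 02:00 -> 03:00 DST jump
    timezone=ZoneInfo(oslo),
)
```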
@@ -886,6 +921,56 @@ def test_status_codes_and_symbols(self, retrieve_endpoints, ts_status_codes):
assert b3.status_code[-1] == 2165309440
assert b3.status_symbol[-1] == "BadLicenseNotAvailable"
+ def test_query_no_ts_exists(self, retrieve_endpoints):
+ for endpoint, exp_res_lst_type in zip(retrieve_endpoints, DPS_LST_TYPES):
+ ts_id = random_cognite_ids(1) # list of len 1
+ res_lst = endpoint(id=ts_id, ignore_unknown_ids=True)
+ assert isinstance(res_lst, exp_res_lst_type)
+ # SDK bug in v<5: the id mapping would be missing because `.data` was empty on res_lst:
+ assert res_lst.get(id=ts_id[0]) is None
+
+ def test_timezone_raw_query_dst_transitions(self, all_retrieve_endpoints, dps_queries_dst_transitions):
+ expected_index = pd.to_datetime(
+ [
+ # to summer
+ "1991-03-31 00:20:05.911+01:00",
+ "1991-03-31 00:39:49.780+01:00",
+ "1991-03-31 03:21:08.144+02:00",
+ "1991-03-31 03:28:06.963+02:00",
+ "1991-03-31 03:28:51.903+02:00",
+ # to winter
+ "1991-09-29 01:02:37.949+02:00",
+ "1991-09-29 02:09:29.699+02:00",
+ "1991-09-29 02:11:39.983+02:00",
+ "1991-09-29 02:10:59.442+01:00",
+ "1991-09-29 02:52:26.212+01:00",
+ "1991-09-29 04:12:02.558+01:00",
+ ],
+ utc=True, # parse the mixed-offset strings as UTC, then convert to Europe/Oslo below
+ ).tz_convert("Europe/Oslo")
+ expected_to_summer_index = expected_index[:5]
+ expected_to_winter_index = expected_index[5:]
+ for endpoint, convert in zip(all_retrieve_endpoints, (True, True, False)):
+ to_summer, to_winter = dps_lst = endpoint(id=dps_queries_dst_transitions[:2], include_outside_points=True)
+ if convert:
+ dps_lst = dps_lst.to_pandas().astype("Int64")
+ to_summer, to_winter = to_summer.to_pandas().astype(np.int64), to_winter.to_pandas().astype(np.int64)
+ else:
+ dps_lst = dps_lst.astype("Int64")
+ to_summer, to_winter = dps_lst.iloc[:, 0], dps_lst.iloc[:, 1]
+
+ if not convert:
+ for dps in [to_summer, to_winter]:
+ pd.testing.assert_index_equal(expected_index, dps.index)
+ to_summer = to_summer.dropna()
+ to_winter = to_winter.dropna()
+
+ assert list(range(89712, 89717)) == to_summer.squeeze().values.tolist()
+ assert list(range(96821, 96827)) == to_winter.squeeze().values.tolist()
+ pd.testing.assert_index_equal(expected_index, dps_lst.index)
+ pd.testing.assert_index_equal(expected_to_winter_index, to_winter.index)
+ pd.testing.assert_index_equal(expected_to_summer_index, to_summer.index)
+
class TestRetrieveAggregateDatapointsAPI:
@pytest.mark.parametrize(
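The expected index in the raw DST test above uses a common pandas idiom: parse the mixed-offset strings as UTC first, then convert to the target zone so each timestamp carries its correct local offset. Reproduced in isolation:

```python
import pandas as pd

idx = pd.to_datetime(
    ["1991-03-31 00:20:05.911+01:00", "1991-03-31 03:21:08.144+02:00"],
    utc=True,  # mixed offsets must be parsed to UTC before tz conversion
).tz_convert("Europe/Oslo")
# The first entry keeps +01:00 (before the spring transition), the second +02:00.
```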
@@ -1327,65 +1412,20 @@ def test_edge_case_all_aggs_missing(self, one_mill_dps_ts, retrieve_endpoints):
assert df[f"{xid}|count"].dtype == np.int64
assert df[f"{xid}|interpolation"].dtype == np.float64
- def test_query_no_ts_exists(self, retrieve_endpoints):
- for endpoint, exp_res_lst_type in zip(retrieve_endpoints, DPS_LST_TYPES):
- ts_id = random_cognite_ids(1) # list of len 1
- res_lst = endpoint(id=ts_id, ignore_unknown_ids=True)
- assert isinstance(res_lst, exp_res_lst_type)
- # SDK bug v<5, id mapping would not exist because empty `.data` on res_lst:
- assert res_lst.get(id=ts_id[0]) is None
-
- def test_query_with_duplicates(self, retrieve_endpoints, one_mill_dps_ts, ms_bursty_ts):
- ts_numeric, ts_string = one_mill_dps_ts
- for endpoint, exp_res_lst_type in zip(retrieve_endpoints, DPS_LST_TYPES):
- res_lst = endpoint(
- id=[
- ms_bursty_ts.id, # This is the only non-duplicated
- ts_string.id,
- {"id": ts_numeric.id, "granularity": "1d", "aggregates": "average"},
- ],
- external_id=[
- ts_string.external_id,
- ts_numeric.external_id,
- {"external_id": ts_numeric.external_id, "granularity": "1d", "aggregates": "average"},
- ],
- limit=5,
- )
- assert isinstance(res_lst, exp_res_lst_type)
- # Check non-duplicated in result:
- assert isinstance(res_lst.get(id=ms_bursty_ts.id), exp_res_lst_type._RESOURCE)
- assert isinstance(res_lst.get(external_id=ms_bursty_ts.external_id), exp_res_lst_type._RESOURCE)
- # Check duplicated in result:
- assert isinstance(res_lst.get(id=ts_numeric.id), list)
- assert isinstance(res_lst.get(id=ts_string.id), list)
- assert isinstance(res_lst.get(external_id=ts_numeric.external_id), list)
- assert isinstance(res_lst.get(external_id=ts_string.external_id), list)
- assert len(res_lst.get(id=ts_numeric.id)) == 3
- assert len(res_lst.get(id=ts_string.id)) == 2
- assert len(res_lst.get(external_id=ts_numeric.external_id)) == 3
- assert len(res_lst.get(external_id=ts_string.external_id)) == 2
-
- @pytest.mark.parametrize(
- "retrieve_method_name, kwargs",
- itertools.product(
- ["retrieve", "retrieve_arrays", "retrieve_dataframe"],
- [dict(target_unit="temperature:deg_f"), dict(target_unit_system="Imperial")],
- ),
- )
+ @pytest.mark.parametrize("kwargs", (dict(target_unit="temperature:deg_f"), dict(target_unit_system="Imperial")))
def test_retrieve_methods_in_target_unit(
self,
- retrieve_method_name: str,
+ all_retrieve_endpoints: list[Callable],
kwargs: dict,
cognite_client: CogniteClient,
timeseries_degree_c_minus40_0_100: TimeSeries,
) -> None:
ts = timeseries_degree_c_minus40_0_100
- retrieve_method = getattr(cognite_client.time_series.data, retrieve_method_name)
-
- res = retrieve_method(external_id=ts.external_id, aggregates="max", granularity="1h", end=3, **kwargs)
- if isinstance(res, pd.DataFrame):
- res = DatapointsArray(max=res.values)
- assert math.isclose(res.max[0], 212)
+ for retrieve_method in all_retrieve_endpoints:
+ res = retrieve_method(external_id=ts.external_id, aggregates="max", granularity="1h", end=3, **kwargs)
+ if isinstance(res, pd.DataFrame):
+ res = DatapointsArray(max=res.values)
+ assert math.isclose(res.max[0], 212)
def test_status_codes_affect_aggregate_calculations(self, retrieve_endpoints, ts_status_codes):
mixed_ts, _, bad_ts, _ = ts_status_codes # No aggregates for string dps
@@ -1475,6 +1515,92 @@ def test_status_codes_affect_aggregate_calculations(self, retrieve_endpoints, ts
[349079144838.96564, 512343998481.7162, 159180999248.7119, 529224146671.5178],
)
+ def test_timezone_agg_query_dst_transitions(self, all_retrieve_endpoints, dps_queries_dst_transitions):
+ expected_values1 = [0.23625579717753353, 0.02829928231631262, -0.0673823850533647, -0.20908049925449418]
+ expected_values2 = [-0.13218082741552517, -0.20824244773820486, 0.02566169899072951, 0.15040625644292185]
+ expected_index = pd.to_datetime(
+ [
+ # to summer
+ "2023-03-26 00:00:00+01:00",
+ "2023-03-26 01:00:00+01:00",
+ "2023-03-26 03:00:00+02:00",
+ "2023-03-26 04:00:00+02:00",
+ # to winter
+ "2023-10-29 01:00:00+02:00",
+ "2023-10-29 02:00:00+02:00",
+ "2023-10-29 02:00:00+01:00",
+ "2023-10-29 03:00:00+01:00",
+ ],
+ utc=True, # parse the mixed-offset strings as UTC, then convert to Europe/Oslo below
+ ).tz_convert("Europe/Oslo")
+ expected_to_summer_index = expected_index[:4]
+ expected_to_winter_index = expected_index[4:]
+ for endpoint, convert in zip(all_retrieve_endpoints, (True, True, False)):
+ to_summer, to_winter = dps_lst = endpoint(
+ id=dps_queries_dst_transitions[2:], aggregates="average", granularity="1hour"
+ )
+ if convert:
+ dps_lst = dps_lst.to_pandas()
+ to_summer, to_winter = to_summer.to_pandas(), to_winter.to_pandas()
+ else:
+ to_summer, to_winter = dps_lst.iloc[:, 0], dps_lst.iloc[:, 1]
+
+ if not convert:
+ for dps in [to_summer, to_winter]:
+ pd.testing.assert_index_equal(expected_index, dps.index)
+ to_summer = to_summer.dropna()
+ to_winter = to_winter.dropna()
+
+ assert_allclose(expected_values1, to_summer.squeeze().to_numpy())
+ assert_allclose(expected_values2, to_winter.squeeze().to_numpy())
+ pd.testing.assert_index_equal(expected_index, dps_lst.index)
+ pd.testing.assert_index_equal(expected_to_winter_index, to_winter.index)
+ pd.testing.assert_index_equal(expected_to_summer_index, to_summer.index)
+
+ def test_calendar_granularities_in_utc_and_timezone(self, retrieve_endpoints, all_test_time_series):
+ daily_ts, oslo = all_test_time_series[108], ZoneInfo("Europe/Oslo")
+ granularities = [
+ "1" + random.choice(["mo", "month", "months"]),
+ "1" + random.choice(["q", "quarter", "quarters"]),
+ "1" + random.choice(["y", "year", "years"]),
+ ]
+ for endpoint in retrieve_endpoints:
+ mo_utc, q_utc, y_utc, mo_oslo, q_oslo, y_oslo = endpoint(
+ id=[DatapointsQuery(id=daily_ts.id, granularity=gran) for gran in granularities],
+ external_id=[
+ DatapointsQuery(external_id=daily_ts.external_id, granularity=gran, timezone=oslo)
+ for gran in granularities
+ ],
+ start=ts_to_ms("1964-01-01"),
+ end=ts_to_ms("1974-12-31"),
+ aggregates="count",
+ )
+ assert_equal(mo_utc.count, mo_oslo.count)
+ assert_equal(q_utc.count, q_oslo.count)
+ assert_equal(y_utc.count, y_oslo.count)
+
+ # Verify that the number of days per year/quarter/month follows the actual calendar:
+ exp_days_per_year = [
+ 365, 365, 365, 366,
+ 365, 365, 365, 366,
+ 365, 365, # the 366 entries above are the leap years
+ ] # fmt: skip
+ exp_days_per_quarter = [
+ 90, 91, 92, 92,
+ 90, 91, 92, 92,
+ 90, 91, 92, 92,
+ 91, 91, 92, 92, # leap year: Q1 has 91 days
+ ] # fmt: skip
+ exp_days_per_month = [
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, # leap year: February has 29 days
+ ] # fmt: skip
+ assert_equal(exp_days_per_year, y_utc.count)
+ assert_equal(exp_days_per_quarter, q_utc.count[: 4 * 4])
+ assert_equal(exp_days_per_month, mo_utc.count[: 12 * 4])
+
def retrieve_dataframe_in_tz_count_large_granularities_data():
# "start, end, granularity, expected_df"
@@ -1904,6 +2030,36 @@ def test_multiple_settings_for_ignore_unknown_ids(
dps_id = res_lst.get(id=ts_num.id)
assert isinstance(dps_id, exp_res_lst_type._RESOURCE)
+ def test_query_with_duplicates(self, retrieve_endpoints, one_mill_dps_ts, ms_bursty_ts):
+ ts_numeric, ts_string = one_mill_dps_ts
+ for endpoint, exp_res_lst_type in zip(retrieve_endpoints, DPS_LST_TYPES):
+ res_lst = endpoint(
+ id=[
+ ms_bursty_ts.id, # This is the only non-duplicated
+ ts_string.id,
+ {"id": ts_numeric.id, "granularity": "1d", "aggregates": "average"},
+ ],
+ external_id=[
+ ts_string.external_id,
+ ts_numeric.external_id,
+ {"external_id": ts_numeric.external_id, "granularity": "1d", "aggregates": "average"},
+ ],
+ limit=5,
+ )
+ assert isinstance(res_lst, exp_res_lst_type)
+ # Check non-duplicated in result:
+ assert isinstance(res_lst.get(id=ms_bursty_ts.id), exp_res_lst_type._RESOURCE)
+ assert isinstance(res_lst.get(external_id=ms_bursty_ts.external_id), exp_res_lst_type._RESOURCE)
+ # Check duplicated in result:
+ assert isinstance(res_lst.get(id=ts_numeric.id), list)
+ assert isinstance(res_lst.get(id=ts_string.id), list)
+ assert isinstance(res_lst.get(external_id=ts_numeric.external_id), list)
+ assert isinstance(res_lst.get(external_id=ts_string.external_id), list)
+ assert len(res_lst.get(id=ts_numeric.id)) == 3
+ assert len(res_lst.get(id=ts_string.id)) == 2
+ assert len(res_lst.get(external_id=ts_numeric.external_id)) == 3
+ assert len(res_lst.get(external_id=ts_string.external_id)) == 2
+
class TestRetrieveDataFrameAPI:
"""The `retrieve_dataframe` endpoint uses `retrieve_arrays` under the hood, so lots of tests
diff --git a/tests/tests_integration/test_api/test_iam.py b/tests/tests_integration/test_api/test_iam.py
index 499a9dae75..092149c88e 100644
--- a/tests/tests_integration/test_api/test_iam.py
+++ b/tests/tests_integration/test_api/test_iam.py
@@ -22,6 +22,9 @@ def test_dump_load_group_list(self, group_list: GroupList) -> None:
loaded = GroupList.load(group_list.dump(camel_case=True))
assert group_list.dump() == loaded.dump()
+ @pytest.mark.skip(
+ reason="CogniteAPIError: There can only be 1500 undeleted or deleted groups per project | code: 400"
+ )
@pytest.mark.parametrize("source_id, members", (("abc-123", None), (None, ["user1", "user2"])))
def test_create(self, cognite_client, source_id, members):
metadata = {"haha": "blabla"}
diff --git a/tests/tests_unit/test_api/test_datapoints.py b/tests/tests_unit/test_api/test_datapoints.py
index 55a8da7466..7bdbea24c9 100644
--- a/tests/tests_unit/test_api/test_datapoints.py
+++ b/tests/tests_unit/test_api/test_datapoints.py
@@ -15,7 +15,7 @@
from cognite.client.data_classes import Datapoint, Datapoints, DatapointsList, LatestDatapointQuery
from cognite.client.exceptions import CogniteAPIError, CogniteNotFoundError
from cognite.client.utils import _json
-from cognite.client.utils._time import granularity_to_ms, import_zoneinfo
+from cognite.client.utils._time import ZoneInfo, granularity_to_ms
from tests.utils import jsgz_load, random_gamma_dist_integer
DATAPOINTS_API = "cognite.client._api.datapoints.{}"
@@ -968,7 +968,6 @@ class TestRetrieveDataPointsInTz:
def test_retrieve_data_points_in_tz_invalid_user_input(
args: dict, expected_error_message: str, start_tz: str | None, end_tz: str | None, cognite_client: CogniteClient
):
- ZoneInfo = import_zoneinfo()
if start_tz is not None:
args["start"] = args["start"].astimezone(ZoneInfo(start_tz))
if end_tz is not None:
diff --git a/tests/tests_unit/test_base.py b/tests/tests_unit/test_base.py
index 2b4d81e3d3..1d72325649 100644
--- a/tests/tests_unit/test_base.py
+++ b/tests/tests_unit/test_base.py
@@ -417,7 +417,6 @@ def test_json_serialize(self, cognite_resource_subclass: type[CogniteResource],
dumped = instance.dump(camel_case=True)
json_serialised = _json.dumps(dumped)
loaded = instance.load(json_serialised, cognite_client=cognite_mock_client_placeholder)
-
assert loaded.dump() == instance.dump()
@pytest.mark.dsl
diff --git a/tests/tests_unit/test_data_classes/test_datapoints.py b/tests/tests_unit/test_data_classes/test_datapoints.py
index 53bff05773..048417c4f8 100644
--- a/tests/tests_unit/test_data_classes/test_datapoints.py
+++ b/tests/tests_unit/test_data_classes/test_datapoints.py
@@ -1,10 +1,59 @@
from __future__ import annotations
import math
+from datetime import timedelta, timezone
import pytest
-from cognite.client.data_classes import DatapointsArray
+from cognite.client.data_classes import Datapoint, DatapointsArray
+from cognite.client.utils._time import ZoneInfo
+
+
+class TestDatapoint:
+ def test_display_str_no_timezone(self):
+ dp = Datapoint(timestamp=1716589737000, value="foo", average=123)
+ assert "timezone" not in str(dp)
+ assert '"timestamp": "2024-05-24 22:28:57.000+00:00"' in str(dp)
+ dp.timezone = None
+ assert "timezone" not in str(dp)
+ assert '"timestamp": "2024-05-24 22:28:57.000+00:00"' in str(dp)
+
+ def test_display_str_with_builtin_timezone(self):
+ epoch_ms = 1716589737000
+ dp = Datapoint(timestamp=epoch_ms, value="foo", average=123)
+ dp.timezone = timezone(timedelta(hours=2))
+ assert "timezone" in str(dp)
+ assert '"timestamp": "2024-05-25 00:28:57.000+02:00"' in str(dp)
+
+ # The timezone only affects how the timestamp is displayed:
+ dp.timezone = timezone(timedelta(hours=-2))
+ assert '"timestamp": "2024-05-24 20:28:57.000-02:00"' in str(dp)
+ assert dp.timestamp == epoch_ms
+
+ @pytest.mark.dsl
+ @pytest.mark.parametrize(
+ "epoch_ms, offset_hours, zone, expected",
+ (
+ (1716589737000, 2, "Europe/Oslo", "2024-05-25 00:28:57.000+02:00"),
+ (1616589737000, 1, "Europe/Oslo", "2021-03-24 13:42:17.000+01:00"),
+ ),
+ )
+ def test_display_str_and_to_pandas_with_timezone_and_zoneinfo(self, epoch_ms, offset_hours, zone, expected):
+ import pandas as pd
+
+ dp1 = Datapoint(timestamp=epoch_ms, value="foo", average=123)
+ dp2 = Datapoint(timestamp=epoch_ms, value="foo", average=123)
+ dp1.timezone = ZoneInfo(zone)
+ dp2.timezone = timezone(timedelta(hours=offset_hours))
+ sdp1, sdp2 = str(dp1), str(dp2)
+
+ assert sdp1 != sdp2
+ assert sdp1.replace("Europe/Oslo", "") == sdp2.replace(f"UTC+0{offset_hours}:00", "")
+ assert f'"timestamp": "{expected}"' in sdp1
+
+ df1, df2 = dp1.to_pandas(), dp2.to_pandas()
+ assert 1 == len(df1.index) == len(df2.index)
+ assert pd.Timestamp(expected) == df1.index[0] == df2.index[0]
def factory_method_from_array_data():
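The unit tests above pin down how a `Datapoint` renders its timestamp once a timezone is attached: the stored epoch-ms value is unchanged, only the display offset moves. A stdlib-only reproduction of the expected strings (not the SDK's formatting code):

```python
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo

epoch_ms = 1716589737000
for tz in (timezone.utc, timezone(timedelta(hours=2)), ZoneInfo("Europe/Oslo")):
    dt = datetime.fromtimestamp(epoch_ms / 1000, tz=tz)
    print(dt.isoformat(timespec="milliseconds"))
# 2024-05-24T22:28:57.000+00:00
# 2024-05-25T00:28:57.000+02:00
# 2024-05-25T00:28:57.000+02:00  (Oslo is UTC+02:00 in May)
```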
diff --git a/tests/tests_unit/test_utils/test_auxiliary.py b/tests/tests_unit/test_utils/test_auxiliary.py
index ae211185ff..df687f8ab4 100644
--- a/tests/tests_unit/test_utils/test_auxiliary.py
+++ b/tests/tests_unit/test_utils/test_auxiliary.py
@@ -104,7 +104,7 @@ class TestRemoveDuplicatesKeepOrder:
("abccba", ["a", "b", "c"]),
),
)
- def test_no_duplicates_asdffdsa(self, inp, expected):
+ def test_no_duplicates(self, inp, expected):
assert expected == remove_duplicates_keep_order(inp)
diff --git a/tests/tests_unit/test_utils/test_time.py b/tests/tests_unit/test_utils/test_time.py
index 9b3dfdd68b..b55250d308 100644
--- a/tests/tests_unit/test_utils/test_time.py
+++ b/tests/tests_unit/test_utils/test_time.py
@@ -15,15 +15,17 @@
MAX_TIMESTAMP_MS,
MIN_TIMESTAMP_MS,
MonthAligner,
+ ZoneInfo,
align_large_granularity,
align_start_and_end_for_granularity,
convert_and_isoformat_time_attrs,
datetime_to_ms,
datetime_to_ms_iso_timestamp,
granularity_to_ms,
- import_zoneinfo,
ms_to_datetime,
pandas_date_range_tz,
+ parse_str_timezone,
+ parse_str_timezone_offset,
split_time_range,
timestamp_to_ms,
to_fixed_utc_intervals,
@@ -36,6 +38,44 @@
import pandas
+@pytest.mark.parametrize(
+ "offset_inp, expected",
+ (
+ ("", timedelta(0)),
+ ("01:15", timedelta(seconds=4500)),
+ ("01:15:12", timedelta(seconds=4512)),
+ ("23:59", timedelta(seconds=86340)),
+ ("0", timedelta(0)),
+ ("01", timedelta(seconds=3600)),
+ ("1", timedelta(seconds=3600)),
+ ("7", timedelta(seconds=25200)),
+ ("18", timedelta(seconds=64800)),
+ ),
+)
+def test_parse_str_timezone_offset(offset_inp, expected):
+ for pm in "+-":
+ for prefix in ["", "UTC", "UT", "GMT"]:
+ if not prefix and not offset_inp:
+ continue
+ inp = prefix + pm + offset_inp if offset_inp else prefix
+ res = parse_str_timezone_offset(inp)
+ assert res == timezone(int(pm + "1") * expected)
+
+
+@pytest.mark.parametrize(
+ "inp, expected",
+ (
+ ("Europe/Oslo", ZoneInfo("Europe/Oslo")),
+ ("Asia/Tokyo", ZoneInfo("Asia/Tokyo")),
+ ("GMT", ZoneInfo("GMT")),
+ ("UTC-0", timezone.utc),
+ ("UTC+01:15", timezone(timedelta(seconds=4500))),
+ ),
+)
+def test_parse_str_timezone(inp, expected):
+ assert expected == parse_str_timezone(inp)
+
+
class TestDatetimeToMsIsoTimestamp:
@pytest.mark.skipif(platform.system() == "Windows", reason="Overriding timezone is too much hassle on Windows")
def test_timezone_unaware(self):
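The new `parse_str_timezone` / `parse_str_timezone_offset` tests above document the accepted inputs: fixed offsets written as `UTC`/`UT`/`GMT` plus `±HH[:MM[:SS]]` become `datetime.timezone` objects, while everything else is looked up as an IANA name. A rough, illustrative parser that matches the listed cases (an assumption for illustration, not the SDK's implementation):

```python
import re
from datetime import timedelta, timezone
from zoneinfo import ZoneInfo

_OFFSET = re.compile(r"^(UTC?|GMT)?([+-])(\d{1,2})(?::(\d{2}))?(?::(\d{2}))?$")

def parse_tz(value: str):
    if match := _OFFSET.match(value):
        _, sign, hh, mm, ss = match.groups()
        delta = timedelta(hours=int(hh), minutes=int(mm or 0), seconds=int(ss or 0))
        return timezone(-delta if sign == "-" else delta)
    return ZoneInfo(value)  # e.g. "Europe/Oslo", "Asia/Tokyo", "GMT"

assert parse_tz("UTC+01:15") == timezone(timedelta(minutes=75))
assert parse_tz("Europe/Oslo") == ZoneInfo("Europe/Oslo")
```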
@@ -46,7 +86,6 @@ def test_timezone_unaware(self):
@pytest.mark.dsl
def test_timezone_cet(self):
- ZoneInfo = import_zoneinfo()
input_datetime = datetime(2021, 1, 1, 0, 0, 0, 0, tzinfo=ZoneInfo("CET"))
utc_datetime = input_datetime.astimezone(timezone.utc)
assert datetime_to_ms_iso_timestamp(input_datetime) == "2021-01-01T00:00:00.000+01:00"
@@ -55,7 +94,6 @@ def test_timezone_cet(self):
@pytest.mark.dsl
@pytest.mark.skipif(platform.system() == "Windows", reason="Overriding timezone is too much hassle on Windows")
def test_timezone_cet_in_local_tz(self):
- ZoneInfo = import_zoneinfo()
input_datetime = datetime(2021, 1, 1, 0, 0, 0, 0, tzinfo=ZoneInfo("CET"))
with tmp_set_envvar("TZ", "UTC"):
time.tzset()
@@ -93,21 +131,17 @@ def test_naive_datetime_to_ms_windows(self):
datetime_to_ms(datetime(1925, 8, 3))
def test_aware_datetime_to_ms(self):
- # TODO: Starting from PY39 we should also add tests using:
- # from zoneinfo import ZoneInfo
- # datetime(2020, 10, 31, 12, tzinfo=ZoneInfo("America/Los_Angeles"))
utc = timezone.utc
assert datetime_to_ms(datetime(2018, 1, 31, tzinfo=utc)) == 1517356800000
assert datetime_to_ms(datetime(2018, 1, 31, 11, 11, 11, tzinfo=utc)) == 1517397071000
assert datetime_to_ms(datetime(100, 1, 31, tzinfo=utc)) == -59008867200000
- @pytest.mark.dsl
def test_aware_datetime_to_ms_zoneinfo(self):
- ZoneInfo = import_zoneinfo()
# The correct answer was obtained using: https://dencode.com/en/date/unix-time
assert datetime_to_ms(datetime(2018, 1, 31, tzinfo=ZoneInfo("Europe/Oslo"))) == 1517353200000
assert datetime_to_ms(datetime(1900, 1, 1, tzinfo=ZoneInfo("Europe/Oslo"))) == -2208992400000
assert datetime_to_ms(datetime(1900, 1, 1, tzinfo=ZoneInfo("America/New_York"))) == -2208970800000
+ assert datetime_to_ms(datetime(2020, 10, 31, 12, tzinfo=ZoneInfo("America/Los_Angeles"))) == 1604170800000
def test_ms_to_datetime__valid_input(self): # TODO: Many tests here could benefit from parametrize
utc = timezone.utc
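The added zoneinfo assertion above is plain epoch arithmetic: an aware datetime's `.timestamp()` already accounts for its UTC offset, so the millisecond value follows by multiplication.

```python
from datetime import datetime
from zoneinfo import ZoneInfo

dt = datetime(2020, 10, 31, 12, tzinfo=ZoneInfo("America/Los_Angeles"))  # PDT, UTC-7
assert int(dt.timestamp() * 1000) == 1604170800000
```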
@@ -388,11 +422,6 @@ def test_cdf_aggregation(
def to_fixed_utc_intervals_data() -> Iterable[ParameterSet]:
- try:
- ZoneInfo = import_zoneinfo()
- except CogniteImportError:
- return []
-
oslo = ZoneInfo("Europe/Oslo")
utc = dict(tzinfo=ZoneInfo("UTC"))
oslo_dst = datetime(2023, 3, 25, 23, **utc)
@@ -519,11 +548,6 @@ def test_to_fixed_utc_intervals(
def validate_time_zone_invalid_arguments_data() -> list[ParameterSet]:
- try:
- ZoneInfo = import_zoneinfo()
- except CogniteImportError:
- return []
-
oslo = ZoneInfo("Europe/Oslo")
new_york = ZoneInfo("America/New_York")
@@ -557,7 +581,6 @@ def validate_time_zone_invalid_arguments_data() -> list[ParameterSet]:
def validate_time_zone_valid_arguments_data() -> list[ParameterSet]:
try:
- ZoneInfo = import_zoneinfo()
import pandas as pd
import pytz # hard pandas dependency
except (ImportError, CogniteImportError):
@@ -632,7 +655,6 @@ def test_raise_value_error_invalid_arguments(start: datetime, end: datetime, exp
def test_infer_timezone(start: datetime, end: datetime, expected_tz):
actual_tz = validate_timezone(start, end)
- ZoneInfo = import_zoneinfo()
assert isinstance(actual_tz, ZoneInfo)
assert actual_tz is expected_tz
@@ -674,7 +696,6 @@ class TestPandasDateRangeTz:
@staticmethod
@pytest.mark.dsl
def test_pandas_date_range_tz_ambiguous_time_error():
- ZoneInfo = import_zoneinfo()
oslo = ZoneInfo("Europe/Oslo")
start = datetime(1916, 8, 1, tzinfo=oslo)
end = datetime(1916, 12, 1, tzinfo=oslo)
diff --git a/tests/utils.py b/tests/utils.py
index 893822e1bd..2b1ca41967 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -14,6 +14,7 @@
import typing
from collections import Counter
from contextlib import contextmanager
+from datetime import timedelta, timezone
from typing import TYPE_CHECKING, Any, Literal, Mapping, TypeVar, cast, get_args, get_origin, get_type_hints
from cognite.client import CogniteClient
@@ -420,6 +421,8 @@ def create_instance(self, resource_cls: type[T_Object], skip_defaulted_args: boo
keyword_arguments["through"] = [keyword_arguments["through"][0], "my_view/v1", "a_property"]
elif resource_cls is Buckets:
keyword_arguments = {"items": [{"start": 1, "count": 1}]}
+ elif resource_cls is timezone:
+ positional_arguments.append(timedelta(hours=self._random.randint(-3, 3)))
return resource_cls(*positional_arguments, **keyword_arguments)