Skip to content

Commit

Permalink
suggestion: Instance.to_pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
haakonvt committed Sep 4, 2024
1 parent aa9488f commit a6bd8c6
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 55 deletions.
11 changes: 9 additions & 2 deletions cognite/client/data_classes/data_modeling/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,8 +465,13 @@ def to_pandas( # type: ignore [override]
col = df.squeeze()
prop_df = pd.json_normalize(col.pop("properties"), max_level=2)
if remove_property_prefix and not prop_df.empty:
from cognite.client.data_classes.data_modeling.typed_instances import TypedEdge, TypedNode

if isinstance(self, (TypedEdge, TypedNode)):
view_id, *extra = [self.get_source()] # type: ignore [attr-defined]
else:
view_id, *extra = self.properties.keys()
# We only do/allow this if we have a single source:
view_id, *extra = self.properties.keys()
if not extra:
prop_df.columns = prop_df.columns.str.removeprefix("{}.{}/{}.".format(*view_id.as_tuple()))
else:
Expand Down Expand Up @@ -1023,8 +1028,10 @@ def to_pandas( # type: ignore [override]

prop_df = local_import("pandas").json_normalize(df.pop("properties"), max_level=2)
if remove_property_prefix and not prop_df.empty:
from cognite.client.data_classes.data_modeling.typed_instances import TypedEdge, TypedNode

# We only do/allow this if we have a single source:
typed_view_ids = {item.get_source() for item in self if hasattr(item, "get_source")}
typed_view_ids = {item.get_source() for item in self if isinstance(item, (TypedEdge, TypedNode))}
view_id, *extra = set(vid for item in self for vid in item.properties) | typed_view_ids
if not extra:
prop_df.columns = prop_df.columns.str.removeprefix("{}.{}/{}.".format(*view_id.as_tuple()))
Expand Down
55 changes: 2 additions & 53 deletions cognite/client/data_classes/data_modeling/typed_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@
NodeApply,
_serialize_property_value,
)
from cognite.client.utils._importing import local_import
from cognite.client.utils._text import to_camel_case
from cognite.client.utils._time import TIME_ATTRIBUTES, convert_data_modelling_timestamp
from cognite.client.utils._time import convert_data_modelling_timestamp

if TYPE_CHECKING:
import pandas as pd

from cognite.client import CogniteClient


Expand Down Expand Up @@ -160,46 +157,6 @@ def _load(cls, resource: dict[str, Any], cognite_client: CogniteClient | None =
properties = all_properties.get(source.space, {}).get(source.as_source_identifier(), {})
return cast(Self, _load_instance(cls, resource, properties, cls._instance_properties))

def to_pandas(  # type: ignore [override]
    self,
    ignore: list[str] | None = None,
    camel_case: bool = False,
    convert_timestamps: bool = True,
    **kwargs: Any,
) -> pd.DataFrame:
    """Convert the instance into a pandas DataFrame.

    Args:
        ignore (list[str] | None): List of row keys to skip when converting to a data frame. Is applied before expansions.
        camel_case (bool): Convert attribute names to camel case (e.g. `externalId` instead of `external_id`). Does not affect properties if expanded.
        convert_timestamps (bool): Convert known attributes storing CDF timestamps (milliseconds since epoch) to datetime.
        **kwargs (Any): For backwards compatibility.

    Returns:
        pd.DataFrame: The instance as a pandas Series.
    """
    # NOTE(review): the annotation says DataFrame but a pandas Series is returned,
    # matching the pre-existing `type: ignore [override]` — confirm against base class.
    pd = local_import("pandas")
    # These kwargs are accepted (and ignored) for backwards compatibility only;
    # anything left over is a genuine usage error.
    for accepted in ("expand_metadata", "metadata_prefix", "expand_properties", "remove_property_prefix"):
        kwargs.pop(accepted, None)
    if kwargs:
        raise TypeError(f"Unsupported keyword arguments: {kwargs}")

    row = super().dump(camel_case)
    # Drop the nested 'properties' payload and splice the properties back in
    # flat, keyed by attribute name, so they become individual rows.
    row.pop("properties", None)
    row.update(
        _dump_properties(
            self, camel_case=camel_case, instance_properties=self._instance_properties, use_attribute_name=True
        )
    )

    if convert_timestamps:
        # Only attributes known to hold epoch-millisecond timestamps are converted.
        for attr in TIME_ATTRIBUTES.intersection(row):
            row[attr] = pd.Timestamp(row[attr], unit="ms")

    # 'ignore' is applied last, after property expansion, so it can drop either kind of key.
    for skipped in ignore or []:
        row.pop(skipped, None)

    return pd.Series(row)


class TypedEdge(Edge, ABC):
_instance_properties = frozenset(
Expand Down Expand Up @@ -317,15 +274,9 @@ def _get_properties_by_name(cls: type) -> dict[str, PropertyOptions]:
)


def _dump_properties(
obj: object, camel_case: bool, instance_properties: frozenset[str], use_attribute_name: bool = False
) -> dict[str, Any]:
def _dump_properties(obj: object, camel_case: bool, instance_properties: frozenset[str]) -> dict[str, Any]:
properties: dict[str, str | int | float | bool | dict | list] = {}
properties_by_name = _get_properties_by_name(type(obj))
if use_attribute_name:
attribute_by_property = {v.name: k for k, v in properties_by_name.items()}
else:
attribute_by_property = {}
for key, value in vars(obj).items():
if key in instance_properties or value is None:
continue
Expand All @@ -334,8 +285,6 @@ def _dump_properties(

if key in properties_by_name:
key = cast(str, properties_by_name[key].name)
if use_attribute_name and key in attribute_by_property:
key = attribute_by_property[key]

if isinstance(value, Iterable) and not isinstance(value, (str, dict)):
properties[key] = [_serialize_property_value(v, camel_case) for v in value]
Expand Down

0 comments on commit a6bd8c6

Please sign in to comment.