Skip to content

Commit

Permalink
add: unit lookup from alias (and quantity) (#2027)
Browse files Browse the repository at this point in the history
  • Loading branch information
haakonvt authored Nov 19, 2024
1 parent 462ade8 commit 03a073d
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 25 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ Changes are grouped as follows
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [7.67.0] - 2024-11-19
### Added
- Convenience method `from_alias` to the UnitsAPI (`client.units.from_alias`) to help with looking up
units by their aliases (similarity search is supported).

## [7.66.1] - 2024-11-18
### Removed
- The Core Data Model (v1) is now considered stable and the alpha warning has been removed.
Expand Down
143 changes: 140 additions & 3 deletions cognite/client/_api/units.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from __future__ import annotations

from typing import TYPE_CHECKING, overload
import difflib
from collections import defaultdict
from functools import cached_property
from itertools import chain
from typing import TYPE_CHECKING, Literal, overload

from cognite.client._api_client import APIClient
from cognite.client.data_classes.units import (
Expand All @@ -9,6 +13,7 @@
UnitSystem,
UnitSystemList,
)
from cognite.client.utils._auxiliary import remove_duplicates_keep_order
from cognite.client.utils._identifier import IdentifierSequence
from cognite.client.utils.useful_types import SequenceNotStr

Expand Down Expand Up @@ -69,6 +74,138 @@ def retrieve(
ignore_unknown_ids=ignore_unknown_ids,
)

@cached_property
def _create_unit_lookups(self) -> tuple[dict[str, dict[str, Unit]], dict[str, list[Unit]]]:
    """Build the two alias lookup tables used by ``from_alias``.

    The unit catalog is fetched once through ``self.list()``; since this is a
    ``cached_property`` the result is reused on later accesses.

    Returns:
        tuple[dict[str, dict[str, Unit]], dict[str, list[Unit]]]: A pair of plain dicts:
            first ``quantity -> alias -> unit``, then ``alias -> [units]`` with one entry
            per quantity that uses the alias.
    """
    per_quantity: defaultdict[str, dict[str, Unit]] = defaultdict(dict)
    for unit in self.list():
        # Some units list the same alias more than once; going through dict keys collapses those.
        per_quantity[unit.quantity].update(dict.fromkeys(unit.alias_names, unit))

    units_by_alias: dict[str, list[Unit]] = {}
    for aliases in per_quantity.values():
        for alias, unit in aliases.items():
            units_by_alias.setdefault(alias, []).append(unit)

    # Plain dicts are returned on purpose: a failed lookup must raise KeyError.
    return dict(per_quantity), units_by_alias

@overload
def from_alias(
    self,
    alias: str,
    quantity: str | None = None,
    return_ambiguous: Literal[False] = False,
    return_closest_matches: Literal[False] = False,
) -> Unit: ...

@overload
def from_alias(
    self,
    alias: str,
    quantity: str | None = None,
    return_ambiguous: bool = False,
    return_closest_matches: bool = False,
) -> Unit | UnitList: ...

def from_alias(
    self,
    alias: str,
    quantity: str | None = None,
    return_ambiguous: bool = False,
    return_closest_matches: bool = False,
) -> Unit | UnitList:
    """Look up a unit by alias, optionally for a given quantity.

    Aliases are case-sensitive. A quantity that is not found as given is retried in title case
    (e.g. 'volume FLOW rate' matches 'Volume Flow Rate').

    Note:
        When just ``alias`` is given (i.e. ``quantity`` is not specified), some aliases are ambiguous as they are used
        by several quantities, e.g. 'F' which can be both Farad (Capacitance) and Fahrenheit (Temperature). These raise
        a ValueError by default unless also ``return_ambiguous=True`` is passed, in which case all matching units are returned.

    Tip:
        You can use ``return_closest_matches=True`` to get the closest matching units if the lookup fails. Note that there
        may not be any close matches, in which case an empty UnitList is returned.

    Args:
        alias (str): Alias of the unit, like 'cmol / L' or 'meter per second'.
        quantity (str | None): Quantity of the unit, like 'Temperature' or 'Pressure'.
        return_ambiguous (bool): If False (default), when the alias is ambiguous (i.e. no quantity was given), raise a ValueError. If True, return the list of all matching units.
        return_closest_matches (bool): If False (default), when the lookup fails, raise a ValueError. If True, return the closest matching units (even if empty).

    Returns:
        Unit | UnitList: The unit if found, else a ValueError is raised. If one or both of ``return_ambiguous`` and ``return_closest_matches`` is passed as True, a UnitList may be returned.

    Examples:

        Look up a unit by alias only:

            >>> from cognite.client import CogniteClient
            >>> client = CogniteClient()
            >>> unit = client.units.from_alias('cmol / L')

        Look up ambiguous alias 'F' by passing quantity 'Temperature':

            >>> unit = client.units.from_alias('F', 'Temperature')

        Search for the closest matching unit of 'kilo watt' (should be 'kilowatt'):

            >>> unit_matches = client.units.from_alias("kilo watt", return_closest_matches=True)
    """
    # The overload stubs above repeat the defaults so that calls omitting the
    # optional arguments still match an overload under static type checking.
    alias_by_quantity, alias_lookup = self._create_unit_lookups
    if quantity is None:
        return self._lookup_unit_by_alias(alias, alias_lookup, return_ambiguous, return_closest_matches)
    return self._lookup_unit_by_alias_and_quantity(alias, alias_by_quantity, quantity, return_closest_matches)

@staticmethod
def _lookup_unit_by_alias(
    alias: str, alias_lookup: dict[str, list[Unit]], return_ambiguous: bool, return_closest_matches: bool
) -> Unit | UnitList:
    """Resolve ``alias`` across all quantities.

    Args:
        alias (str): Alias to look up (exact key match against ``alias_lookup``).
        alias_lookup (dict[str, list[Unit]]): Mapping from alias to every unit using it.
        return_ambiguous (bool): Return all units instead of raising when several units share the alias.
        return_closest_matches (bool): Return the units of similar aliases (possibly none) instead of raising when the alias is unknown.

    Returns:
        Unit | UnitList: The single matching unit, or a UnitList when one of the flags applies.

    Raises:
        ValueError: If the alias is unknown or ambiguous and the corresponding flag is not set.
    """
    try:
        first, *others = alias_lookup[alias]
    except KeyError:
        similar = difflib.get_close_matches(alias, alias_lookup, n=10)
        if return_closest_matches:
            # Several similar aliases may point to the same unit; keep each unit once.
            candidates = chain.from_iterable(alias_lookup[name] for name in similar)
            return UnitList(remove_duplicates_keep_order(candidates))
        hint = f", did you mean one of: {sorted(similar)}?" if similar else ""
        raise ValueError(f"Unknown {alias=}" + hint) from None

    if not others:
        return first
    if return_ambiguous:
        return UnitList([first, *others])
    xids = [u.external_id for u in (first, *others)]
    raise ValueError(f"Ambiguous alias, matches all of: {xids}") from None

@staticmethod
def _lookup_unit_by_alias_and_quantity(
    alias: str, alias_by_quantity: dict[str, dict[str, Unit]], quantity: str, return_closest_matches: bool
) -> Unit | UnitList:
    """Resolve ``alias`` among the units of one ``quantity``.

    Args:
        alias (str): Alias to look up (exact key match within the quantity).
        alias_by_quantity (dict[str, dict[str, Unit]]): Mapping ``quantity -> alias -> unit``.
        quantity (str): Quantity name; if not found as given, its title-cased form is tried.
        return_closest_matches (bool): When the alias is unknown, return the units of similar
            aliases instead of raising. The returned UnitList is empty if nothing is similar.

    Returns:
        Unit | UnitList: The matching unit, or a UnitList of close matches when requested.

    Raises:
        ValueError: If the quantity is unknown, or the alias is unknown and
            ``return_closest_matches`` is False.
    """
    try:
        quantity_dct = alias_by_quantity[quantity]
    except KeyError:
        # All except one are title-cased (API Gravity - which stands for 'American Petroleum Institute' obviously...)
        title_cased = quantity.title()
        if title_cased not in alias_by_quantity:
            err_msg = f"Unknown {quantity=}."
            if close_matches := difflib.get_close_matches(quantity, alias_by_quantity, n=3):
                err_msg += f" Did you mean one of: {close_matches}?"
            raise ValueError(err_msg + f" All known quantities: {sorted(alias_by_quantity)}") from None
        quantity_dct = alias_by_quantity[title_cased]

    try:
        return quantity_dct[alias]
    except KeyError:
        close_matches = difflib.get_close_matches(alias, quantity_dct, n=3)
        if return_closest_matches:
            # Honour the flag even when nothing is similar: an empty UnitList is returned,
            # the same as the alias-only lookup does, rather than raising.
            return UnitList(remove_duplicates_keep_order(quantity_dct[m] for m in close_matches))
        err_msg = f"Unknown {alias=} for known {quantity=}."
        if close_matches:
            err_msg += f" Did you mean one of: {close_matches}?"
        raise ValueError(err_msg) from None

def list(self) -> UnitList:
"""`List all supported units <https://developer.cognite.com/api#tag/Units/operation/listUnits>`_
Expand All @@ -77,7 +214,7 @@ def list(self) -> UnitList:
Examples:
List all supported unit in CDF::
List all supported units in CDF:
>>> from cognite.client import CogniteClient
>>> client = CogniteClient()
Expand All @@ -97,7 +234,7 @@ def list(self) -> UnitSystemList:
Examples:
List all supported unit systems in CDF::
List all supported unit systems in CDF:
>>> from cognite.client import CogniteClient
>>> client = CogniteClient()
Expand Down
2 changes: 1 addition & 1 deletion cognite/client/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import annotations

__version__ = "7.66.1"
__version__ = "7.67.0"
__api_subversion__ = "20230101"
20 changes: 11 additions & 9 deletions cognite/client/data_classes/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

from typing_extensions import Self

from cognite.client.data_classes._base import CogniteResource, CogniteResourceList
from cognite.client.data_classes._base import (
CogniteResource,
CogniteResourceList,
ExternalIDTransformerMixin,
NameTransformerMixin,
)
from cognite.client.utils._text import convert_dict_to_case

if TYPE_CHECKING:
Expand Down Expand Up @@ -99,6 +104,9 @@ def __init__(
self.source = source
self.source_reference = source_reference

def __hash__(self) -> int:
    """Hash the unit by its ``external_id`` alone."""
    xid = self.external_id
    return hash(xid)

def as_unit_id(self) -> UnitID:
    """Build a ``UnitID`` from this unit's ``external_id`` and ``name``."""
    xid, unit_name = self.external_id, self.name
    return UnitID(unit_external_id=xid, name=unit_name)
Expand All @@ -123,14 +131,11 @@ def dump(self, camel_case: bool = True) -> dict[str, Any]:
return convert_dict_to_case(dumped, camel_case)


class UnitList(CogniteResourceList[Unit]):
class UnitList(CogniteResourceList[Unit], ExternalIDTransformerMixin):
"""List of Units"""

_RESOURCE = Unit

def as_external_ids(self) -> list[str]:
return [unit.external_id for unit in self]


class UnitSystem(CogniteResource):
"""
Expand All @@ -157,10 +162,7 @@ def dump(self, camel_case: bool = True) -> dict[str, Any]:
return {"name": self.name, "quantities": [quantity.dump(camel_case) for quantity in self.quantities]}


class UnitSystemList(CogniteResourceList[UnitSystem]):
class UnitSystemList(CogniteResourceList[UnitSystem], NameTransformerMixin):
"""List of Unit Systems"""

_RESOURCE = UnitSystem

def as_names(self) -> list[str]:
return [unit_system.name for unit_system in self]
2 changes: 1 addition & 1 deletion cognite/client/utils/_auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def find_duplicates(seq: Iterable[THashable]) -> set[THashable]:
return {x for x in seq if x in seen or add(x)}


def remove_duplicates_keep_order(seq: Sequence[THashable]) -> list[THashable]:
def remove_duplicates_keep_order(seq: Iterable[THashable]) -> list[THashable]:
    """Return the items of ``seq`` with later duplicates dropped, keeping first-seen order.

    Args:
        seq (Iterable[THashable]): Any iterable of hashable items; it is consumed once.

    Returns:
        list[THashable]: The first occurrence of each distinct item, in encounter order.
    """
    already_seen: set[THashable] = set()
    unique: list[THashable] = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique.append(item)
    return unique
Expand Down
4 changes: 4 additions & 0 deletions docs/source/unit_catalog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Retrieve Unit
^^^^^^^^^^^^^^^^^^^
.. automethod:: cognite.client._api.units.UnitAPI.retrieve

Look up Unit from alias
^^^^^^^^^^^^^^^^^^^^^^^
.. automethod:: cognite.client._api.units.UnitAPI.from_alias

Unit Systems
------------------
List Unit System
Expand Down
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "cognite-sdk"

version = "7.66.1"
version = "7.67.0"
description = "Cognite Python SDK"
readme = "README.md"
documentation = "https://cognite-sdk-python.readthedocs-hosted.com"
Expand Down
67 changes: 60 additions & 7 deletions tests/tests_integration/test_api/test_unit.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

import random
import re

import pytest

from cognite.client import CogniteClient
Expand All @@ -18,26 +21,24 @@ def available_units(cognite_client: CogniteClient) -> UnitList:

class TestUnits:
def test_retrieve_unit(self, cognite_client: CogniteClient, available_units: UnitList) -> None:
unit = available_units[0]
unit = random.choice(available_units)

retrieved_unit = cognite_client.units.retrieve(unit.external_id)

assert retrieved_unit == unit

def test_retrieve_multiple(self, cognite_client: CogniteClient, available_units: UnitList) -> None:
units = available_units[:2]
unit_xids = random.choices(available_units.as_external_ids(), k=3)

retrieved_units = cognite_client.units.retrieve(units.as_external_ids())
retrieved_units = cognite_client.units.retrieve(unit_xids)

assert retrieved_units == units
assert retrieved_units.as_external_ids() == unit_xids

def test_retrieve_raise_non_existing_unit(self, cognite_client: CogniteClient, available_units: UnitList) -> None:
with pytest.raises(CogniteNotFoundError):
cognite_client.units.retrieve([available_units[0].external_id, "non-existing-unit"])

def test_retrieve_none_for_single_non_existing_unit(
self, cognite_client: CogniteClient, available_units: UnitList
) -> None:
def test_retrieve_none_for_single_non_existing_unit(self, cognite_client: CogniteClient) -> None:
retrieved_unit = cognite_client.units.retrieve("non-existing-unit", ignore_unknown_ids=True)
assert retrieved_unit is None

Expand All @@ -55,3 +56,55 @@ def test_list_unit_systems(self, cognite_client: CogniteClient) -> None:
unit_systems = cognite_client.units.systems.list()

assert len(unit_systems) >= 1, "Expected to get some unit systems"


class TestFromAlias:
    """Integration tests for ``client.units.from_alias``; they run against the live unit catalog."""

    @pytest.mark.parametrize(
        "alias, quantity, expected_xid",
        (
            ("cubic decimetre per minute", None, "volume_flow_rate:decim3-per-min"),
            ("cubic decimetre per minute", "Volume Flow Rate", "volume_flow_rate:decim3-per-min"),
            ("cubic decimetre per minute", "volume FLOW rate", "volume_flow_rate:decim3-per-min"),
            ("megavolt ampere hr", None, "energy:megav-a-hr"),
            ("megavolt ampere hr", "Energy", "energy:megav-a-hr"),
            ("megavolt ampere hr", "enerGY", "energy:megav-a-hr"),
        ),
    )
    def test_lookup_unit_from_alias(
        self, cognite_client: CogniteClient, alias: str, quantity: str | None, expected_xid: str
    ) -> None:
        # Covers alias-only lookups as well as quantities given in exact and non-canonical casing.
        unit = cognite_client.units.from_alias(alias, quantity)
        assert unit.external_id == expected_xid

    def test_lookup_unit_from_alias_unknown_quantity(self, cognite_client: CogniteClient) -> None:
        match = re.escape("Unknown quantity='Not Energy'. Did you mean one of: ['Energy']? All known quantities: [")
        with pytest.raises(ValueError, match=match):
            cognite_client.units.from_alias("cubic decimetre per minute", "Not Energy")

    def test_lookup_ambiguous(self, cognite_client: CogniteClient) -> None:
        match = re.escape("Ambiguous alias, matches all of: ['capacitance:farad', 'temperature:deg_f']")
        # Raising is the default, and also what an explicit return_ambiguous=False does:
        with pytest.raises(ValueError, match=match):
            cognite_client.units.from_alias("F", None)
        with pytest.raises(ValueError, match=match):
            cognite_client.units.from_alias("F", None, return_ambiguous=False)

        units = cognite_client.units.from_alias("F", None, return_ambiguous=True)
        assert units.as_external_ids() == ["capacitance:farad", "temperature:deg_f"]

    def test_lookup_closest_match__only_alias(self, cognite_client: CogniteClient) -> None:
        # Ensure it fails without 'closest matches':
        match = re.escape("Unknown alias='c mol', did you mean one of: ['")
        with pytest.raises(ValueError, match=match):
            cognite_client.units.from_alias("c mol", return_closest_matches=False)

        units = cognite_client.units.from_alias("c mol", return_closest_matches=True)
        assert len(units) > 1
        assert "amount_of_substance:centimol" in units.as_external_ids()

    def test_lookup_closest_match_alias_and_quantity(self, cognite_client: CogniteClient) -> None:
        # Ensure it fails without 'closest matches':
        match = re.escape("Unknown alias='imp force' for known quantity='Force'. Did you mean one of: ['")
        with pytest.raises(ValueError, match=match):
            cognite_client.units.from_alias("imp force", "Force", return_closest_matches=False)

        units = cognite_client.units.from_alias("imp force", "Force", return_closest_matches=True)
        assert len(units) > 1
        assert "force:lb_f" in units.as_external_ids()

0 comments on commit 03a073d

Please sign in to comment.