From 79991a662249b132c051c0942c0d927e3b8da2c2 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Wed, 6 Sep 2023 16:13:36 -0400 Subject: [PATCH] refactor: expose catalog as Colormap classmethod (#22) * refactor: update catalog, make public * name methods * test: add tests * fix lint * test: add test * add unique names --- .pre-commit-config.yaml | 5 - docs/_gen_cmaps.py | 9 +- docs/_hooks.py | 10 +- src/cmap/__init__.py | 53 ++++++++ src/cmap/_catalog.py | 269 +++++++++++++++++++++++++++++++--------- src/cmap/_colormap.py | 21 +++- src/cmap/_external.py | 2 +- tests/test_catalog.py | 40 +++++- tests/test_data.py | 21 +--- 9 files changed, 341 insertions(+), 89 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96a9c164c..7a2f5256a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,11 +6,6 @@ ci: exclude: ^LICENSE repos: - # - repo: https://github.com/crate-ci/typos - # rev: v1.16.10 - # hooks: - # - id: typos - - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: diff --git a/docs/_gen_cmaps.py b/docs/_gen_cmaps.py index 20cb9c3d8..3b2584ca4 100644 --- a/docs/_gen_cmaps.py +++ b/docs/_gen_cmaps.py @@ -1,10 +1,13 @@ import json from pathlib import Path +from typing import TYPE_CHECKING, cast import mkdocs_gen_files import numpy as np -from cmap import Colormap, _catalog +if TYPE_CHECKING: + from cmap import _catalog +from cmap import Colormap from cmap._util import report # TODO: convert to jinja @@ -80,7 +83,7 @@ ) -def build_catalog(catalog: _catalog.Catalog) -> None: +def build_catalog(catalog: "_catalog.Catalog") -> None: for name in catalog: if ":" not in name: continue @@ -132,4 +135,4 @@ def _make_aliases_md(aliases: list[str]) -> str: return "**Aliases**: " + ", ".join(f"`{a}`" for a in aliases) -build_catalog(_catalog.catalog) +build_catalog(cast("_catalog.Catalog", Colormap.catalog())) diff --git a/docs/_hooks.py b/docs/_hooks.py index d9c470d37..714285de0 100644 --- a/docs/_hooks.py +++ b/docs/_hooks.py @@ -3,14 +3,18 @@ import sys from functools import partial from pathlib import Path -from typing import Any, Sequence +from typing import TYPE_CHECKING, Any, Sequence, cast import numpy as np from cmap import Colormap, _util -from cmap._catalog import CATALOG + +if TYPE_CHECKING: + from cmap._catalog import CatalogDict from cmap._color import NAME_TO_RGB +CATALOG = cast("CatalogDict", Colormap.catalog()._data) # type: ignore + # the template for a single colormap CMAP_DIV = """
@@ -92,7 +96,7 @@ def _cmap_catalog() -> str: continue category = details.get("category") or "Uncategorized" categories.add(category) - classes = ["filterDiv"] + [category.lower()] + classes = ["filterDiv", category.lower()] lines.append(_cmap_div(cmap_name, classes)) btns = [ diff --git a/src/cmap/__init__.py b/src/cmap/__init__.py index 76e0a8a52..e05b6bd90 100644 --- a/src/cmap/__init__.py +++ b/src/cmap/__init__.py @@ -1,17 +1,70 @@ """Scientific colormaps for python, without dependencies.""" from importlib.metadata import PackageNotFoundError, version +from typing import TYPE_CHECKING, Iterator, Mapping try: __version__ = version("cmap") except PackageNotFoundError: # pragma: no cover __version__ = "uninstalled" + from ._color import HSLA, HSVA, RGBA, RGBA8, Color from ._colormap import Colormap +if TYPE_CHECKING: + from ._catalog import CatalogItem + + class Catalog(Mapping[str, CatalogItem]): + """Catalog of available colormaps.""" + + def __getitem__(self, name: str) -> CatalogItem: + """Get a catalog item by name.""" + + def __iter__(self) -> Iterator[str]: + """Iterate over available colormap keys.""" + + def __len__(self) -> int: + """Return the number of available colormap keys. + + Note: this is greater than the number of colormaps, as each colormap + may have multiple aliases. + """ + + def unique_keys( + self, prefer_short_names: bool = True, normalized_names: bool = False + ) -> set[str]: + """Return names that refer to unique colormap data. + + Parameters + ---------- + prefer_short_names : bool, optional + If True (default), short names (without the namespace prefix) will be + preferred over fully qualified names. In cases where the same short name + is used in multiple namespaces, they will *all* be referred to by their + fully qualified (namespaced) name. + normalized_names : bool, optional + If True, return the normalized names of the colormaps. If False + (default), return the original names of the colormaps (which may include + spaces and/or capital letters). + """ + + def short_keys(self) -> set[str]: + """Return a set of available short colormap names, without namespace.""" + + def namespaced_keys(self) -> set[str]: + """Return a set of available short colormap names, with namespace.""" + + def resolve(self, name: str) -> str: + """Return the fully qualified, normalized name of a colormap or alias.""" + +else: + from ._catalog import Catalog, CatalogItem + __all__ = [ "Color", "Colormap", + "CatalogItem", + "Catalog", "HSLA", "HSVA", "RGBA", diff --git a/src/cmap/_catalog.py b/src/cmap/_catalog.py index f1ce08c1d..e59286fd4 100644 --- a/src/cmap/_catalog.py +++ b/src/cmap/_catalog.py @@ -11,12 +11,21 @@ import logging from dataclasses import dataclass, field from pathlib import Path -from typing import TYPE_CHECKING, Iterator, Literal, Mapping, cast +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + Iterator, + Literal, + Mapping, + cast, +) import cmap.data if TYPE_CHECKING: - from typing_extensions import NotRequired, TypeAlias, TypedDict + from _typeshed import FileDescriptorOrPath + from typing_extensions import NotRequired, Required, TypeAlias, TypedDict from ._colormap import ColorStopsLike, Interpolation @@ -24,7 +33,7 @@ "sequential", "diverging", "cyclic", "qualitative", "miscellaneous" ] - class CatalogItem(TypedDict): + class UnloadedCatalogItem(TypedDict): data: str category: Category tags: NotRequired[list[str]] @@ -32,21 +41,70 @@ class CatalogItem(TypedDict): info: NotRequired[str] aliases: NotRequired[list[str]] - class CatalogAlias(TypedDict): + class UnloadedCatalogAlias(TypedDict): alias: str conflicts: NotRequired[list[str]] - CatalogDict: TypeAlias = dict[str, CatalogItem] + CatalogDict: TypeAlias = dict[str, UnloadedCatalogItem | UnloadedCatalogAlias] -logger = logging.getLogger("cmap") + class RecordItem(TypedDict): + """Json schema for a single colormap record file.""" + namespace: Required[str] + colormaps: Required[CatalogDict] + # globals that override colormap values if present + license: str + source: str + authors: list[str] + category: Category -def _norm_name(name: str) -> str: - return name.lower().replace(" ", "_").replace("-", "_") + +logger = logging.getLogger("cmap") +RECORD_PATTERN = "record.json" +DATA_ROOT = Path(cmap.data.__file__).parent +NAMESPACE_DELIMITER = ":" @dataclass -class LoadedCatalogItem: +class CatalogItem: + """A loaded catalog item. + + Attributes + ---------- + data: ColorStopsLike + Any object that can be passed to `Colormap` to create a colormap. + https://cmap-docs.readthedocs.io/en/latest/colormaps/#colormaplike-objects + name: str + The (short) name of the colormap, e.g. "viridis". + category: str + The category of the colormap. One of {"sequential", "diverging", "cyclic", + "qualitative", "miscellaneous"}. + license: str + The license of the colormap. + source: str + The source of the colormap (usually a URL). + info: str + A description of the colormap. Will be displayed on the colormap page in the + documentation. + namespace: str + The namespace of the colormap. This is a cmap-specific namespace for organizing + colormaps into collections. (e.g. "matplotlib", "cmocean", "colorcet", etc.) + authors: list[str] + A list of authors of the colormap. + interpolation: bool | Interpolation + The interpolation method to use when sampling the colormap. One of + {False, True, "linear", "nearest"}, where False is equivalent to "nearest" + and True is equivalent to "linear". + tags: list[str] + A list of tags for the colormap. These are displayed in the documentation. + aliases: list[str] + A list of aliases for the colormap. These are alternative names that can be + used to access the colormap. Currently, they must be accessed using the + fully qualified name (`namespace:alias`). + qualified_name: str + The fully qualified name of the colormap, e.g. "matplotlib:viridis". + """ + data: ColorStopsLike name: str category: Category @@ -61,57 +119,61 @@ class LoadedCatalogItem: @property def qualified_name(self) -> str: - return f"{self.namespace}:{self.name}" - - -CATALOG: dict[str, CatalogItem | CatalogAlias] = {} + return f"{self.namespace}{NAMESPACE_DELIMITER}{self.name}" -def _populate_catalog() -> None: +def _build_catalog(records: Iterable[FileDescriptorOrPath]) -> CatalogDict: """Populate the catalog with data from the data directory.""" # FIXME: if a new collection is added, it has the potential to break # existing code that uses the old name without a namespace. One way # to avoid this would be to explicitly list the collections here. # but then new collections would need to be added here to be # available. - for r in sorted(Path(cmap.data.__file__).parent.rglob("record.json")): - with open(r) as f: - data = json.load(f) + ctlg: CatalogDict = {} + + for record_file in records: + with open(record_file) as f: + data = cast("RecordItem", json.load(f)) namespace = data["namespace"] for name, v in data["colormaps"].items(): - namespaced = f"{namespace}:{name}" + if NAMESPACE_DELIMITER in name: # pragma: no cover + raise ValueError(f"colormap name {name!r} should not have colon.") + if NAMESPACE_DELIMITER in namespace: # pragma: no cover + raise ValueError(f"namespace {namespace!r} should not have colon.") + + namespaced = f"{namespace}{NAMESPACE_DELIMITER}{name}" - # if the key "alias" exists, this is a CatalogAlias. + # if the key "alias" exists, this is a UnloadedCatalogAlias. # We just add it to the catalog under both the namespaced name # and the short name. The Catalog._load method will handle the resolution # of the alias. if "alias" in v: - v = cast("CatalogAlias", v) - if ":" not in v["alias"]: # pragma: no cover + v = cast("UnloadedCatalogAlias", v) + if NAMESPACE_DELIMITER not in v["alias"]: # pragma: no cover raise ValueError(f"{namespaced!r} alias is not namespaced") - CATALOG[namespaced] = v - CATALOG[name] = v # FIXME + ctlg[namespaced] = v + ctlg[name] = v # FIXME continue # otherwise we have a CatalogItem - v = cast("CatalogItem", v) + v = cast("UnloadedCatalogItem", v) # here we add any global keys to the colormap that are not already there. for k in ("license", "namespace", "source", "authors", "category"): if k in data: - v.setdefault(k, data[k]) + v.setdefault(k, data[k]) # type: ignore # add the fully namespaced colormap to the catalog - CATALOG[namespaced] = v + ctlg[namespaced] = v # if the short name is not already in the catalog, add it as a pointer # to the fully namespaced colormap. - if name not in CATALOG: - CATALOG[name] = {"alias": namespaced, "conflicts": []} + if name not in ctlg: + ctlg[name] = {"alias": namespaced, "conflicts": []} else: # if the short name is already in the catalog, we have a conflict. # add the fully namespaced name to the conflicts list. - entry = cast("CatalogAlias", CATALOG[name]) + entry = cast("UnloadedCatalogAlias", ctlg[name]) entry.setdefault("conflicts", []).append(namespaced) # lastly, the `aliases` key of a colormap refers to aliases within the @@ -119,34 +181,128 @@ def _populate_catalog() -> None: # namespaced name (with a colon). We add these to the catalog as well # so that they can be for alias in v.get("aliases", []): - if ":" in alias: # pragma: no cover + if NAMESPACE_DELIMITER in alias: # pragma: no cover raise ValueError( f"internal alias {alias!r} in namespace {namespace} " "should not have colon." ) - CATALOG[f"{namespace}:{alias}"] = {"alias": namespaced} - - -_populate_catalog() -_CATALOG_LOWER = {_norm_name(k): v for k, v in CATALOG.items()} -_ALIASES: dict[str, list[str]] = {} -for k, v in _CATALOG_LOWER.items(): - if alias := v.get("alias"): - _ALIASES.setdefault(_norm_name(alias), []).append(k) # type: ignore - + ctlg[f"{namespace}{NAMESPACE_DELIMITER}{alias}"] = {"alias": namespaced} + + return ctlg + + +class Catalog(Mapping[str, "CatalogItem"]): + """Catalog of available colormaps. + + Parameters + ---------- + root : Path, optional + Path to the root of the data directory, by default uses the `cmap.data` folder. + record_pattern : str, optional + Glob pattern to use to find record files, by default "record.json". + """ + + def __init__( + self, data_root: Path = DATA_ROOT, record_pattern: str = RECORD_PATTERN + ) -> None: + self._data_root = data_root + self._record_pattern = record_pattern + + # a cache of loaded CatalogItem + self._loaded: dict[str, CatalogItem] = {} + + # _data is a mapping of ALL possible (normalized) names to colormap data. + # this includes both short names and namespaced names. + self._data: CatalogDict = {} + # original names maps the original name as it appeared in the record to the + # normalized name in _data + self._original_names: dict[str, str] = {} + # _aliases maps short names to fully namespaced names + self._aliases: dict[str, str] = {} + # _rev_aliases maps fully qualified names to a list of aliases + self._rev_aliases: dict[str, list[str]] = {} + + for name, data in _build_catalog( + sorted(data_root.rglob(record_pattern)) + ).items(): + normed_name = self._norm_name(name) + self._original_names[name] = normed_name + self._data[normed_name] = data + if alias := data.get("alias"): + self._aliases[normed_name] = cast(str, alias) + self._rev_aliases.setdefault(self._norm_name(alias), []).append( + normed_name + ) -class Catalog(Mapping[str, "LoadedCatalogItem"]): - _loaded: dict[str, LoadedCatalogItem] = {} + def unique_keys( + self, prefer_short_names: bool = True, normalized_names: bool = False + ) -> set[str]: + """Return names that refer to unique colormap data. + + Parameters + ---------- + prefer_short_names : bool, optional + If True (default), short names (without the namespace prefix) will be + preferred over fully qualified names. In cases where the same short name is + used in multiple namespaces, they will *all* be referred to by their fully + qualified (namespaced) name. + normalized_names : bool, optional + If True, return the normalized names of the colormaps. If False (default), + return the original names of the colormaps (which may include spaces and/or + capital letters). + + Returns + ------- + set[str] + A set of unique colormap names that can be used to access the colormap data. + """ + keys: set[str] = set() + for original_name, normed_name in self._original_names.items(): + if "alias" in self._data[normed_name]: + continue + if prefer_short_names: + short_name = normed_name.split(NAMESPACE_DELIMITER, 1)[-1] + data2 = self._data[short_name] + if not data2.get("conflicts") and data2.get("alias") == original_name: + keys.add( + short_name + if normalized_names + else original_name.split(NAMESPACE_DELIMITER, 1)[-1] + ) + continue + keys.add(normed_name if normalized_names else original_name) + return keys + + def short_keys(self) -> set[str]: + """Return a set of available short colormap names, without namespace.""" + return {n for n in self._original_names if NAMESPACE_DELIMITER not in n} + + def namespaced_keys(self) -> set[str]: + """Return a set of available short colormap names, with namespace.""" + return {n for n in self._original_names if NAMESPACE_DELIMITER in n} + + def resolve(self, name: str) -> str: + """Return the fully qualified, normalized name of a colormap or alias.""" + nn = self._norm_name(name) + if nn in self._aliases: + return self._aliases[nn] + if nn in self._data: + return nn + raise KeyError(f"Could not find colormap with name {name!r}.") + + def _ipython_key_completions_(self) -> list[str]: + """Support ipython tab completion.""" + return list(self._data) def __iter__(self) -> Iterator[str]: - return iter(CATALOG) + return iter(self._original_names) def __len__(self) -> int: - return len(CATALOG) + return len(self._original_names) - def __getitem__(self, name: str) -> LoadedCatalogItem: + def __getitem__(self, name: str) -> CatalogItem: if name not in self._loaded: - if (key := _norm_name(name)) not in _CATALOG_LOWER: + if (key := self._norm_name(name)) not in self._data: # TODO: print a list of available colormaps or something if name != key: # pragma: no cover raise ValueError(f"Colormap {name!r} (or {key!r}) not found.") @@ -158,32 +314,33 @@ def __getitem__(self, name: str) -> LoadedCatalogItem: self._loaded[key] = self._loaded[name] return self._loaded[name] - def _load(self, key: str) -> LoadedCatalogItem: + def _load(self, normed_key: str) -> CatalogItem: """Get the data for a named colormap.""" - item = _CATALOG_LOWER[key] + item = self._data[normed_key] # aliases are just pointers to other colormaps if "alias" in item: - item = cast("CatalogAlias", item) + item = cast("UnloadedCatalogAlias", item) namespaced = item["alias"] if conflicts := item.get("conflicts"): logger.warning( - f"WARNING: The name {key!r} is an alias for {namespaced!r}, " + f"WARNING: The name {normed_key!r} is an alias for {namespaced!r}, " f"but is also available as: {', '.join(conflicts)!r}.\nTo " "silence this warning, use a fully namespaced name.", ) return self[namespaced] - _item = cast("CatalogItem", item.copy()) + _item = cast("UnloadedCatalogItem", item.copy()) # if a string, it is a module:attribute reference to a ColormapLike object # load it here. if isinstance(_item["data"], str): - module, attr = _item["data"].rsplit(":", 1) + module, attr = _item["data"].rsplit(NAMESPACE_DELIMITER, 1) # not encouraged... but significantly faster than importlib # well tested on internal data though mod = __import__(module, fromlist=[attr]) _item["data"] = getattr(mod, attr) - _item["aliases"] = _ALIASES.get(key, []) - return LoadedCatalogItem(name=key.split(":", 1)[-1], **_item) - + _item["aliases"] = self._rev_aliases.get(normed_key, []) + return CatalogItem(name=normed_key.split(NAMESPACE_DELIMITER, 1)[-1], **_item) -catalog = Catalog() + @staticmethod + def _norm_name(name: Any) -> str: + return str(name).lower().replace(" ", "_").replace("-", "_") diff --git a/src/cmap/_colormap.py b/src/cmap/_colormap.py index 404142015..66ffbf196 100644 --- a/src/cmap/_colormap.py +++ b/src/cmap/_colormap.py @@ -10,6 +10,7 @@ Callable, Iterable, Iterator, + Mapping, NamedTuple, Sequence, Union, @@ -21,7 +22,6 @@ import numpy.typing as npt from . import _external -from ._catalog import catalog from ._color import Color if TYPE_CHECKING: @@ -36,7 +36,7 @@ from numpy.typing import ArrayLike, NDArray from typing_extensions import TypeAlias, TypedDict, TypeGuard - from ._catalog import LoadedCatalogItem + from ._catalog import CatalogItem from ._color import ColorLike Interpolation = Literal["linear", "nearest"] @@ -128,7 +128,18 @@ class Colormap: identifier: str category: str | None interpolation: Interpolation - info: LoadedCatalogItem | None + info: CatalogItem | None + + _catalog_instance: Mapping[str, CatalogItem] | None = None + + @classmethod + def catalog(cls) -> Mapping[str, CatalogItem]: + """Return the global colormaps catalog.""" + if cls._catalog_instance is None: + from ._catalog import Catalog + + cls._catalog_instance = Catalog() + return cls._catalog_instance def __init__( self, @@ -141,7 +152,7 @@ def __init__( ) -> None: if isinstance(value, str): rev = value.endswith("_r") - info = catalog[value[:-2] if rev else value] + info = self.catalog()[value[:-2] if rev else value] name = name or f"{info.namespace}:{info.name}" category = category or info.category self.info = info @@ -1141,7 +1152,7 @@ def _parse_colorstops( if isinstance(val, str): rev = val.endswith("_r") - data = catalog[val[:-2] if rev else val] + data = Colormap.catalog()[val[:-2] if rev else val] stops = _parse_colorstops(data.data, cls=cls) stops._interpolation = _norm_interp(data.interpolation) return stops.reversed() if rev else stops diff --git a/src/cmap/_external.py b/src/cmap/_external.py index 3c877237a..f10ef1aa4 100644 --- a/src/cmap/_external.py +++ b/src/cmap/_external.py @@ -163,7 +163,7 @@ def rich_print_colormap(cm: Colormap, width: int | None = None) -> None: console = get_console() color_cell = Text("") # if cm.interpolation == "nearest": - # width = len(cm.color_stops) + # width = len(cm.color_stops) width = width or (console.width - 12) for color in cm.iter_colors(width): color_cell += Text(" ", style=Style(bgcolor=color.hex[:7])) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index d3992f052..40ddfb162 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1,8 +1,12 @@ +from itertools import chain + import numpy as np import pytest from cmap import Colormap -from cmap._catalog import catalog +from cmap._catalog import Catalog + +catalog = Catalog() @pytest.mark.filterwarnings("ignore:The name:") @@ -26,3 +30,37 @@ def test_catalog_data() -> None: Colormap(name) # smoke test assert len(catalog) > 100 + + +def test_lower_map() -> None: + # make sure the lower map is the same length as the original + # ... i.e. that we have no name collisions + assert len(catalog._data) == len(catalog._data) + + +def test_data_loading() -> None: + for name in catalog._original_names: + Colormap(name) + + +def test_catalog_names() -> None: + assert "bids:viridis" in catalog.namespaced_keys() + assert "viridis" in catalog.short_keys() + assert [ + catalog.resolve(x) + for x in chain(catalog.short_keys(), catalog.namespaced_keys()) + ] + with pytest.raises(KeyError): + catalog.resolve("not-a-cmap") + + unique = catalog.unique_keys(prefer_short_names=True, normalized_names=False) + assert "ice" not in unique + assert "viridis" in unique + assert "cmocean:ice" in unique + assert "YlGn" in unique + unique = catalog.unique_keys(prefer_short_names=False, normalized_names=True) + assert "colorbrewer:ylgn" in unique + assert "colorbrewer:YlGn" not in unique + assert "viridis" not in unique + assert "bids:viridis" in unique + assert "matplotlib:viridis" not in unique diff --git a/tests/test_data.py b/tests/test_data.py index 9b9e224e2..416d64177 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -4,7 +4,7 @@ import numpy.testing as npt import pytest -from cmap import Colormap, _catalog +from cmap import Colormap try: import matplotlib as mpl @@ -18,22 +18,13 @@ if TYPE_CHECKING: from matplotlib.colors import Colormap as MPLColormap - -def test_lower_map() -> None: - # make sure the lower map is the same length as the original - # ... i.e. that we have no name collisions - assert len(_catalog._CATALOG_LOWER) == len(_catalog.CATALOG) - - -def test_data_loading() -> None: - for name in _catalog.CATALOG: - Colormap(name) +catalog = Colormap.catalog() def test_matplotlib_name_parity() -> None: if not MPL_CMAPS: pytest.skip("matplotlib not installed") - if missing := (MPL_CMAPS - set(_catalog.CATALOG)): + if missing := (MPL_CMAPS - set(catalog._original_names)): raise AssertionError(f"missing cmap keys from matplotlib: {missing}") @@ -52,14 +43,14 @@ def test_napari_name_parity() -> None: if not n.endswith(("_r", " r")) } - catalog = set(_catalog._CATALOG_LOWER) - if missing := (napari_cmaps - catalog): + lower_names = set(catalog._data) + if missing := (napari_cmaps - lower_names): # NOTE: there are a number of colormap names in vispy that are too specific # to be included in the main catalog. # They are added under the `vispy_` prefix. none of these are "publicly" used # by napari, but we make sure they're available as vispy+name here. for m in list(missing): - if f"vispy_{m}" in catalog: + if f"vispy_{m}" in lower_names: missing.remove(m) if missing: raise AssertionError(f"missing cmap keys from napari: {missing}")