diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88d1dbe..23ea285 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## Unreleased
+
+- Cache remote JSON schemas for extensions (#155, @avbentem)
+
 ## 3.1.0 (2024-05-21)
 
 - Allow extra fields in Links (#144, @jonhealy1)
diff --git a/README.md b/README.md
index b810802..ec0cba4 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ assert catalog.links[0].href == "item.json"
 
 ### Extensions
 
-STAC defines many extensions which let the user customize the data in their catalog. `stac-pydantic.extensions.validate_extensions` will validate a `dict`, `Item`, `Collection` or `Catalog` against the schema urls provided in the `stac_extensions` property:
+STAC defines many extensions which let the user customize the data in their catalog. `stac-pydantic.extensions.validate_extensions` gets the JSON schemas from the URLs provided in the `stac_extensions` property (caching the last fetched ones), and will validate a `dict`, `Item`, `Collection` or `Catalog` against those fetched schemas:
 
 ```python
 from stac_pydantic import Item
diff --git a/stac_pydantic/extensions.py b/stac_pydantic/extensions.py
index 2a4f773..1818e8c 100644
--- a/stac_pydantic/extensions.py
+++ b/stac_pydantic/extensions.py
@@ -1,4 +1,5 @@
 import json
+from functools import lru_cache
 from typing import Any, Dict, Union
 
 import jsonschema
@@ -9,10 +10,27 @@
 from stac_pydantic.item import Item
 
 
+@lru_cache(maxsize=128)
+def _fetch_and_cache_schema(url: str) -> dict:
+    """Fetch the remote JSON schema, if not already cached.
+
+    Raises on HTTP error statuses so that an error response carrying a JSON
+    body is never stored in the cache and reused as if it were a schema.
+    ``lru_cache`` only stores successful returns, so a failed URL is retried.
+    """
+    req = requests.get(url)
+    req.raise_for_status()
+    return req.json()
+
+
 def validate_extensions(
     stac_obj: Union[Item, Collection, Catalog, Dict[str, Any]],
     reraise_exception: bool = False,
 ) -> bool:
+    """
+    Fetch the remote JSON schema, if not already cached, and validate the STAC
+    object against that schema.
+    """
     if isinstance(stac_obj, dict):
         stac_dict = stac_obj
     else:
@@ -23,8 +41,7 @@ def validate_extensions(
     try:
         if stac_dict["stac_extensions"]:
             for ext in stac_dict["stac_extensions"]:
-                req = requests.get(ext)
-                schema = req.json()
+                schema = _fetch_and_cache_schema(ext)
                 jsonschema.validate(instance=stac_dict, schema=schema)
     except Exception:
         if reraise_exception:
diff --git a/tests/test_models.py b/tests/test_models.py
index d5c08f8..6e10a2f 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -6,7 +6,7 @@
 from shapely.geometry import shape
 
 from stac_pydantic import Collection, Item, ItemProperties
-from stac_pydantic.extensions import validate_extensions
+from stac_pydantic.extensions import _fetch_and_cache_schema, validate_extensions
 from stac_pydantic.links import Link, Links
 from stac_pydantic.shared import MimeTypes, StacCommonMetadata
 
@@ -116,6 +116,22 @@ def test_explicit_extension_validation() -> None:
     validate_extensions(test_item)
 
 
+def test_extension_validation_schema_cache() -> None:
+    # Defines 3 extensions, but one is a non-existing URL
+    test_item = request(EO_EXTENSION)
+
+    _fetch_and_cache_schema.cache_clear()
+
+    assert not validate_extensions(test_item)
+    assert _fetch_and_cache_schema.cache_info().hits == 0
+    assert _fetch_and_cache_schema.cache_info().misses == 3
+
+    assert not validate_extensions(test_item)
+    assert _fetch_and_cache_schema.cache_info().hits == 2
+    # The non-existing URL will have failed, hence retried
+    assert _fetch_and_cache_schema.cache_info().misses == 4
+
+
 @pytest.mark.parametrize(
     "infile,model",
     [(EO_EXTENSION, Item), (COLLECTION, Collection)],