From 246803e4b9710797de0cb1b7247e6c35210cc210 Mon Sep 17 00:00:00 2001 From: Arjan van Bentem Date: Sun, 7 Jul 2024 22:09:23 +0200 Subject: [PATCH 1/3] Cache remote JSON schemas --- CHANGELOG.md | 4 ++++ README.md | 2 +- stac_pydantic/extensions.py | 15 +++++++++++++-- tests/test_models.py | 18 +++++++++++++++++- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88d1dbe..24cc9ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## Unreleased + +- Cache remote JSON schemas for extensions (TBD, @avbentem) + ## 3.1.0 (2024-05-21) - Allow extra fields in Links (#144, @jonhealy1) diff --git a/README.md b/README.md index b810802..ec0cba4 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ assert catalog.links[0].href == "item.json" ### Extensions -STAC defines many extensions which let the user customize the data in their catalog. `stac-pydantic.extensions.validate_extensions` will validate a `dict`, `Item`, `Collection` or `Catalog` against the schema urls provided in the `stac_extensions` property: +STAC defines many extensions which let the user customize the data in their catalog. 
`stac_pydantic.extensions.validate_extensions` fetches the JSON schemas from the URLs provided in the `stac_extensions` property (caching the most recently used ones) and validates a `dict`, `Item`, `Collection` or `Catalog` against those schemas: ```python from stac_pydantic import Item diff --git a/stac_pydantic/extensions.py b/stac_pydantic/extensions.py index 2a4f773..5baf498 100644 --- a/stac_pydantic/extensions.py +++ b/stac_pydantic/extensions.py @@ -1,4 +1,5 @@ import json +from functools import lru_cache from typing import Any, Dict, Union import jsonschema @@ -9,10 +10,21 @@ from stac_pydantic.item import Item +@lru_cache(maxsize=128) +def _fetch_schema(url: str) -> dict: + """Fetch the remote JSON schema, if not already cached.""" + req = requests.get(url) + return req.json() + + def validate_extensions( stac_obj: Union[Item, Collection, Catalog, Dict[str, Any]], reraise_exception: bool = False, ) -> bool: + """ + Fetch the remote JSON schema, if not already cached, and validate the STAC + object against that schema. 
+ """ if isinstance(stac_obj, dict): stac_dict = stac_obj else: @@ -23,8 +35,7 @@ def validate_extensions( try: if stac_dict["stac_extensions"]: for ext in stac_dict["stac_extensions"]: - req = requests.get(ext) - schema = req.json() + schema = _fetch_schema(ext) jsonschema.validate(instance=stac_dict, schema=schema) except Exception: if reraise_exception: diff --git a/tests/test_models.py b/tests/test_models.py index d5c08f8..0b135e4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -6,7 +6,7 @@ from shapely.geometry import shape from stac_pydantic import Collection, Item, ItemProperties -from stac_pydantic.extensions import validate_extensions +from stac_pydantic.extensions import _fetch_schema, validate_extensions from stac_pydantic.links import Link, Links from stac_pydantic.shared import MimeTypes, StacCommonMetadata @@ -116,6 +116,22 @@ def test_explicit_extension_validation() -> None: validate_extensions(test_item) +def test_extension_validation_schema_cache() -> None: + # Defines 3 extensions, but one is a non-existing URL + test_item = request(EO_EXTENSION) + + _fetch_schema.cache_clear() + + assert not validate_extensions(test_item) + assert _fetch_schema.cache_info().hits == 0 + assert _fetch_schema.cache_info().misses == 3 + + assert not validate_extensions(test_item) + assert _fetch_schema.cache_info().hits == 2 + # The non-existing URL will have failed, hence retried + assert _fetch_schema.cache_info().misses == 4 + + @pytest.mark.parametrize( "infile,model", [(EO_EXTENSION, Item), (COLLECTION, Collection)], From b7f22c388b0df29dd5da1a9d5149b6f466f922b2 Mon Sep 17 00:00:00 2001 From: Arjan van Bentem Date: Sun, 7 Jul 2024 22:17:57 +0200 Subject: [PATCH 2/3] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24cc9ae..23ea285 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## Unreleased -- Cache remote JSON schemas for extensions (TBD, 
@avbentem) +- Cache remote JSON schemas for extensions (#155, @avbentem) ## 3.1.0 (2024-05-21) From 03f08739d7664982d17b76c431cebf33714f9e0b Mon Sep 17 00:00:00 2001 From: Arjan van Bentem Date: Sun, 7 Jul 2024 22:30:29 +0200 Subject: [PATCH 3/3] Make function name explicit --- stac_pydantic/extensions.py | 4 ++-- tests/test_models.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/stac_pydantic/extensions.py b/stac_pydantic/extensions.py index 5baf498..1818e8c 100644 --- a/stac_pydantic/extensions.py +++ b/stac_pydantic/extensions.py @@ -11,7 +11,7 @@ @lru_cache(maxsize=128) -def _fetch_schema(url: str) -> dict: +def _fetch_and_cache_schema(url: str) -> dict: """Fetch the remote JSON schema, if not already cached.""" req = requests.get(url) return req.json() @@ -35,7 +35,7 @@ def validate_extensions( try: if stac_dict["stac_extensions"]: for ext in stac_dict["stac_extensions"]: - schema = _fetch_schema(ext) + schema = _fetch_and_cache_schema(ext) jsonschema.validate(instance=stac_dict, schema=schema) except Exception: if reraise_exception: diff --git a/tests/test_models.py b/tests/test_models.py index 0b135e4..6e10a2f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -6,7 +6,7 @@ from shapely.geometry import shape from stac_pydantic import Collection, Item, ItemProperties -from stac_pydantic.extensions import _fetch_schema, validate_extensions +from stac_pydantic.extensions import _fetch_and_cache_schema, validate_extensions from stac_pydantic.links import Link, Links from stac_pydantic.shared import MimeTypes, StacCommonMetadata @@ -120,16 +120,16 @@ def test_extension_validation_schema_cache() -> None: # Defines 3 extensions, but one is a non-existing URL test_item = request(EO_EXTENSION) - _fetch_schema.cache_clear() + _fetch_and_cache_schema.cache_clear() assert not validate_extensions(test_item) - assert _fetch_schema.cache_info().hits == 0 - assert _fetch_schema.cache_info().misses == 3 + assert 
_fetch_and_cache_schema.cache_info().hits == 0 + assert _fetch_and_cache_schema.cache_info().misses == 3 assert not validate_extensions(test_item) - assert _fetch_schema.cache_info().hits == 2 + assert _fetch_and_cache_schema.cache_info().hits == 2 # The non-existing URL will have failed, hence retried - assert _fetch_schema.cache_info().misses == 4 + assert _fetch_and_cache_schema.cache_info().misses == 4 @pytest.mark.parametrize(