Skip to content

Commit

Permalink
Cache remote JSON schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
avbentem committed Jul 7, 2024
1 parent b9fa617 commit 246803e
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## Unreleased

- Cache remote JSON schemas for extensions (TBD, @avbentem)

## 3.1.0 (2024-05-21)

- Allow extra fields in Links (#144, @jonhealy1)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ assert catalog.links[0].href == "item.json"

### Extensions

STAC defines many extensions which let the user customize the data in their catalog. `stac-pydantic.extensions.validate_extensions` will validate a `dict`, `Item`, `Collection` or `Catalog` against the schema urls provided in the `stac_extensions` property:
STAC defines many extensions which let the user customize the data in their catalog. `stac_pydantic.extensions.validate_extensions` fetches the JSON schemas from the URLs listed in the `stac_extensions` property (caching the most recently fetched ones), and validates a `dict`, `Item`, `Collection` or `Catalog` against those schemas:

```python
from stac_pydantic import Item
Expand Down
15 changes: 13 additions & 2 deletions stac_pydantic/extensions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from functools import lru_cache
from typing import Any, Dict, Union

import jsonschema
Expand All @@ -9,10 +10,21 @@
from stac_pydantic.item import Item


@lru_cache(maxsize=128)
def _fetch_schema(url: str) -> dict:
    """Fetch the remote JSON schema, if not already cached.

    Up to 128 of the most recently used schemas are kept in memory, keyed by
    URL. Only successful fetches are cached: if the request fails, the server
    answers with an HTTP error status, or the body is not valid JSON, an
    exception propagates and the next call for the same URL retries.

    The returned dict is the cached object itself and is shared between all
    callers, so it must be treated as read-only.

    Args:
        url: URL of the JSON schema to download.

    Returns:
        The parsed JSON schema.

    Raises:
        requests.RequestException: on connection errors, timeouts or HTTP
            error statuses (4xx/5xx).
        ValueError: if the response body cannot be decoded as JSON.
    """
    # Without an explicit timeout, requests may wait forever on a stalled
    # server and block validation indefinitely.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx so that a JSON-formatted error page is never mistaken
    # for (and cached as) a schema.
    response.raise_for_status()
    return response.json()


def validate_extensions(
stac_obj: Union[Item, Collection, Catalog, Dict[str, Any]],
reraise_exception: bool = False,
) -> bool:
"""
Fetch the remote JSON schema, if not already cached, and validate the STAC
object against that schema.
"""
if isinstance(stac_obj, dict):
stac_dict = stac_obj
else:
Expand All @@ -23,8 +35,7 @@ def validate_extensions(
try:
if stac_dict["stac_extensions"]:
for ext in stac_dict["stac_extensions"]:
req = requests.get(ext)
schema = req.json()
schema = _fetch_schema(ext)
jsonschema.validate(instance=stac_dict, schema=schema)
except Exception:
if reraise_exception:
Expand Down
18 changes: 17 additions & 1 deletion tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from shapely.geometry import shape

from stac_pydantic import Collection, Item, ItemProperties
from stac_pydantic.extensions import validate_extensions
from stac_pydantic.extensions import _fetch_schema, validate_extensions
from stac_pydantic.links import Link, Links
from stac_pydantic.shared import MimeTypes, StacCommonMetadata

Expand Down Expand Up @@ -116,6 +116,22 @@ def test_explicit_extension_validation() -> None:
validate_extensions(test_item)


def test_extension_validation_schema_cache() -> None:
    """Validate the same item twice and check the schema cache statistics."""
    # The fixture declares 3 extensions, one of which is a non-existing URL
    item = request(EO_EXTENSION)

    # Reset the cache so the hit/miss counters below start from zero
    _fetch_schema.cache_clear()

    # First run: validation fails and nothing could be served from the cache
    assert not validate_extensions(item)
    first_run = _fetch_schema.cache_info()
    assert first_run.hits == 0
    assert first_run.misses == 3

    # Second run: validation still fails, but two schemas come from the cache
    assert not validate_extensions(item)
    second_run = _fetch_schema.cache_info()
    assert second_run.hits == 2
    # The fetch of the non-existing URL failed the first time, so it is
    # attempted again and counted as one more miss
    assert second_run.misses == 4


@pytest.mark.parametrize(
"infile,model",
[(EO_EXTENSION, Item), (COLLECTION, Collection)],
Expand Down

0 comments on commit 246803e

Please sign in to comment.