Skip to content

Commit

Permalink
Merge branch 'issue666-load_stac-temporal-metadata'
Browse files (browse the repository at this point in the history)
  • Loading branch information
soxofaan committed Nov 26, 2024
2 parents 37ba260 + 4cd6f59 commit 5a3e6f4
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 28 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- `load_stac`: use fallback temporal dimension when no "cube:dimensions" in STAC Collection ([#666](https://github.com/Open-EO/openeo-python-client/issues/666))

## [0.35.0] - 2024-11-19

Expand Down
7 changes: 6 additions & 1 deletion openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pystac.extensions.item_assets

from openeo.internal.jupyter import render_component
-from openeo.util import deep_get
+from openeo.util import Rfc3339, deep_get

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -691,6 +691,11 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD
if len(temporal_dims) == 1:
name, extent = temporal_dims[0]
return TemporalDimension(name=name, extent=extent)
elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal:
# No explicit "cube:dimensions": build fallback from "extent.temporal",
# with dimension name "t" (openEO API recommendation).
extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]]
return TemporalDimension(name="t", extent=extent)
else:
if isinstance(stac_obj, pystac.Item):
cube_dimensions = stac_obj.properties.get("cube:dimensions", {})
Expand Down
4 changes: 4 additions & 0 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def load_collection(
metadata = None
if metadata:
bands = [b if isinstance(b, str) else metadata.band_dimension.band_name(b) for b in bands]
# TODO: also apply spatial/temporal filters to metadata?
metadata = metadata.filter_bands(bands)
arguments['bands'] = bands

Expand Down Expand Up @@ -385,6 +386,9 @@ def load_stac(
graph = PGNode("load_stac", arguments=arguments)
try:
metadata = metadata_from_stac(url)
if bands:
# TODO: also apply spatial/temporal filters to metadata?
metadata = metadata.filter_bands(band_names=bands)
except Exception:
log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
metadata = None
Expand Down
2 changes: 1 addition & 1 deletion openeo/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def parse_date_or_datetime(
@classmethod
def _format_datetime(cls, d: dt.datetime) -> str:
"""Format given datetime as RFC-3339 date-time string."""
-if d.tzinfo not in {None, dt.timezone.utc}:
+if not (d.tzinfo is None or d.tzinfo.tzname(d) == "UTC"):
# TODO: add support for non-UTC timezones?
raise ValueError(f"No support for non-UTC timezone {d.tzinfo}")
return d.strftime(cls._FMT_DATETIME)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency
"dirty_equals>=0.8.0",
"pyarrow>=10.0.1", # For Parquet read/write support in pandas
"python-dateutil>=2.7.0",
]

docs_require = [
Expand Down
48 changes: 47 additions & 1 deletion tests/rest/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import openeo
from openeo.capabilities import ApiVersionException
from openeo.internal.graph_building import FlatGraphableMixin, PGNode
from openeo.metadata import _PYSTAC_1_9_EXTENSION_INTERFACE, TemporalDimension
from openeo.rest import (
CapabilitiesException,
OpenEoApiError,
Expand All @@ -40,7 +41,7 @@
)
from openeo.rest.vectorcube import VectorCube
from openeo.testing.stac import StacDummyBuilder
-from openeo.util import ContextTimer, dict_no_none
+from openeo.util import ContextTimer, deep_get, dict_no_none

from .auth.test_cli import auth_config, refresh_token_store

Expand Down Expand Up @@ -2622,6 +2623,51 @@ def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim):
},
}

@pytest.mark.skipif(
not _PYSTAC_1_9_EXTENSION_INTERFACE,
reason="No backport of implementation/test below PySTAC 1.9 extension interface",
)
@pytest.mark.parametrize(
["collection_extent", "dim_extent"],
[
(
{"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}},
["2024-01-01T00:00:00Z", "2024-05-05T00:00:00Z"],
),
(
{"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [[None, "2024-05-05"]]}},
[None, "2024-05-05T00:00:00Z"],
),
],
)
def test_load_stac_no_cube_extension_temporal_dimension(self, con120, tmp_path, collection_extent, dim_extent):
"""
Metadata detection when STAC metadata does not use "cube" extension
https://github.com/Open-EO/openeo-python-client/issues/666
"""
stac_path = tmp_path / "stac.json"
stac_data = StacDummyBuilder.collection(extent=collection_extent)
# No cube:dimensions, but at least "temporal" extent is set as indicator for having a temporal dimension
assert "cube:dimensions" not in stac_data
assert deep_get(stac_data, "extent", "temporal")
stac_path.write_text(json.dumps(stac_data))

cube = con120.load_stac(str(stac_path))
assert cube.metadata.temporal_dimension == TemporalDimension(name="t", extent=dim_extent)

def test_load_stac_band_filtering(self, con120, tmp_path):
stac_path = tmp_path / "stac.json"
stac_data = StacDummyBuilder.collection(
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
)
stac_path.write_text(json.dumps(stac_data))

cube = con120.load_stac(str(stac_path))
assert cube.metadata.band_names == ["B01", "B02", "B03"]

cube = con120.load_stac(str(stac_path), bands=["B03", "B02"])
assert cube.metadata.band_names == ["B03", "B02"]


@pytest.mark.parametrize(
"data",
Expand Down
6 changes: 5 additions & 1 deletion tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,10 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
assert warn_count == (0 if eo_extension_is_declared else 1)


@pytest.mark.skipif(
not _PYSTAC_1_9_EXTENSION_INTERFACE,
reason="No backport of implementation/test below PySTAC 1.9 extension interface",
)
@pytest.mark.parametrize(
["stac_dict", "expected"],
[
Expand All @@ -868,7 +872,7 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
),
(
StacDummyBuilder.collection(),
-None,
+("t", ["2024-01-01T00:00:00Z", "2024-05-05T00:00:00Z"]),
),
(
StacDummyBuilder.collection(
Expand Down
35 changes: 11 additions & 24 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import unittest.mock as mock
from typing import List, Union

import dateutil.tz
import pyproj
import pytest
import shapely.geometry
Expand Down Expand Up @@ -87,21 +88,12 @@ def test_datetime(self):
assert "2020-03-17T12:34:56Z" == rfc3339.datetime([2020, 3, 17, 12, 34, 56])
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(2020, 3, 17, 12, 34, 56)
assert "2020-03-17T12:34:00Z" == rfc3339.datetime(2020, 3, 17, 12, 34)
-assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
-(2020, "3", 17, "12", "34", 56)
-)
-assert "2020-09-17T12:34:56Z" == rfc3339.datetime(
-[2020, "09", 17, "12", "34", 56]
-)
-assert "2020-09-17T12:34:56Z" == rfc3339.datetime(
-2020, "09", "17", "12", "34", 56
-)
-assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
-dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None)
-)
-assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
-dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc)
-)
+assert "2020-03-17T12:34:56Z" == rfc3339.datetime((2020, "3", 17, "12", "34", 56))
+assert "2020-09-17T12:34:56Z" == rfc3339.datetime([2020, "09", 17, "12", "34", 56])
+assert "2020-09-17T12:34:56Z" == rfc3339.datetime(2020, "09", "17", "12", "34", 56)
+assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None))
+assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc))
+assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dateutil.tz.UTC))
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
dt.datetime(
*(2020, 3, 17, 12, 34, 56),
Expand All @@ -125,15 +117,10 @@ def test_normalize(self):
"2020-03-17T12:34:56.44546546Z"
)
assert "2020-03-17" == rfc3339.normalize(dt.date(2020, 3, 17))
-assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
-dt.datetime(2020, 3, 17, 12, 34, 56)
-)
-assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
-dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None)
-)
-assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
-dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc)
-)
+assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56))
+assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None))
+assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc))
+assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dateutil.tz.UTC))
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
dt.datetime(
*(2020, 3, 17, 12, 34, 56),
Expand Down

0 comments on commit 5a3e6f4

Please sign in to comment.