From 69a0d1a7f2f4f5096637eb8ba497a711d6d485d4 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Mon, 25 Nov 2024 17:02:56 +0100 Subject: [PATCH 1/2] Issue #666 load_stac: fallback temporal dimension when no cube:dimensions --- CHANGELOG.md | 1 + openeo/metadata.py | 7 ++++++- openeo/util.py | 2 +- setup.py | 1 + tests/rest/test_connection.py | 35 ++++++++++++++++++++++++++++++++++- tests/test_metadata.py | 6 +++++- tests/test_util.py | 35 +++++++++++------------------------ 7 files changed, 59 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa030952f..7b83adabe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- `load_stac`: use fallback temporal dimension when no "cube:dimensions" in STAC Collection ([#666](https://github.com/Open-EO/openeo-python-client/issues/666)) ## [0.35.0] - 2024-11-19 diff --git a/openeo/metadata.py b/openeo/metadata.py index 1de8c38b7..8b7701579 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -11,7 +11,7 @@ import pystac.extensions.item_assets from openeo.internal.jupyter import render_component -from openeo.util import deep_get +from openeo.util import Rfc3339, deep_get _log = logging.getLogger(__name__) @@ -691,6 +691,11 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD if len(temporal_dims) == 1: name, extent = temporal_dims[0] return TemporalDimension(name=name, extent=extent) + elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal: + # No explicit "cube:dimensions": build fallback from "extent.temporal", + # with dimension name "t" (openEO API recommendation). + extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] + return TemporalDimension(name="t", extent=extent) else: if isinstance(stac_obj, pystac.Item): cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) diff --git a/openeo/util.py b/openeo/util.py index 6bbd4d897..44842124a 100644 --- a/openeo/util.py +++ b/openeo/util.py @@ -172,7 +172,7 @@ def parse_date_or_datetime( @classmethod def _format_datetime(cls, d: dt.datetime) -> str: """Format given datetime as RFC-3339 date-time string.""" - if d.tzinfo not in {None, dt.timezone.utc}: + if not (d.tzinfo is None or d.tzinfo.tzname(d) == "UTC"): # TODO: add support for non-UTC timezones? raise ValueError(f"No support for non-UTC timezone {d.tzinfo}") return d.strftime(cls._FMT_DATETIME) diff --git a/setup.py b/setup.py index b5a2fa75e..d8ff51ea1 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ "pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency "dirty_equals>=0.8.0", "pyarrow>=10.0.1", # For Parquet read/write support in pandas + "python-dateutil>=2.7.0", ] docs_require = [ diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py index a7e7d318b..ac2d1042e 100644 --- a/tests/rest/test_connection.py +++ b/tests/rest/test_connection.py @@ -18,6 +18,7 @@ import openeo from openeo.capabilities import ApiVersionException from openeo.internal.graph_building import FlatGraphableMixin, PGNode +from openeo.metadata import _PYSTAC_1_9_EXTENSION_INTERFACE, TemporalDimension from openeo.rest import ( CapabilitiesException, OpenEoApiError, @@ -40,7 +41,7 @@ ) from openeo.rest.vectorcube import VectorCube from openeo.testing.stac import StacDummyBuilder -from openeo.util import ContextTimer, dict_no_none +from openeo.util import ContextTimer, deep_get, dict_no_none from .auth.test_cli import auth_config, refresh_token_store @@ -2622,6 +2623,38 @@ def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim): }, } + @pytest.mark.skipif( + not _PYSTAC_1_9_EXTENSION_INTERFACE, + reason="No backport of implementation/test below PySTAC 1.9 extension interface", + ) + @pytest.mark.parametrize( + ["collection_extent", "dim_extent"], + [ + ( + {"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}}, + ["2024-01-01T00:00:00Z", "2024-05-05T00:00:00Z"], + ), + ( + {"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [[None, "2024-05-05"]]}}, + [None, "2024-05-05T00:00:00Z"], + ), + ], + ) + def test_load_stac_no_cube_extension_temporal_dimension(self, con120, tmp_path, collection_extent, dim_extent): + """ + Metadata detection when STAC metadata does not use "cube" extension + https://github.com/Open-EO/openeo-python-client/issues/666 + """ + stac_path = tmp_path / "stac.json" + stac_data = StacDummyBuilder.collection(extent=collection_extent) + # No cube:dimensions, but at least "temporal" extent is set as indicator for having a temporal dimension + assert "cube:dimensions" not in stac_data + assert deep_get(stac_data, "extent", "temporal") + stac_path.write_text(json.dumps(stac_data)) + + cube = con120.load_stac(str(stac_path)) + assert cube.metadata.temporal_dimension == TemporalDimension(name="t", extent=dim_extent) + @pytest.mark.parametrize( "data", diff --git a/tests/test_metadata.py b/tests/test_metadata.py index fce90186b..a11bc6aaa 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -849,6 +849,10 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat assert warn_count == (0 if eo_extension_is_declared else 1) +@pytest.mark.skipif( + not _PYSTAC_1_9_EXTENSION_INTERFACE, + reason="No backport of implementation/test below PySTAC 1.9 extension interface", +) @pytest.mark.parametrize( ["stac_dict", "expected"], [ @@ -868,7 +872,7 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat ), ( StacDummyBuilder.collection(), - None, + ("t", ["2024-01-01T00:00:00Z", "2024-05-05T00:00:00Z"]), ), ( StacDummyBuilder.collection( diff --git a/tests/test_util.py b/tests/test_util.py index 345c1f999..803efd545 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -7,6 +7,7 @@ import unittest.mock as mock from typing import List, Union +import dateutil.tz import pyproj import pytest import shapely.geometry @@ -87,21 +88,12 @@ def test_datetime(self): assert "2020-03-17T12:34:56Z" == rfc3339.datetime([2020, 3, 17, 12, 34, 56]) assert "2020-03-17T12:34:56Z" == rfc3339.datetime(2020, 3, 17, 12, 34, 56) assert "2020-03-17T12:34:00Z" == rfc3339.datetime(2020, 3, 17, 12, 34) - assert "2020-03-17T12:34:56Z" == rfc3339.datetime( - (2020, "3", 17, "12", "34", 56) - ) - assert "2020-09-17T12:34:56Z" == rfc3339.datetime( - [2020, "09", 17, "12", "34", 56] - ) - assert "2020-09-17T12:34:56Z" == rfc3339.datetime( - 2020, "09", "17", "12", "34", 56 - ) - assert "2020-03-17T12:34:56Z" == rfc3339.datetime( - dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None) - ) - assert "2020-03-17T12:34:56Z" == rfc3339.datetime( - dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc) - ) + assert "2020-03-17T12:34:56Z" == rfc3339.datetime((2020, "3", 17, "12", "34", 56)) + assert "2020-09-17T12:34:56Z" == rfc3339.datetime([2020, "09", 17, "12", "34", 56]) + assert "2020-09-17T12:34:56Z" == rfc3339.datetime(2020, "09", "17", "12", "34", 56) + assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None)) + assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc)) + assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dateutil.tz.UTC)) assert "2020-03-17T12:34:56Z" == rfc3339.datetime( dt.datetime( *(2020, 3, 17, 12, 34, 56), @@ -125,15 +117,10 @@ def test_normalize(self): "2020-03-17T12:34:56.44546546Z" ) assert "2020-03-17" == rfc3339.normalize(dt.date(2020, 3, 17)) - assert "2020-03-17T12:34:56Z" == rfc3339.normalize( - dt.datetime(2020, 3, 17, 12, 34, 56) - ) - assert "2020-03-17T12:34:56Z" == rfc3339.normalize( - dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None) - ) - assert "2020-03-17T12:34:56Z" == rfc3339.normalize( - dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc) - ) + assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56)) + assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None)) + assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc)) + assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dateutil.tz.UTC)) assert "2020-03-17T12:34:56Z" == rfc3339.normalize( dt.datetime( *(2020, 3, 17, 12, 34, 56), From 4cd6f599637078f8a44225c0932cd17ccf0648bd Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Mon, 25 Nov 2024 17:31:21 +0100 Subject: [PATCH 2/2] Issue #666 load_stac: fix band filtering in cube metadata --- openeo/rest/datacube.py | 4 ++++ tests/rest/test_connection.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index c91fb722a..f0da58849 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -203,6 +203,7 @@ def load_collection( metadata = None if metadata: bands = [b if isinstance(b, str) else metadata.band_dimension.band_name(b) for b in bands] + # TODO: also apply spatial/temporal filters to metadata? metadata = metadata.filter_bands(bands) arguments['bands'] = bands @@ -385,6 +386,9 @@ def load_stac( graph = PGNode("load_stac", arguments=arguments) try: metadata = metadata_from_stac(url) + if bands: + # TODO: also apply spatial/temporal filters to metadata? + metadata = metadata.filter_bands(band_names=bands) except Exception: log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True) metadata = None diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py index ac2d1042e..a447cd6b4 100644 --- a/tests/rest/test_connection.py +++ b/tests/rest/test_connection.py @@ -2655,6 +2655,19 @@ def test_load_stac_no_cube_extension_temporal_dimension(self, con120, tmp_path, cube = con120.load_stac(str(stac_path)) assert cube.metadata.temporal_dimension == TemporalDimension(name="t", extent=dim_extent) + def test_load_stac_band_filtering(self, con120, tmp_path): + stac_path = tmp_path / "stac.json" + stac_data = StacDummyBuilder.collection( + summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]} + ) + stac_path.write_text(json.dumps(stac_data)) + + cube = con120.load_stac(str(stac_path)) + assert cube.metadata.band_names == ["B01", "B02", "B03"] + + cube = con120.load_stac(str(stac_path), bands=["B03", "B02"]) + assert cube.metadata.band_names == ["B03", "B02"] + @pytest.mark.parametrize( "data",