Skip to content

Commit

Permalink
Merge pull request #2 from SenteraLLC/DEM-136-search
Browse files Browse the repository at this point in the history
[DEM-136] Scene Search
  • Loading branch information
tnigon authored Jun 12, 2023
2 parents 01c9372 + 86a855c commit 08d6872
Show file tree
Hide file tree
Showing 25 changed files with 1,642 additions and 144 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ _build/
pixels_utils/constants/
pixels_utils/endpoints/
pixels_utils/stac/
pixels_utils/tests/
pixels_utils/tests/test_endpoint_stac/
pixels_utils/tests/test_stac_endpoint_statistics/
pixels_utils/tests/data/utilities_testing.py
pixels_utils/utilities.py
pixels_utils/utils_crop.py
pixels_utils/utils_statistics.py
pixels_utils/generate_test_data.py
pixels_utils/mask.py
pixels_utils/scenes.py
pixels_utils/rasterio_helper.py
pixels_utils/README.md
45 changes: 20 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,39 +63,34 @@ if __name__ == "__main__":

<h5 a><strong><code>pixels_utils_scene_search.py</code></strong></h5>

``` python
from datetime import datetime
from dateutil.relativedelta import relativedelta
from pixels_utils.constants.titiler import ENDPOINT_STATISTICS
from pixels_utils.mask import SCL


from geo_utils.vector import geojson_geometry_to_shapely

```python
from pixels_utils.tests.data.load_data import sample_geojson
from pixels_utils.scenes import get_stac_scenes
from pixels_utils.scenes import search_stac_scenes
from pixels_utils.stac_catalogs.earthsearch.v1 import EARTHSEARCH_URL, EarthSearchCollections

DATA_ID = 1

geojson = sample_geojson(DATA_ID)
DATA_ID = 1

scenes = get_stac_scenes(
bounding_box=geojson_geometry_to_shapely(geojson).bounds,
date_start= "2019-01-01",
date_end= "2019-01-31",
max_scene_cloud_cover_percent = 80,
df_scenes = search_stac_scenes(
geometry=sample_geojson(DATA_ID),
date_start="2019-01-01",
date_end="2019-01-31",
stac_catalog_url=EARTHSEARCH_URL,
collection=EarthSearchCollections.sentinel_2_l2a,
query={"eo:cloud_cover": {"lt": 80}},  # keep only scenes with less than 80% cloud cover
simplify_to_bbox=True,
)

pprint(r.json()["properties"][ENDPOINT_STATISTICS])
print(df_scenes[["id", "datetime", "eo:cloud_cover"]].to_markdown(tablefmt="pipe"))
```

<h5 a><code>[OUTPUT]</code></h5>

| index | id | datetime | eo:cloud_cover |
| ----- | ------------------------ | -------------------- | -------------- |
| 0 | S2B_10TGS_20190125_0_L2A | 2019-01-25T19:01:37Z | 45.17 |
| 1 | S2A_10TGS_20190110_0_L2A | 2019-01-10T19:01:32Z | 56.59 |
| 2 | S2A_11TLM_20190110_0_L2A | 2019-01-10T19:01:30Z | 24.87 |
| | id | datetime | eo:cloud_cover |
|---:|:-------------------------|:----------------------------|-----------------:|
| 0 | S2A_11TLM_20190110_0_L2A | 2019-01-10T19:01:30.135000Z | 26.9409 |
| 1 | S2A_10TGS_20190110_0_L2A | 2019-01-10T19:01:32.811000Z | 61.8212 |
| 2 | S2B_10TGS_20190125_0_L2A | 2019-01-25T19:01:37.534000Z | 55.6444 |


### Example 2 - Get cloud-masked statistics for a geometry
Expand All @@ -105,7 +100,7 @@ pprint(r.json()["properties"][ENDPOINT_STATISTICS])
``` python
from pixels_utils.endpoints.stac import statistics
from pixels_utils.constants.sentinel2 import (
ELEMENT84_L2A_SCENE_URL,
ELEMENT84_L2A_SCENE_URL_V0,
SENTINEL_2_L2A_COLLECTION,
EXPRESSION_NDVI,
)
Expand All @@ -114,7 +109,7 @@ from pixels_utils.mask import SCL
from pixels_utils.tests.data import sceneid, sample_geojson
from pixels_utils.utilities import _check_assets_expression

scene_url = ELEMENT84_L2A_SCENE_URL.format(
scene_url = ELEMENT84_L2A_SCENE_URL_V0.format(
collection=SENTINEL_2_L2A_COLLECTION, sceneid=sceneid
)
assets=None
Expand Down
2 changes: 1 addition & 1 deletion pixels_utils/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Defines package version. Parsed by setup.py and imported by __init__.py."""

__version__ = "0.0.1"
__version__ = "0.0.2"
3 changes: 3 additions & 0 deletions pixels_utils/scenes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from ._scenes import parse_nested_stac_data, request_asset_info, search_stac_scenes

# Public API of this package: the three names re-exported from `._scenes` above.
__all__ = ("parse_nested_stac_data", "request_asset_info", "search_stac_scenes")
136 changes: 136 additions & 0 deletions pixels_utils/scenes/_scenes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from datetime import date
from typing import Any, Dict, Union

from geo_utils.vector import geojson_to_shapely, shapely_to_geojson_geometry
from joblib import Memory # type: ignore
from pandas import DataFrame, Series
from pystac_client import Client
from requests import get
from retry import retry

from pixels_utils.scenes._utils import _validate_collections, _validate_geometry
from pixels_utils.stac_catalogs.earthsearch import EARTHSEARCH_ASSET_INFO_KEY
from pixels_utils.stac_catalogs.earthsearch.v1 import EARTHSEARCH_URL, EarthSearchCollections

# On-disk joblib cache backing the `@memory.cache` decorators in this module.
# `bytes_limit=2**30` is a 1 GiB size limit; `verbose=0` silences joblib's cache logging.
memory = Memory("/tmp/pixels-utils-cache/", bytes_limit=2**30, verbose=0)
memory.reduce_size() # Trim the cache at import time; joblib does not do this automatically (must be done manually)


@memory.cache
@retry((RuntimeError, KeyError), tries=3, delay=2)
def search_stac_scenes(
    geometry: Any,
    date_start: Union[date, str],
    date_end: Union[date, str],
    stac_catalog_url: str = EARTHSEARCH_URL,
    collection: Union[str, EarthSearchCollections] = EarthSearchCollections.sentinel_2_l2a,
    # NOTE: the dict default is kept for interface compatibility; this function only reads it, never mutates it.
    query: Dict[str, Any] = {"eo:cloud_cover": {"lt": 80}},
    simplify_to_bbox: bool = False,
) -> DataFrame:
    """
    Searches a STAC catalog for all items of `collection` that match `geometry` between `date_start` and `date_end`.

    Results are cached on disk via `memory.cache`, and the call is retried up to 3 times (2 s apart) on
    `RuntimeError` or `KeyError`.

    See EarthSearch API documentation for more information:
    https://earth-search.aws.element84.com/v1/api.html#tag/Item-Search/operation/getItemSearch

    Args:
        geometry (Any): Geometry of search area; must be able to be parsed to a shapely object, and must be in the
            EPSG=4326 CRS. GeoJSON FeatureCollection and GeometryCollection inputs are rejected by
            `_validate_geometry()` with a TypeError; pass a single geometry or Feature instead.
        date_start (Union[date, str]): Earliest UTC date to search for available images (inclusive). `date` objects
            are formatted as "YYYY-MM-DD"; strings are passed through unchanged.
        date_end (Union[date, str]): Latest UTC date to search for available images (inclusive). Handled the same way
            as `date_start`.
        stac_catalog_url (str, optional): URL of the STAC catalog to search. Defaults to EARTHSEARCH_URL
            ("https://earth-search.aws.element84.com/v1").
        collection (Union[str, EarthSearchCollections], optional): STAC collection to search. Defaults to
            EarthSearchCollections.sentinel_2_l2a ("sentinel-2-l2a").
        query (Dict[str, Any], optional): Additional query parameters to pass to the STAC search API. Defaults to
            `{"eo:cloud_cover": {"lt": 80}}`, which keeps only scenes with less than 80% cloud cover.
        simplify_to_bbox (bool, optional): Whether geometry should be simplified to its bounding box (truthy) or not;
            if truthy, uses the `bbox` argument of `api.search()`; otherwise uses the `intersects` argument. Exactly
            one of the two spatial filters is always sent. Defaults to False.

    Returns:
        DataFrame: One row per matching STAC item (the item's top-level fields, e.g. `id`, `properties`, `assets`,
        as columns), plus `datetime` and `eo:cloud_cover` columns extracted from `properties`. Rows are sorted by
        `datetime` ascending with a fresh 0..n-1 index.

    Raises:
        TypeError: If `geometry` is not a supported GeoJSON or shapely object (from `_validate_geometry()`).
        AssertionError: If `collection` is not available in the catalog (from `_validate_collections()`).
    """
    date_start = date_start.strftime("%Y-%m-%d") if isinstance(date_start, date) else date_start
    date_end = date_end.strftime("%Y-%m-%d") if isinstance(date_end, date) else date_end
    _validate_geometry(geometry)
    collection = _validate_collections(collection, stac_catalog_url)

    # Branch on truthiness so that exactly one spatial filter is set. The previous `is True` / `is False` identity
    # checks left BOTH filters as None for values such as 1, None, or a numpy bool, silently searching everywhere.
    search_shape = geojson_to_shapely(geometry)
    if simplify_to_bbox:
        bbox, intersects = search_shape.bounds, None
    else:
        bbox, intersects = None, shapely_to_geojson_geometry(search_shape)

    api = Client.open(url=stac_catalog_url)

    # TODO: Consider adding additional parameters to this function to provide more control over the search
    # (candidates: max_items, limit, ids, filter, filter_lang, sortby, fields).
    s = api.search(
        method="POST",
        collections=[collection],
        bbox=bbox,
        intersects=intersects,
        datetime=[date_start, date_end],
        query=query,
    )
    df = DataFrame(s.item_collection_as_dict()["features"])
    # NOTE(review): if the search returns no features there is presumably no `properties` column, so the lookups
    # below would raise KeyError (and be retried by the decorator) - confirm whether that is the desired behavior.
    # Append `datetime` and `eo:cloud_cover` columns to main DataFrame
    df["datetime"] = df["properties"].apply(lambda properties: properties["datetime"])
    df["eo:cloud_cover"] = df["properties"].apply(lambda properties: properties["eo:cloud_cover"])
    df = df.sort_values(by="datetime", ascending=True, ignore_index=True)
    return df


def parse_nested_stac_data(df: DataFrame, column: str) -> DataFrame:
    """
    Expands a column of dictionaries into a DataFrame with one column per dictionary key.

    Args:
        df (DataFrame): DataFrame containing nested STAC data.
        column (str): Name of the column in `df` that holds the nested (dict) data.

    Returns:
        DataFrame: A new DataFrame, indexed like `df`, built by converting each row's dict into a Series.

    Raises:
        AssertionError: If `column` is not in `df`, or if the first value of `column` is not a dict.
    """
    assert column in df.columns, f"Column '{column}' not found in DataFrame"
    nested = df[column]
    # Only the first row is inspected; the remaining rows are not type-checked.
    first_value = nested.iloc[0]
    assert isinstance(first_value, dict), f"Column '{column}' must be a dict to parse nested data."
    return nested.apply(Series)


@memory.cache
@retry((RuntimeError, KeyError), tries=3, delay=2)
def request_asset_info(df: DataFrame) -> DataFrame:
    """
    Retrieves asset info for each scene in a DataFrame.

    For every row, the asset-info URL is read from `assets[<info key>]["href"]`, where `<info key>` is chosen from
    `EARTHSEARCH_ASSET_INFO_KEY` using the `stac_version` of the FIRST row (all rows are assumed to share it). The
    URL is fetched with an HTTP GET and the JSON body is converted to a Series. Results are cached on disk via
    `memory.cache`, and the call is retried up to 3 times (2 s apart) on `RuntimeError` or `KeyError`.

    Args:
        df (DataFrame): DataFrame containing STAC data; must have `assets` and `stac_version` columns.

    Returns:
        DataFrame: DataFrame with asset info for each scene (one row per row of `df`).

    Raises:
        AssertionError: If the `assets` or `stac_version` column is missing.
        KeyError: If the STAC version is not in `EARTHSEARCH_ASSET_INFO_KEY`, or an item lacks the expected asset.
        requests.HTTPError: If an asset-info request returns a 4xx/5xx status.
    """
    assert "assets" in df.columns, "Column 'assets' not found in DataFrame; cannot retrieve asset info."
    assert (
        "stac_version" in df.columns
    ), "Column 'stac_version' not found in DataFrame; cannot determine structure of STAC data."

    def _request_asset_info(info_url: str) -> Series:
        r = get(url=info_url)
        # Fail loudly on HTTP errors so that an error payload is never parsed as asset info and then cached.
        r.raise_for_status()
        return Series(r.json())

    # The key lookup does not depend on the row, so resolve it once instead of once per row.
    stac_version = df["stac_version"].iloc[0]
    info_key = EARTHSEARCH_ASSET_INFO_KEY[stac_version]
    return df["assets"].apply(lambda assets: _request_asset_info(assets[info_key]["href"]))
63 changes: 63 additions & 0 deletions pixels_utils/scenes/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from typing import Any, Dict, Tuple, Union

from geo_utils.vector import validate_geojson
from geo_utils.vector._geojson import VALID_GEOJSON_GEOM_TYPES
from geo_utils.vector._shapely import VALID_SHAPELY_GEOM_TYPES
from geojson.feature import Feature

from pixels_utils.stac_catalogs.earthsearch.v1 import EARTHSEARCH_URL, EarthSearchCollections

# Type alias for a tuple of four floats; presumably bounding-box coordinates - TODO confirm the element order.
Bounds = Tuple[float, float, float, float]


def _earthsearch_version_from_stac_catalog_url(stac_catalog_url: str = EARTHSEARCH_URL):
    """
    Gets the EarthSearchCollections class for the given version of the STAC catalog URL.

    The version is taken from the last path segment of the URL ("v0" or "v1"); trailing slashes are ignored.

    Args:
        stac_catalog_url (str, optional): URL of the STAC catalog. Defaults to EARTHSEARCH_URL.

    Returns:
        The `EarthSearchCollections` enum class from the matching `pixels_utils.stac_catalogs.earthsearch` module.

    Raises:
        ValueError: If the last path segment is neither "v0" nor "v1".
    """
    # Strip trailing slashes first: ".../v1/" would otherwise split to an empty last segment and be rejected.
    stac_version = stac_catalog_url.rstrip("/").split("/")[-1]
    # The version-specific modules are imported lazily so that only the requested one is loaded.
    if stac_version == "v0":
        from pixels_utils.stac_catalogs.earthsearch.v0 import EarthSearchCollections

        return EarthSearchCollections
    if stac_version == "v1":
        from pixels_utils.stac_catalogs.earthsearch.v1 import EarthSearchCollections

        return EarthSearchCollections
    raise ValueError(f"STAC version '{stac_version}' not supported by pixels-utils.")


def _validate_collections(
    collection: Union[str, EarthSearchCollections], stac_catalog_url: str = EARTHSEARCH_URL
) -> str:
    """
    Checks that `collection` exists in the catalog at `stac_catalog_url` and returns its name.

    Args:
        collection (Union[str, EarthSearchCollections]): Collection name, or a member of the collections enum that
            matches the catalog version.
        stac_catalog_url (str, optional): URL of the STAC catalog. Defaults to EARTHSEARCH_URL.

    Returns:
        str: The collection name (the enum member's `name` when a member was passed).

    Raises:
        AssertionError: If the collection is not one of the catalog's collection names.
    """
    # TODO: Make more robust if needing to support more STAC catalogs
    catalog_collections = _earthsearch_version_from_stac_catalog_url(stac_catalog_url)

    # Members of the catalog's own enum are reduced to their name; anything else is compared as given.
    if isinstance(collection, catalog_collections):
        collection = collection.name
    supported_names = [member.name for member in catalog_collections]
    assert collection in supported_names, f"Collection '{collection}' not supported by pixels-utils."
    return collection


def _validate_geometry(geom: Any) -> None:
    """
    Validates the passed geometry object and raises an informative error if problem is detected.

    Args:
        geom (Any): Input geometry; should be GeoJSON object, shapely object, or WKT string.

    Returns:
        None. The function is called for its validation side effect only.

    Raises:
        TypeError: If `geom` is a GeoJSON object that is neither a single geometry nor a Feature (e.g., a
            FeatureCollection or GeometryCollection), or if it is not a dict, str, or valid shapely geometry.
    """
    # dict/str inputs are handed to validate_geojson(); per the original author's note, geojson-package objects also
    # pass this isinstance check, so every GeoJSON input takes this branch.
    if isinstance(geom, (dict, str)):
        geojson = validate_geojson(geom)
        if not isinstance(geojson, (*VALID_GEOJSON_GEOM_TYPES, Feature)):
            # FeatureCollection or GeometryCollection; geojson_to_shapely() will throw TypeError
            raise TypeError(
                f'Cannot determine bounds from geojson type of "{type(geojson).__name__}" because there are '
                "potentially multiple geometries present. Either choose a single geometry or merge the collection of "
                "geometries."
            )
    elif not isinstance(geom, VALID_SHAPELY_GEOM_TYPES):
        # Not a dict, str, or shapely
        raise TypeError(
            f'Cannot determine bounds from input of "{type(geom).__name__}". Please pass a valid shapely or geojson object.'
        )
6 changes: 6 additions & 0 deletions pixels_utils/stac_catalogs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Support of New STAC Catalogs
- Add a new folder/directory under `stac_catalogs` that describes the catalog (e.g., `stac_catalogs/earthsearch`)
- Under the specific catalog directory, add a .py file that includes:
- A variable containing the URL/endpoint (e.g., https://earth-search.aws.element84.com/v1)
- An ENUM class that contains the available collections (for example, see [stac_catalogs/earthsearch/v1.py](https://github.com/SenteraLLC/pixels-utils/tree/main/pixels_utils/stac_catalogs/earthsearch/v1.py))
- Store a .json of the URL/endpoint (for example, see [stac_catalogs/earthsearch/v1.json](https://github.com/SenteraLLC/pixels-utils/tree/main/pixels_utils/stac_catalogs/earthsearch/v1.json))
3 changes: 3 additions & 0 deletions pixels_utils/stac_catalogs/earthsearch/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from ._core import EARTHSEARCH_ASSET_INFO_KEY, AutoDashNameEnum

# Public API of this package: the two names re-exported from `._core` above.
__all__ = ("AutoDashNameEnum", "EARTHSEARCH_ASSET_INFO_KEY")
13 changes: 13 additions & 0 deletions pixels_utils/stac_catalogs/earthsearch/_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from enum import Enum


class AutoDashNameEnum(Enum):
    """
    Enum base class whose members report their `name` with underscores replaced by dashes.

    A member declared as `sentinel_2_l2a = ...` therefore has `name == "sentinel-2-l2a"`; its `value` is the
    assigned value, unchanged.
    """

    def __init__(self, value):
        self._value_ = value
        # `_name_` already holds the declared identifier at this point; rewrite it with "-" in place of "_".
        self._name_ = "-".join(self._name_.split("_"))


# Maps a STAC version string (an item's `stac_version`) to the name of the asset that holds the item's
# metadata/info link, e.g. "1.0.0" -> "tileinfo_metadata" and "1.0.0-beta.2" -> "info".
EARTHSEARCH_ASSET_INFO_KEY = {"1.0.0-beta.2": "info", "1.0.0": "tileinfo_metadata"}
54 changes: 54 additions & 0 deletions pixels_utils/stac_catalogs/earthsearch/v0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"stac_version": "1.0.0-beta.2",
"stac_api_version": "0.9.0",
"id": "earth-search",
"title": "Earth Search",
"description": "A STAC API of AWS Public Datasets powered by stac-server",
"links": [
{
"rel": "child",
"href": "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a"
},
{
"rel": "child",
"href": "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c"
},
{
"rel": "child",
"href": "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs"
},
{
"rel": "child",
"href": "https://earth-search.aws.element84.com/v0/collections/landsat-8-l1-c1"
},
{
"rel": "service-desc",
"type": "application/vnd.oai.openapi+json;version=3.0",
"href": "https://earth-search.aws.element84.com/v0/api"
},
{
"rel": "conformance",
"type": "application/json",
"href": "https://earth-search.aws.element84.com/v0/conformance"
},
{
"rel": "children",
"type": "application/json",
"href": "https://earth-search.aws.element84.com/v0/collections"
},
{
"rel": "self",
"type": "application/json",
"href": "https://earth-search.aws.element84.com/v0/"
},
{
"rel": "search",
"type": "application/json",
"href": "https://earth-search.aws.element84.com/v0/search"
},
{
"rel": "docs",
"href": "https://stac-utils.github.io/stac-server/"
}
]
}
Loading

0 comments on commit 08d6872

Please sign in to comment.