-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from SenteraLLC/DEM-136-search
[DEM-136] Scene Search
- Loading branch information
Showing
25 changed files
with
1,642 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
"""Defines package version. Parsed by setup.py and imported by __init__.py.""" | ||
|
||
__version__ = "0.0.1" | ||
__version__ = "0.0.2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from ._scenes import parse_nested_stac_data, request_asset_info, search_stac_scenes | ||
|
||
__all__ = ("parse_nested_stac_data", "request_asset_info", "search_stac_scenes") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
from datetime import date | ||
from typing import Any, Dict, Union | ||
|
||
from geo_utils.vector import geojson_to_shapely, shapely_to_geojson_geometry | ||
from joblib import Memory # type: ignore | ||
from pandas import DataFrame, Series | ||
from pystac_client import Client | ||
from requests import get | ||
from retry import retry | ||
|
||
from pixels_utils.scenes._utils import _validate_collections, _validate_geometry | ||
from pixels_utils.stac_catalogs.earthsearch import EARTHSEARCH_ASSET_INFO_KEY | ||
from pixels_utils.stac_catalogs.earthsearch.v1 import EARTHSEARCH_URL, EarthSearchCollections | ||
|
||
# Disk-backed joblib cache used by the `@memory.cache`-decorated functions in this module (size limit: 1 GiB).
memory = Memory(
    "/tmp/pixels-utils-cache/",
    bytes_limit=2**30,
    verbose=0,
)
# The size limit is only applied when `reduce_size()` is called explicitly, so trim the cache once at import time.
memory.reduce_size()
|
||
|
||
@memory.cache
@retry((RuntimeError, KeyError), tries=3, delay=2)
def search_stac_scenes(
    geometry: Any,
    date_start: Union[date, str],
    date_end: Union[date, str],
    stac_catalog_url: str = EARTHSEARCH_URL,
    collection: Union[str, EarthSearchCollections] = EarthSearchCollections.sentinel_2_l2a,
    query: Dict[str, Any] = {"eo:cloud_cover": {"lt": 80}},
    simplify_to_bbox: bool = False,
) -> DataFrame:
    """
    Retrieves the STAC items (scenes) available for `geometry` between `date_start` and `date_end`.

    The returned DataFrame holds one row per STAC item, with the item's top-level keys as columns plus two
    convenience columns (`datetime` and `eo:cloud_cover`) lifted out of the nested `properties` dict.

    See EarthSearch API documentation for more information:
    https://earth-search.aws.element84.com/v1/api.html#tag/Item-Search/operation/getItemSearch

    Args:
        geometry (Any): Geometry of search area; must be able to be parsed to a shapely object, and must be in the
            EPSG=4326 CRS. The geometry is checked by `_validate_geometry()` first, which raises `TypeError` for
            inputs it cannot reduce to a single geometry (e.g., GeoJSON collections).
        date_start (Union[date, str]): Earliest UTC date to search for available images (inclusive).
        date_end (Union[date, str]): Latest UTC date to search for available images (inclusive).
        stac_catalog_url (str, optional): URL of the STAC catalog to search. Defaults to EARTHSEARCH_URL
            ("https://earth-search.aws.element84.com/v1").
        collection (Union[str, EarthSearchCollections], optional): STAC collection to search. Defaults to
            EarthSearchCollections.sentinel_2_l2a ("sentinel-2-l2a").
        query (Dict[str, Any], optional): Additional query parameters to pass to the STAC search API. Defaults to
            `{"eo:cloud_cover": {"lt": 80}}`, which filters out scenes with cloud cover of 80% or greater. The
            default dict is shared between calls and is only read here; it must not be mutated.
        simplify_to_bbox (bool, optional): Whether geometry should be simplified to the bounding box (True) or not; if
            True, uses `bbox` argument of `api.search()`; if False, uses `intersects` argument of `api.search()`.
            Defaults to False.

    Returns:
        DataFrame: One row per matching scene, sorted by ascending `datetime`. When nothing matches, an empty
        DataFrame that still carries the `id`, `stac_version`, `properties`, `assets`, `datetime`, and
        `eo:cloud_cover` columns is returned.
    """
    date_start = date_start.strftime("%Y-%m-%d") if isinstance(date_start, date) else date_start
    date_end = date_end.strftime("%Y-%m-%d") if isinstance(date_end, date) else date_end
    _validate_geometry(geometry)
    collection = _validate_collections(collection, stac_catalog_url)

    # Exactly one spatial filter is sent. Branching on truthiness (rather than `is True` / `is False`) guarantees
    # that a truthy non-bool value can never leave both filters unset.
    if simplify_to_bbox:
        bbox = geojson_to_shapely(geometry).bounds
        intersects = None
    else:
        bbox = None
        intersects = shapely_to_geojson_geometry(geojson_to_shapely(geometry))

    api = Client.open(url=stac_catalog_url)

    # TODO: Consider adding additional parameters to this function to provide more control over the search
    s = api.search(
        method="POST",
        collections=[collection],
        bbox=bbox,
        intersects=intersects,
        datetime=[date_start, date_end],
        query=query,
    )
    features = s.item_collection_as_dict()["features"]
    if not features:
        # An empty result has no "properties" column; indexing it would raise KeyError, which the retry decorator
        # would then needlessly retry. Return an empty frame with the columns used elsewhere in this module.
        return DataFrame(columns=["id", "stac_version", "properties", "assets", "datetime", "eo:cloud_cover"])

    df = DataFrame(features)
    # Append `datetime` and `eo:cloud_cover` columns to main DataFrame
    df["datetime"] = [properties["datetime"] for properties in df["properties"]]
    # Not every item necessarily reports cloud cover; `.get()` leaves a missing value instead of failing the search.
    df["eo:cloud_cover"] = [properties.get("eo:cloud_cover") for properties in df["properties"]]
    return df.sort_values(by="datetime", ascending=True, ignore_index=True)
|
||
|
||
def parse_nested_stac_data(df: DataFrame, column: str) -> DataFrame:
    """
    Parses nested STAC data from a DataFrame column into a new DataFrame.

    Each value of `column` is expected to be a dict; its keys become the columns of the result, and the result
    keeps the index of `df`.

    Args:
        df (DataFrame): DataFrame containing nested STAC data.
        column (str): Name of column containing nested STAC data.

    Returns:
        DataFrame: DataFrame with nested STAC data parsed into new columns. An empty (column-less) DataFrame with
        the index of `df` is returned when `df` has no rows.

    Raises:
        AssertionError: If `column` is not in `df`, or if the first value of `column` is not a dict.
    """
    # Raised explicitly (instead of via `assert`) so the checks are not stripped when Python runs with `-O`;
    # the exception type is kept for callers that already catch AssertionError.
    if column not in df.columns:
        raise AssertionError(f"Column '{column}' not found in DataFrame")

    nested = df[column]
    if nested.empty:
        # Nothing to expand; `.iloc[0]` below would raise IndexError on an empty column.
        return DataFrame(index=df.index)

    # Only the first row is inspected, mirroring the original check.
    if not isinstance(nested.iloc[0], dict):
        raise AssertionError(f"Column '{column}' must be a dict to parse nested data.")
    return nested.apply(lambda properties: Series(properties))
|
||
|
||
@memory.cache
@retry((RuntimeError, KeyError), tries=3, delay=2)
def request_asset_info(df: DataFrame) -> DataFrame:
    """
    Retrieves asset info for each scene in a DataFrame.

    For every row, the asset named by `EARTHSEARCH_ASSET_INFO_KEY[stac_version]` is looked up in the row's `assets`
    dict, its `href` is downloaded, and the JSON response becomes one row of the result. The STAC version is taken
    from the first row of `df` and applied to all rows.

    Args:
        df (DataFrame): DataFrame containing STAC data; must have `assets` and `stac_version` columns.

    Returns:
        DataFrame: DataFrame with asset info for each scene. An empty (column-less) DataFrame with the index of
        `df` is returned when `df` has no rows.

    Raises:
        AssertionError: If the `assets` or `stac_version` column is missing.
        KeyError: If the STAC version has no entry in `EARTHSEARCH_ASSET_INFO_KEY`, or a scene lacks the info asset.
        requests.HTTPError: If an info URL responds with an HTTP error status.
    """
    # Raised explicitly (instead of via `assert`) so the checks are not stripped when Python runs with `-O`.
    if "assets" not in df.columns:
        raise AssertionError("Column 'assets' not found in DataFrame; cannot retrieve asset info.")
    if "stac_version" not in df.columns:
        raise AssertionError(
            "Column 'stac_version' not found in DataFrame; cannot retrieve determine structure of STAC data."
        )
    if df.empty:
        # No scenes means no requests; `.iloc[0]` below would raise IndexError on an empty column.
        return DataFrame(index=df.index)

    # Resolve the asset key once instead of once per row.
    stac_version = df["stac_version"].iloc[0]
    info_key = EARTHSEARCH_ASSET_INFO_KEY[stac_version]

    def _request_asset_info(info_url: str) -> Series:
        # Without a timeout a stalled connection would block forever (30 s is an arbitrary but generous bound).
        r = get(url=info_url, timeout=30)
        # Fail loudly on HTTP errors so an error payload is never parsed and persisted by `memory.cache`.
        r.raise_for_status()
        return Series(r.json())

    return df["assets"].apply(lambda assets: _request_asset_info(assets[info_key]["href"]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from typing import Any, Dict, Tuple, Union | ||
|
||
from geo_utils.vector import validate_geojson | ||
from geo_utils.vector._geojson import VALID_GEOJSON_GEOM_TYPES | ||
from geo_utils.vector._shapely import VALID_SHAPELY_GEOM_TYPES | ||
from geojson.feature import Feature | ||
|
||
from pixels_utils.stac_catalogs.earthsearch.v1 import EARTHSEARCH_URL, EarthSearchCollections | ||
|
||
Bounds = Tuple[float, float, float, float] | ||
|
||
|
||
def _earthsearch_version_from_stac_catalog_url(stac_catalog_url: str = EARTHSEARCH_URL):
    """
    Gets the EarthSearchCollections class for the given version of the STAC catalog URL.

    The version is read from the last path segment of the URL (e.g., "v1" for
    "https://earth-search.aws.element84.com/v1"); trailing slashes are ignored.

    Args:
        stac_catalog_url (str, optional): URL of the STAC catalog. Defaults to EARTHSEARCH_URL.

    Returns:
        The `EarthSearchCollections` enum class from the matching `pixels_utils.stac_catalogs.earthsearch` module.

    Raises:
        ValueError: If the last path segment is neither "v0" nor "v1".
    """
    # Strip trailing slashes first; otherwise ".../v1/" would yield an empty last segment and be rejected.
    api_version = stac_catalog_url.rstrip("/").rsplit("/", 1)[-1]

    # The enum modules are imported lazily so that only the requested version is loaded.
    if api_version == "v0":
        from pixels_utils.stac_catalogs.earthsearch.v0 import EarthSearchCollections

        return EarthSearchCollections
    if api_version == "v1":
        from pixels_utils.stac_catalogs.earthsearch.v1 import EarthSearchCollections

        return EarthSearchCollections
    raise ValueError(f"STAC version '{api_version}' not supported by pixels-utils.")
|
||
|
||
def _validate_collections(
    collection: Union[str, EarthSearchCollections], stac_catalog_url: str = EARTHSEARCH_URL
) -> str:
    """
    Validates that collection is a valid STAC collection for the given STAC catalog URL.

    Args:
        collection (Union[str, EarthSearchCollections]): Collection name, or a member of the EarthSearchCollections
            enum that belongs to the catalog version of `stac_catalog_url`.
        stac_catalog_url (str, optional): URL of the STAC catalog. Defaults to EARTHSEARCH_URL.

    Returns:
        str: The collection name.

    Raises:
        AssertionError: If the collection is not one of the names defined by the catalog's enum.
        ValueError: If the catalog version of `stac_catalog_url` is not supported.
    """
    # TODO: Make more robust if needing to support more STAC catalogs
    earthsearch_collections = _earthsearch_version_from_stac_catalog_url(stac_catalog_url)

    # Enum members are reduced to their name; anything else is compared against the names as-is.
    if isinstance(collection, earthsearch_collections):
        collection = collection.name

    supported_names = [c.name for c in earthsearch_collections]
    if collection not in supported_names:
        # Raised explicitly (instead of via `assert`) so the check is not stripped when Python runs with `-O`;
        # the exception type is kept for callers that already catch AssertionError.
        raise AssertionError(f"Collection '{collection}' not supported by pixels-utils.")
    return collection
|
||
|
||
def _validate_geometry(geom: Any) -> None:
    """
    Validates the passed geometry object and raises an informative error if problem is detected.

    Nothing is returned; the function only raises when the input is unusable.

    Args:
        geom (Any): Input geometry; should be GeoJSON object, shapely object, or string. Dicts (which includes
            geojson objects) and strings are passed through `validate_geojson()`; everything else must be one of
            `VALID_SHAPELY_GEOM_TYPES`.

    Raises:
        TypeError: If the validated GeoJSON is neither a single geometry nor a Feature, or if a non-dict, non-str
            input is not a supported shapely type.
    """
    if isinstance(geom, (dict, str)):  # geojson objects are dict subclasses, so this catches all geojson objects
        geojson = validate_geojson(geom)
        single_geometry_types = tuple(list(VALID_GEOJSON_GEOM_TYPES) + [Feature])
        if not isinstance(geojson, single_geometry_types):
            # FeatureCollection or GeometryCollection; geojson_to_shapely() will throw TypeError
            raise TypeError(
                f'Cannot determine bounds from geojson type of "{type(geojson).__name__}" because there are '
                "potentially multiple geometries present. Either choose a single geometry or merge the collection of "
                "geometries."
            )
        return

    if not isinstance(geom, VALID_SHAPELY_GEOM_TYPES):
        # Not a dict, str, or shapely
        raise TypeError(
            f'Cannot determine bounds from input of "{type(geom).__name__}". Please pass a valid shapely or geojson object.'
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Support of New STAC Catalogs | ||
- Add a new folder/directory under `stac_catalogs` that describes the catalog (e.g., `stac_catalogs/earthsearch`) | ||
- Under the specific catalog directory, add a .py file that includes: | ||
- A variable containing the URL/endpoint (e.g., https://earth-search.aws.element84.com/v1) | ||
- An ENUM class that contains the available collections (for example, see [stac_catalogs/earthsearch/v1.py](https://github.com/SenteraLLC/pixels-utils/tree/main/pixels_utils/stac_catalogs/earthsearch/v1.py)) | ||
- Store a .json of the URL/endpoint (for example, see [stac_catalogs/earthsearch/v1.json](https://github.com/SenteraLLC/pixels-utils/tree/main/pixels_utils/stac_catalogs/earthsearch/v1.json)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from ._core import EARTHSEARCH_ASSET_INFO_KEY, AutoDashNameEnum | ||
|
||
__all__ = ("AutoDashNameEnum", "EARTHSEARCH_ASSET_INFO_KEY") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from enum import Enum | ||
|
||
|
||
class AutoDashNameEnum(Enum):
    """Enum base class whose members expose a dash-separated `name`.

    The member's `name` is its Python identifier with every underscore replaced by a dash (e.g., a member declared
    as `sentinel_2_l2a` reports the name "sentinel-2-l2a"). The member's value is stored unchanged.
    """

    def __init__(self, value):
        self._value_ = value
        # `_name_` is read here, so it must already hold the declared identifier when `__init__` runs.
        self._name_ = "-".join(self._name_.split("_"))
|
||
|
||
# Lookup table: STAC version string -> name of the asset that holds a scene's metadata/info document.
EARTHSEARCH_ASSET_INFO_KEY = {
    "1.0.0-beta.2": "info",
    "1.0.0": "tileinfo_metadata",
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
"stac_version": "1.0.0-beta.2", | ||
"stac_api_version": "0.9.0", | ||
"id": "earth-search", | ||
"title": "Earth Search", | ||
"description": "A STAC API of AWS Public Datasets powered by stac-server", | ||
"links": [ | ||
{ | ||
"rel": "child", | ||
"href": "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a" | ||
}, | ||
{ | ||
"rel": "child", | ||
"href": "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c" | ||
}, | ||
{ | ||
"rel": "child", | ||
"href": "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs" | ||
}, | ||
{ | ||
"rel": "child", | ||
"href": "https://earth-search.aws.element84.com/v0/collections/landsat-8-l1-c1" | ||
}, | ||
{ | ||
"rel": "service-desc", | ||
"type": "application/vnd.oai.openapi+json;version=3.0", | ||
"href": "https://earth-search.aws.element84.com/v0/api" | ||
}, | ||
{ | ||
"rel": "conformance", | ||
"type": "application/json", | ||
"href": "https://earth-search.aws.element84.com/v0/conformance" | ||
}, | ||
{ | ||
"rel": "children", | ||
"type": "application/json", | ||
"href": "https://earth-search.aws.element84.com/v0/collections" | ||
}, | ||
{ | ||
"rel": "self", | ||
"type": "application/json", | ||
"href": "https://earth-search.aws.element84.com/v0/" | ||
}, | ||
{ | ||
"rel": "search", | ||
"type": "application/json", | ||
"href": "https://earth-search.aws.element84.com/v0/search" | ||
}, | ||
{ | ||
"rel": "docs", | ||
"href": "https://stac-utils.github.io/stac-server/" | ||
} | ||
] | ||
} |
Oops, something went wrong.