Merge pull request #10 from SenteraLLC/dem-464-mosaicco-rs
[DEM-464] Support more than just NDVI spectral index
tnigon authored Jan 29, 2024
2 parents 965512b + eff44b1 commit 178bf1b
Showing 9 changed files with 1,010 additions and 783 deletions.
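
At a high level, `expression_from_collection()` no longer hard-codes NDVI: any index defined in `spyndex.indices` can be requested, and spyndex's band names are remapped to the collection's own asset names where they differ. A minimal sketch of the intended call pattern (illustrative only; "NDRE" is just an example index):

from pixels_utils.stac_catalogs.earthsearch.v1 import EarthSearchCollections, expression_from_collection

# Any index available via spyndex.indices should now be accepted
expr = expression_from_collection(
    collection=EarthSearchCollections.sentinel_2_l2a, spectral_index="NDRE"
)
print(expr.expression)  # expression string using collection-specific asset names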
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -1,15 +1,15 @@
 repos:
   - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort
         name: isort (python)
   - repo: https://github.com/ambv/black
-    rev: 23.11.0
+    rev: 24.1.1
     hooks:
       - id: black
   - repo: https://github.com/pycqa/flake8
-    rev: '6.1.0'
+    rev: '7.0.0'
     hooks:
       - id: flake8
         exclude: (tests|doc)
2 changes: 1 addition & 1 deletion pixels_utils/_version.py
@@ -1,3 +1,3 @@
 """Defines package version. Parsed by setup.py and imported by __init__.py."""
 
-__version__ = "0.4.1"
+__version__ = "0.5.0"
6 changes: 6 additions & 0 deletions pixels_utils/helpers/__init__.py
@@ -0,0 +1,6 @@
from pixels_utils.helpers._helper import filter_scenes_with_all_nodata, get_satellite_expression_as_df

__all__ = (
    "filter_scenes_with_all_nodata",
    "get_satellite_expression_as_df",
)
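
The new subpackage re-exports both helpers, so downstream code can import them directly (a trivial sketch of the public surface added here):

from pixels_utils.helpers import filter_scenes_with_all_nodata, get_satellite_expression_as_df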
168 changes: 168 additions & 0 deletions pixels_utils/helpers/_helper.py
@@ -0,0 +1,168 @@
import logging
from datetime import datetime
from typing import Union

from geojson.feature import Feature
from numpy import float32
from pandas import DataFrame, Series, concat
from rasterio import MemoryFile
from rasterio.enums import Resampling
from rasterio.sample import sample_gen
from tqdm import tqdm

from pixels_utils.scenes import parse_nested_stac_data
from pixels_utils.stac_catalogs.earthsearch.v1 import EARTHSEARCH_SCENE_URL, EarthSearchCollections, Expression
from pixels_utils.titiler import TITILER_ENDPOINT
from pixels_utils.titiler.endpoints.stac import Crop, QueryParamsCrop, QueryParamsStatistics, Statistics
from pixels_utils.titiler.mask.enum_classes import Sentinel2_SCL_Group


def filter_scenes_with_all_nodata(
    df_scenes: DataFrame,
    feature: Feature,
    expression_obj: Expression,
    gsd: Union[float, int] = 10,
    nodata: Union[float, int] = -999,
) -> list:
    """
    Finds the index of all scenes without any valid pixels across the extent of the field group geometry.

    Note:
        The Titiler/sentera.pixels endpoint has a nuance that if all pixels within a spatial query are NODATA, then
        the Statistics endpoint returns statistics values equivalent to the NODATA value set. This function finds
        the index of all scenes where this is the case so they can be removed from the list of scenes to process.

    Args:
        df_scenes (DataFrame): Initial list of scenes to process.
        feature (Feature): GeoJSON Feature defining the spatial extent of the statistics query.
        expression_obj (Expression): Spectral index expression to evaluate for each scene.
        gsd (Union[float, int]): Ground sample distance of the query. Defaults to 10.
        nodata (Union[float, int]): Value treated as NODATA. Defaults to -999.

    Returns:
        list: Index values of df_scenes that can safely be removed because they do not have any valid pixels.
    """
    df_scene_properties = parse_nested_stac_data(df=df_scenes, column="properties")

    ind_nodata = []
    for ind in tqdm(
        list(df_scenes.index),
        desc="Filtering out scenes where clouds are covering the plot area",
    ):
        scene = df_scenes.loc[ind]
        properties = df_scene_properties.loc[ind]
        date = datetime.strptime(properties["datetime"].split("T")[0], "%Y-%m-%d")

        scene_url = EARTHSEARCH_SCENE_URL.format(
            collection=EarthSearchCollections.sentinel_2_l2a.name, id=scene["id"]
        )

        query_params = QueryParamsStatistics(
            url=scene_url,
            feature=feature,
            expression=expression_obj.expression,
            asset_as_band=True,
            coord_crs=None,
            gsd=gsd,
            nodata=nodata,
            resampling=Resampling.nearest.name,
        )

        stats_arable_wlist = Statistics(
            query_params=query_params,
            clear_cache=False,
            titiler_endpoint=TITILER_ENDPOINT,
            mask_enum=Sentinel2_SCL_Group.ARABLE,
            mask_asset="scl",
            whitelist=True,
        )

        expression_ = list(stats_arable_wlist.response.json()["properties"]["statistics"].keys())[0]
        stats = stats_arable_wlist.response.json()["properties"]["statistics"][expression_]
        if stats["min"] == nodata:
            # Indicates at least one pixel from statistics query is set to NODATA (i.e., masked)
            logging.debug(
                "%s: No valid pixels available",
                date.date(),
            )
            ind_nodata += [ind]
        else:
            logging.debug(
                "%s: %s valid pixels (mean %s = %s)",
                date.date(),
                stats["count"],
                expression_obj.short_name,
                round(stats["mean"], 2),
            )
    return ind_nodata


def get_satellite_expression_as_df(
    gdf_fields_subset: DataFrame,
    scene: Series,
    feature: Feature,
    expression_obj: Expression,
    gsd: Union[float, int],
    nodata: Union[float, int],
) -> DataFrame:
    """
    Retrieves the `expression_obj` image for `scene` via sentera.pixels.com, then samples the spectral index value
    at each centroid in `gdf_fields_subset`.

    Returns:
        DataFrame: Spectral index values with "field_id" and "geom_id" join keys.
    """
    query_params_crop = QueryParamsCrop(
        url=EARTHSEARCH_SCENE_URL.format(
            collection=EarthSearchCollections.sentinel_2_l2a.name, id=scene["id"]
        ),
        feature=feature,
        gsd=gsd,
        format_=".tif",
        expression=expression_obj.expression,
        asset_as_band=True,
        nodata=nodata,
        resampling=Resampling.nearest.name,
    )
    crop_arable_wlist = Crop(
        query_params=query_params_crop,
        clear_cache=False,
        titiler_endpoint=TITILER_ENDPOINT,
        mask_enum=Sentinel2_SCL_Group.ARABLE,
        mask_asset="scl",
        whitelist=True,
    )
    data_mask, profile_mask, tags = crop_arable_wlist.to_rasterio(
        **{
            "dtype": float32,
            "band_names": [expression_obj.short_name],
            "band_description": [expression_obj.short_name],
        }
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile_mask) as src:  # Open as DatasetWriter
            src.write(data_mask)

            # Do not sort xy in sample_gen function; order must be maintained for merging to primary keys
            df_data = DataFrame(
                data=sample_gen(
                    src,
                    xy=list(
                        gdf_fields_subset[gdf_fields_subset.geometry.name].apply(
                            lambda geometry: geometry.centroid.coords[0]
                        )
                    ),
                ),
                columns=[expression_obj.short_name],
            )
            # Remove any rows where the expression value is set to NODATA
            df_data.drop(df_data.loc[df_data[expression_obj.short_name] == nodata].index, inplace=True)
            df_data.insert(0, "scene_id", scene["id"])
            df_data.insert(1, "datetime", scene["datetime"])
            df_data = concat(
                [
                    gdf_fields_subset.reset_index(drop=True),
                    df_data,
                ],
                axis=1,
            )
    return df_data
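
Taken together, the two helpers support a filter-then-sample workflow. A hedged sketch of how they might be wired up; `df_scenes`, `feature`, and `gdf_fields` are assumed inputs (e.g., from `search_stac_scenes()` and a GeoDataFrame of field boundaries), not anything defined in this PR:

from pandas import concat

from pixels_utils.helpers import filter_scenes_with_all_nodata, get_satellite_expression_as_df
from pixels_utils.stac_catalogs.earthsearch.v1 import EarthSearchCollections, expression_from_collection

expr = expression_from_collection(
    collection=EarthSearchCollections.sentinel_2_l2a, spectral_index="NDVI"
)

# Drop scenes with no valid pixels (e.g., fully cloud-masked) over the feature
ind_nodata = filter_scenes_with_all_nodata(
    df_scenes=df_scenes, feature=feature, expression_obj=expr, gsd=10, nodata=-999
)
df_scenes_valid = df_scenes.drop(index=ind_nodata)

# Sample the index value at each field centroid for every remaining scene
df_samples = concat(
    [
        get_satellite_expression_as_df(
            gdf_fields_subset=gdf_fields,
            scene=df_scenes_valid.loc[ind],
            feature=feature,
            expression_obj=expr,
            gsd=10,
            nodata=-999,
        )
        for ind in df_scenes_valid.index
    ],
    axis=0,
)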
2 changes: 1 addition & 1 deletion pixels_utils/scenes/__init__.py
@@ -1,3 +1,3 @@
-from ._scenes import parse_nested_stac_data, request_asset_info, search_stac_scenes
+from pixels_utils.scenes._scenes import parse_nested_stac_data, request_asset_info, search_stac_scenes
 
 __all__ = ("parse_nested_stac_data", "request_asset_info", "search_stac_scenes")
78 changes: 61 additions & 17 deletions pixels_utils/stac_catalogs/earthsearch/v1.py
@@ -31,6 +31,14 @@ def _version(self):
         return "v1"
 
 
+def _generate_expression_override(expression: str, assets: list[str], assets_override: list[str]) -> str:
+    """Return expression_override with assets replaced by assets_override."""
+    expression_override = expression
+    for asset_common, asset_collection in zip(assets, assets_override):
+        expression_override = expression_override.replace(asset_common, asset_collection)
+    return expression_override
+
+
 def expression_from_collection(
     collection: EarthSearchCollections = EarthSearchCollections.sentinel_2_l2a, spectral_index: str = "NDVI"
 ) -> Expression:
@@ -49,21 +57,56 @@ def expression_from_collection(
     Returns:
         Expression: The tailored Expression object.
     """
-    assert collection._version == "v1", (
-        f"The `collection` and `expression_from_collection()` function versions do not match ({collection._version} vs "
-        f'v1). Ensure both the "{collection}" and expression_from_collection() function are imported from the same '
-        "version of `pixels_utils.stac_catalogs.earthsearch`."
-    )
+    if spectral_index not in spyndex_indices.keys():
+        raise AssertionError(f"{spectral_index} is not a valid spectral index (must be available via spyndex.indices).")
+    if collection._version != "v1":
+        raise AssertionError(
+            f"The `collection` and `expression_from_collection()` function versions do not match ({collection._version} vs "
+            f'v1). Ensure both the "{collection}" and expression_from_collection() function are imported from the same '
+            "version of `pixels_utils.stac_catalogs.earthsearch`."
+        )
     stac_collection_url = EARTHSEARCH_COLLECTION_URL.format(collection=collection.name)
     stac_metadata = STACMetaData(collection_url=stac_collection_url)
     # TODO: stac_metadata.parse_asset_bands("eo:bands", return_dataframe=True) for support of EarthSearch v0
-    assert set(Expression(spyndex_object=spyndex_indices[spectral_index]).assets).issubset(
-        set(stac_metadata.asset_names)
-    ), f'Assets for spectral index "{spectral_index}" are not available in collection "{collection}".'
-    assert (
-        spectral_index in spyndex_indices.keys()
-    ), f"{spectral_index} is not a valid spectral index (must be available via spyndex.indices)."
-    return Expression(spyndex_object=spyndex_indices["NDVI"])
+
+    # get list of `name` values where `common_name` matches spectral_index
+    assets_ = Expression(spyndex_object=spyndex_indices[spectral_index]).assets
+    expression_str_ = Expression(spyndex_object=spyndex_indices[spectral_index]).expression
+
+    df_collection_assets = stac_metadata.parse_asset_bands("eo:bands", return_dataframe=True)
+    df_expr_assets_ = df_collection_assets[df_collection_assets["common_name"].isin(assets_)][
+        ["name", "common_name"]
+    ].drop_duplicates(subset="common_name")
+
+    # Check if all `common_names` match `names` (i.e., collection_names)
+    if any([row["name"] != row["common_name"] for _, row in df_expr_assets_.iterrows()]):
+        # Raise error if collection does not have assets available
+        for asset in assets_:
+            if asset not in df_expr_assets_["common_name"].values:
+                raise ValueError(
+                    f'"{asset}" is required for spectral index "{spectral_index}", but is not a valid asset (i.e., '
+                    f'"common_name") in collection "{collection}".\nValid "common_names":\n'
+                    f'{df_collection_assets["common_name"].values}'
+                )
+        # Set assets_override and expression_override based on available `name` values for the collection.
+        assets_override = [
+            df_expr_assets_[df_expr_assets_["common_name"] == asset]["name"].values[0] for asset in assets_
+        ]
+        expression_override = _generate_expression_override(expression_str_, assets_, assets_override)
+    else:
+        assets_override, expression_override = None, None
+
+    expression = Expression(
+        spyndex_object=spyndex_indices[spectral_index],
+        assets_override=assets_override,
+        expression_override=expression_override,
+    )
+
+    if not set(expression.assets).issubset(set(stac_metadata.asset_names)):
+        raise AssertionError(
+            f'Assets for spectral index "{spectral_index}" are not available in collection "{collection}".'
+        )
+    return expression


@@ -79,11 +122,12 @@ def expressions_from_collection(
     Returns:
         Expressions: The Expressions object containing all available spectral indices for the given collection.
     """
-    assert collection._version == "v1", (
-        f"The `collection` and `expression_from_collection()` function versions do not match ({collection._version} vs "
-        f'v1). Ensure both the "{collection}" and expression_from_collection() function are imported from the same '
-        "version of `pixels_utils.stac_catalogs.earthsearch`."
-    )
+    if collection._version != "v1":
+        raise AssertionError(
+            f"The `collection` and `expressions_from_collection()` function versions do not match ({collection._version} vs "
+            f'v1). Ensure both the "{collection}" and expressions_from_collection() function are imported from the same '
+            "version of `pixels_utils.stac_catalogs.earthsearch`."
+        )
     # Get list of assets in collection and filter spyndex_bands by collection assets
     stac_collection_url = EARTHSEARCH_COLLECTION_URL.format(collection=collection.name)
     stac_metadata = STACMetaData(collection_url=stac_collection_url)
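
The override mechanism itself is plain string substitution. A toy walk-through of `_generate_expression_override()` with hypothetical band names (real `name`/`common_name` pairs come from the collection's `eo:bands` metadata, not from this example):

# Hypothetical values; actual pairs come from STACMetaData.parse_asset_bands("eo:bands")
expression = "(nir - red) / (nir + red)"
assets = ["nir", "red"]  # common_name values used by spyndex
assets_override = ["B08", "B04"]  # collection-specific asset names (illustrative)

# Mirrors the loop inside _generate_expression_override() above
for asset_common, asset_collection in zip(assets, assets_override):
    expression = expression.replace(asset_common, asset_collection)

assert expression == "(B08 - B04) / (B08 + B04)"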
2 changes: 1 addition & 1 deletion pixels_utils/titiler/endpoints/stac/crop/_crop.py
@@ -359,7 +359,7 @@ def to_rasterio(self, **kwargs) -> Tuple[ArrayLike, Profile, Dict]:
         # TODO: Consider validating kwargs before passing to parse_crop_response()
         data_mask, profile_mask, tags = parse_crop_response(
             r=self.response,
-            **kwargs
+            **kwargs,
             # **{"dtype": float32, "band_names": [collection_ndvi.short_name], "band_description": [collection_ndvi.short_name]},
         )
         return data_mask, profile_mask, tags
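
The only functional change here is the trailing comma, but the commented-out line hints at the typical kwargs. A hedged example of passing them through `to_rasterio()`; `crop` and `expr` are illustrative names for a configured `Crop` and `Expression`, mirroring the call in `pixels_utils/helpers/_helper.py` above:

from numpy import float32

data_mask, profile_mask, tags = crop.to_rasterio(
    dtype=float32,
    band_names=[expr.short_name],
    band_description=[expr.short_name],
)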