Feature: abstract timelapse endpoint #113

Merged: 10 commits, Jun 10, 2021
2 changes: 2 additions & 0 deletions covid_api/api/api_v1/endpoints/detections.py
@@ -33,5 +33,7 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
)
)
# TODO: catch the specific exception that corresponds to a missing file
# and raise 404, otherwise raise a generic 500 error.
except Exception:
raise HTTPException(status_code=404, detail="Detections not found")
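The TODO above could be addressed by catching the storage client's missing-object error explicitly rather than mapping every failure to a 404. A minimal sketch, assuming the object is fetched with boto3 (the `s3_get` helper and `INDICATOR_BUCKET` names here are hypothetical):

from botocore.exceptions import ClientError

try:
    content = s3_get(
        bucket=INDICATOR_BUCKET,
        key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
    )
except ClientError as e:
    # "NoSuchKey": the requested file does not exist -> 404.
    if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
        raise HTTPException(status_code=404, detail="Detections not found")
    # Any other S3 failure is unexpected -> 500.
    raise HTTPException(status_code=500, detail="Error retrieving detections")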
177 changes: 168 additions & 9 deletions covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,23 +1,182 @@
"""API metadata."""
import re
from concurrent import futures
from datetime import datetime, timedelta
from typing import List, Union

from dateutil.relativedelta import relativedelta

from covid_api.api.utils import get_zonal_stat
from covid_api.core.config import API_VERSION_STR
from covid_api.db.static.datasets import datasets as _datasets
from covid_api.db.static.errors import InvalidIdentifier
from covid_api.db.static.sites import sites
from covid_api.models.static import Dataset
from covid_api.models.timelapse import TimelapseRequest, TimelapseValue

from fastapi import APIRouter
from fastapi import APIRouter, HTTPException

from starlette.requests import Request

router = APIRouter()


# TODO: validate inputs with typing/pydantic models
def _get_mean_median(query, url, dataset):

# format S3 URL template with spotlightId, if dataset is
# spotlight specific
if "{spotlightId}" in url:
if not query.spotlight_id:
raise HTTPException(
status_code=400,
detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
)
url = _insert_spotlight_id(url, query.spotlight_id)
try:
mean, median = get_zonal_stat(query.geojson, url)
return dict(mean=mean, median=median)

except Exception:
raise HTTPException(
status_code=400,
detail=(
"Unable to calculate mean/median values. This either due to a bounding box "
"extending beyond the edges of the COG or there are no COGs available for the "
"requested date range."
),
)


@router.post(
"/timelapse",
responses={200: {"description": "Return timelapse values for a given geometry"}},
response_model=TimelapseValue,
response_model=Union[List[TimelapseValue], TimelapseValue],
response_model_exclude_none=True,
)
def timelapse(query: TimelapseRequest):
def timelapse(request: Request, query: TimelapseRequest):
"""Handle /timelapse requests."""
if query.type == "no2":
url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
else:
url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
mean, median = get_zonal_stat(query.geojson, url)
return dict(mean=mean, median=median)

# get dataset metadata for the requested dataset
# will be used to validate other parts of the query
dataset = _get_dataset_metadata(request, query)

# extract S3 URL template from dataset metadata info
url = _extract_s3_url(dataset)

if query.date:

# format S3 URL template with date object
url = _insert_date(url, dataset, query.date)
return _get_mean_median(query, url, dataset)

# Gather a list of dates to query
if query.date_range:

if dataset.time_unit == "day":
# Get start and end dates
start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
end = datetime.strptime(query.date_range[1], "%Y_%m_%d")

# Populate all days in between. Add 1 to the day count to ensure the end date is included as well.
dates = [
datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
for x in range(0, (end - start).days + 1)
]

if dataset.time_unit == "month":
# Get start and end dates
start = datetime.strptime(query.date_range[0], "%Y%m")
end = datetime.strptime(query.date_range[1], "%Y%m")
num_months = (end.year - start.year) * 12 + (end.month - start.month)
dates = [
datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
for x in range(0, num_months + 1)
]

with futures.ThreadPoolExecutor(max_workers=15) as executor:
future_stats_queries = {
executor.submit(
_get_mean_median, query, _insert_date(url, dataset, date), dataset
): date
for date in dates
}

stats = []

for future in futures.as_completed(future_stats_queries):
date = future_stats_queries[future]
try:
stats.append({"date": date, **future.result()})
except HTTPException as e:

stats.append({"date": date, "error": e.detail})
return stats


def _get_dataset_metadata(request: Request, query: TimelapseRequest):

scheme = request.url.scheme
host = request.headers["host"]

if API_VERSION_STR:
host += API_VERSION_STR

dataset = list(
filter(
lambda d: d.id == query.dataset_id,
_datasets.get_all(api_url=f"{scheme}://{host}").datasets,
)
)

if not dataset:
raise HTTPException(
status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
)

dataset = dataset[0]

if dataset.source.type != "raster":
raise HTTPException(
status_code=400,
detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
)

return dataset


def _extract_s3_url(dataset: Dataset):
url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
if not url_search:
raise HTTPException(status_code=500)

return url_search.group(1)


def _insert_date(url: str, dataset: Dataset, date: str):
_validate_query_date(dataset, date)
return url.replace("{date}", date)


def _validate_query_date(dataset: Dataset, date: str):
date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
try:
return datetime.strptime(date, date_format)
except ValueError:
raise HTTPException(
status_code=400,
detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
)


def _insert_spotlight_id(url: str, spotlight_id: str):
if not spotlight_id:
raise HTTPException(status_code=400, detail="Missing spotlightId")
try:
sites.get(spotlight_id)
except InvalidIdentifier:
raise HTTPException(
status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
)

return url.replace("{spotlightId}", spotlight_id)
7 changes: 6 additions & 1 deletion covid_api/api/utils.py
@@ -223,8 +223,13 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
# calculate the coverage of pixels for weighting
pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])

# Create a mask of the data that filters out the tile's `nodata` value, so the
# average calculation isn't incorrectly skewed by large, negative `nodata` values.
masked_data = np.ma.masked_equal(data[0], src.nodata)

return (
np.average(data[0], weights=pctcover),
np.average(masked_data, weights=pctcover),
np.nanmedian(data),
)
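The effect of the new masking step can be checked in isolation. A small standalone example (the -9999 sentinel is only an illustrative stand-in for `src.nodata`; `np.ma.average` is used here so masked cells are dropped from both the numerator and the weight sum):

import numpy as np

data = np.array([1.0, 2.0, -9999.0, 3.0])      # -9999 stands in for the nodata sentinel
weights = np.array([0.5, 1.0, 1.0, 1.0])       # per-pixel coverage weights

print(np.average(data, weights=weights))       # approx -2855.3, skewed by the sentinel

masked = np.ma.masked_equal(data, -9999.0)
print(np.ma.average(masked, weights=weights))  # 2.2, sentinel excluded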

2 changes: 1 addition & 1 deletion covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
"name": "NO\u2082 (Diff)",
"type": "raster-timeseries",
"time_unit": "month",
"is_periodic": false,
"is_periodic": true,
"s3_location": "OMNO2d_HRMDifference",
"source": {
"type": "raster",
41 changes: 34 additions & 7 deletions covid_api/models/timelapse.py
@@ -1,8 +1,15 @@
"""Tilelapse models."""
import re
from typing import List, Optional

from geojson_pydantic.features import Feature
from geojson_pydantic.geometries import Polygon
from pydantic import BaseModel
from pydantic import BaseModel, validator


def to_camel(s):
"""Convert string s from `snake_case` to `camelCase`"""
return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)


class PolygonFeature(Feature):
Expand All @@ -12,15 +19,35 @@ class PolygonFeature(Feature):


class TimelapseValue(BaseModel):
""""Timelapse values model."""
""" "Timelapse values model."""

mean: float
median: float
mean: Optional[float]
median: Optional[float]
date: Optional[str]
error: Optional[str]


class TimelapseRequest(BaseModel):
""""Timelapse request model."""
""" "Timelapse request model."""

month: str
# TODO: parse this into a python `datetime` object (maybe using a validator?)
# TODO: validate that exactly one of `date` or `date_range` is supplied
date: Optional[str]
date_range: Optional[List[str]]
geojson: PolygonFeature
type: str
dataset_id: str
spotlight_id: Optional[str]

@validator("date_range")
def validate_date_objects(cls, v):

"""Validator"""
if not len(v) == 2:
raise ValueError("Field `dateRange` must contain exactly 2 dates")
return v

class Config:
"""Generate alias to convert `camelCase` requests to `snake_case` fields to be used
within the code"""

alias_generator = to_camel
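For reference, the alias generator maps the snake_case model fields onto the camelCase keys clients send, and pydantic then populates the fields via those aliases when parsing a request body. A quick check of `to_camel` on its own:

from covid_api.models.timelapse import to_camel

print(to_camel("date_range"))     # "dateRange"
print(to_camel("spotlight_id"))   # "spotlightId"
print(to_camel("geojson"))        # "geojson" (no underscores, unchanged)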
4 changes: 0 additions & 4 deletions lambda/dataset_metadata_generator/src/main.py
@@ -27,10 +27,6 @@
).Bucket(BUCKET_NAME)


DT_FORMAT = "%Y-%m-%d"
MT_FORMAT = "%Y%m"


def handler(event, context):
"""
Params:
1 change: 1 addition & 0 deletions setup.py
@@ -18,6 +18,7 @@
"geojson-pydantic",
"requests",
"mercantile",
"python-dateutil",
]
extra_reqs = {
"dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],