diff --git a/covid_api/api/api_v1/endpoints/detections.py b/covid_api/api/api_v1/endpoints/detections.py
index f649916..8170db1 100644
--- a/covid_api/api/api_v1/endpoints/detections.py
+++ b/covid_api/api/api_v1/endpoints/detections.py
@@ -33,5 +33,7 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
                 key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
             )
         )
+    # TODO: catch the specific exception that corresponds to a missing file
+    # and raise 404, otherwise raise a generic 500 error.
     except Exception:
         raise HTTPException(status_code=404, detail="Detections not found")
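A minimal sketch of how the TODO above might be resolved, assuming the S3 access behind this endpoint goes through boto3 (so a missing key surfaces as botocore's `ClientError`). The helper name and bucket handling below are illustrative, not part of the patch:

```python
import boto3
from botocore.exceptions import ClientError
from fastapi import HTTPException

s3 = boto3.client("s3")


def get_detection_geojson(bucket: str, key: str) -> bytes:
    """Fetch raw GeoJSON from S3, mapping a missing key to 404 and other S3 errors to 500."""
    try:
        return s3.get_object(Bucket=bucket, Key=key)["Body"].read()
    except ClientError as e:
        # get_object reports a missing key as "NoSuchKey"; some S3 calls report "404" instead
        if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
            raise HTTPException(status_code=404, detail="Detections not found")
        raise HTTPException(status_code=500, detail="Error reading detections from S3")
```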
diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index d050b0c..b2457a4 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,23 +1,182 @@
 """API metadata."""
+import re
+from concurrent import futures
+from datetime import datetime, timedelta
+from typing import List, Union
+
+from dateutil.relativedelta import relativedelta
 
 from covid_api.api.utils import get_zonal_stat
+from covid_api.core.config import API_VERSION_STR
+from covid_api.db.static.datasets import datasets as _datasets
+from covid_api.db.static.errors import InvalidIdentifier
+from covid_api.db.static.sites import sites
+from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
+
+from starlette.requests import Request
 
 router = APIRouter()
 
 
+# TODO: validate inputs with typing/pydantic models
+def _get_mean_median(query, url, dataset):
+
+    # format the S3 URL template with the spotlightId, if the dataset is
+    # spotlight specific
+    if "{spotlightId}" in url:
+        if not query.spotlight_id:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
+            )
+        url = _insert_spotlight_id(url, query.spotlight_id)
+    try:
+        mean, median = get_zonal_stat(query.geojson, url)
+        return dict(mean=mean, median=median)
+
+    except Exception:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "Unable to calculate mean/median values. This is either because the "
+                "bounding box extends beyond the edges of the COG or because there are "
+                "no COGs available for the requested date range."
+            ),
+        )
+
+
 @router.post(
     "/timelapse",
     responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=TimelapseValue,
+    response_model=Union[List[TimelapseValue], TimelapseValue],
+    response_model_exclude_none=True,
 )
-def timelapse(query: TimelapseRequest):
+def timelapse(request: Request, query: TimelapseRequest):
     """Handle /timelapse requests."""
-    if query.type == "no2":
-        url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
-    else:
-        url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
-    mean, median = get_zonal_stat(query.geojson, url)
-    return dict(mean=mean, median=median)
+
+    # get metadata for the requested dataset; it will be used to validate
+    # other parts of the query
+    dataset = _get_dataset_metadata(request, query)
+
+    # extract the S3 URL template from the dataset metadata
+    url = _extract_s3_url(dataset)
+
+    if query.date:
+
+        # format the S3 URL template with the requested date
+        url = _insert_date(url, dataset, query.date)
+        return _get_mean_median(query, url, dataset)
+
+    # Gather a list of dates to query
+    if query.date_range:
+
+        if dataset.time_unit == "day":
+            # Get start and end dates
+            start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
+            end = datetime.strptime(query.date_range[1], "%Y_%m_%d")
+
+            # Populate all days in between. Add 1 to the day count so the range
+            # includes the end date as well.
+            dates = [
+                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
+                for x in range(0, (end - start).days + 1)
+            ]
+
+        if dataset.time_unit == "month":
+            # Get start and end dates as datetime objects
+            start = datetime.strptime(query.date_range[0], "%Y%m")
+            end = datetime.strptime(query.date_range[1], "%Y%m")
+            num_months = (end.year - start.year) * 12 + (end.month - start.month)
+            dates = [
+                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
+                for x in range(0, num_months + 1)
+            ]
+
+        with futures.ThreadPoolExecutor(max_workers=15) as executor:
+            future_stats_queries = {
+                executor.submit(
+                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
+                ): date
+                for date in dates
+            }
+
+            stats = []
+
+            for future in futures.as_completed(future_stats_queries):
+                date = future_stats_queries[future]
+                try:
+                    stats.append({"date": date, **future.result()})
+                except HTTPException as e:
+                    stats.append({"date": date, "error": e.detail})
+
+        return stats
+
+
+def _get_dataset_metadata(request: Request, query: TimelapseRequest):
+
+    scheme = request.url.scheme
+    host = request.headers["host"]
+
+    if API_VERSION_STR:
+        host += API_VERSION_STR
+
+    dataset = list(
+        filter(
+            lambda d: d.id == query.dataset_id,
+            _datasets.get_all(api_url=f"{scheme}://{host}").datasets,
+        )
+    )
+
+    if not dataset:
+        raise HTTPException(
+            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
+        )
+
+    dataset = dataset[0]
+
+    if dataset.source.type != "raster":
+        raise HTTPException(
+            status_code=400,
+            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
+        )
+
+    return dataset
+
+
+def _extract_s3_url(dataset: Dataset):
+    url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
+    if not url_search:
+        raise HTTPException(status_code=500)
+
+    return url_search.group(1)
+
+
+def _insert_date(url: str, dataset: Dataset, date: str):
+    _validate_query_date(dataset, date)
+    return url.replace("{date}", date)
+
+
+def _validate_query_date(dataset: Dataset, date: str):
+    date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
+    try:
+        return datetime.strptime(date, date_format)
+    except ValueError:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
+        )
+
+
+def _insert_spotlight_id(url: str, spotlight_id: str):
+    if not spotlight_id:
+        raise HTTPException(status_code=400, detail="Missing spotlightId")
+    try:
+        sites.get(spotlight_id)
+    except InvalidIdentifier:
+        raise HTTPException(
+            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
+        )
+
+    return url.replace("{spotlightId}", spotlight_id)
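For reference, a hypothetical request against the reworked endpoint. The host, `/v1` prefix, dataset id, spotlight id, and polygon below are illustrative placeholders; the camelCase keys follow the `alias_generator` added in `covid_api/models/timelapse.py`:

```python
import requests

payload = {
    "datasetId": "no2",  # illustrative; must match a raster dataset id known to the API
    "spotlightId": "ny",  # only required when the dataset's tile URL contains {spotlightId}
    "dateRange": ["202001", "202003"],  # YYYYMM strings, since this dataset's time_unit is "month"
    "geojson": {
        "type": "Feature",
        "properties": {},
        "geometry": {
            "type": "Polygon",
            "coordinates": [
                [[-74.3, 40.5], [-73.7, 40.5], [-73.7, 40.9], [-74.3, 40.9], [-74.3, 40.5]]
            ],
        },
    },
}

# one {"date": ..., "mean": ..., "median": ...} entry per date, or {"date": ..., "error": ...}
response = requests.post("http://localhost:8000/v1/timelapse", json=payload)
print(response.json())
```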
diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 1267ea0..8c2d406 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -223,8 +223,13 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
 
         # calculate the coverage of pixels for weighting
         pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])
+
+        # Create a mask of the data that filters out the tile's `nodata` value, in order
+        # to ensure the average calculation isn't incorrectly affected by large, negative
+        # `nodata` values.
+        masked_data = np.ma.masked_equal(data[0], src.nodata)
+
         return (
-            np.average(data[0], weights=pctcover),
+            np.average(masked_data, weights=pctcover),
             np.nanmedian(data),
         )
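A small, self-contained illustration of why masking the `nodata` value matters for the averages computed in `get_zonal_stat`. The `-9999.0` sentinel below is an illustrative nodata value, not one taken from the COGs:

```python
import numpy as np

data = np.array([[-9999.0, 10.0], [20.0, 30.0]])

plain_mean = data.mean()  # -2484.75: the large negative sentinel dominates the average
masked = np.ma.masked_equal(data, -9999.0)
masked_mean = masked.mean()  # 20.0: nodata cells are excluded from the calculation

print(plain_mean, masked_mean)
```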
diff --git a/covid_api/db/static/datasets/no2-diff.json b/covid_api/db/static/datasets/no2-diff.json
index 987f70c..d69f09e 100644
--- a/covid_api/db/static/datasets/no2-diff.json
+++ b/covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
     "name": "NO\u2082 (Diff)",
     "type": "raster-timeseries",
     "time_unit": "month",
-    "is_periodic": false,
+    "is_periodic": true,
     "s3_location": "OMNO2d_HRMDifference",
     "source": {
         "type": "raster",
diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 8275a1d..1e7dfa6 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -1,8 +1,15 @@
 """Tilelapse models."""
+import re
+from typing import List, Optional
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
+
+
+def to_camel(s):
+    """Convert string s from `snake_case` to `camelCase`."""
+    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
 
 
 class PolygonFeature(Feature):
@@ -12,15 +19,35 @@ class PolygonFeature(Feature):
 
 
 class TimelapseValue(BaseModel):
-    """"Timelapse values model."""
+    """Timelapse values model."""
 
-    mean: float
-    median: float
+    mean: Optional[float]
+    median: Optional[float]
+    date: Optional[str]
+    error: Optional[str]
 
 
 class TimelapseRequest(BaseModel):
-    """"Timelapse request model."""
+    """Timelapse request model."""
 
-    month: str
+    # TODO: parse this into a python `datetime` object (maybe using a validator?)
+    # TODO: validate that exactly one of `date` or `date_range` is supplied
+    date: Optional[str]
+    date_range: Optional[List[str]]
     geojson: PolygonFeature
-    type: str
+    dataset_id: str
+    spotlight_id: Optional[str]
+
+    @validator("date_range")
+    def validate_date_objects(cls, v):
+        """Ensure `date_range` contains exactly two dates."""
+        if not len(v) == 2:
+            raise ValueError("Field `dateRange` must contain exactly 2 dates")
+        return v
+
+    class Config:
+        """Generate aliases so that `camelCase` request fields map to the
+        `snake_case` attributes used within the code."""
+
+        alias_generator = to_camel
diff --git a/lambda/dataset_metadata_generator/src/main.py b/lambda/dataset_metadata_generator/src/main.py
index 49abb49..fb6a010 100644
--- a/lambda/dataset_metadata_generator/src/main.py
+++ b/lambda/dataset_metadata_generator/src/main.py
@@ -27,10 +27,6 @@
 ).Bucket(BUCKET_NAME)
 
 
-DT_FORMAT = "%Y-%m-%d"
-MT_FORMAT = "%Y%m"
-
-
 def handler(event, context):
     """
     Params:
diff --git a/setup.py b/setup.py
index 1d32a67..4c8d413 100644
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,7 @@
     "geojson-pydantic",
     "requests",
     "mercantile",
+    "python-dateutil",
 ]
 extra_reqs = {
     "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],
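A minimal sketch of one way to address the second TODO in `covid_api/models/timelapse.py` (requiring exactly one of `date` / `date_range`), written against pydantic v1 to match the `validator` import used in the PR. The class and validator names below are placeholders, not part of the patch:

```python
from typing import List, Optional

from pydantic import BaseModel, root_validator


class TimelapseRequestSketch(BaseModel):
    """Stand-in for TimelapseRequest, showing only the date fields."""

    date: Optional[str]
    date_range: Optional[List[str]]

    @root_validator
    def require_exactly_one_of_date_or_date_range(cls, values):
        """Reject requests that supply both fields or neither field."""
        if bool(values.get("date")) == bool(values.get("date_range")):
            raise ValueError("Provide exactly one of `date` or `dateRange`")
        return values
```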