diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1f46ff4..2988e5d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -40,7 +40,7 @@ jobs:
           command: |
             if [ "${CIRCLE_BRANCH}" == "develop" ]; then
               STAGE='staging' cdk deploy covid-api-dataset-metadata-generator-staging --region us-east-1 --require-approval never
-              STAGE='staging' VPC_ID='vpc-0fa3007e738c7bbdf' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never
+              STAGE='staging' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never
             fi
 
   deploy-production:
diff --git a/.gitignore b/.gitignore
index 9a55f92..1f4490c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,7 +24,9 @@ wheels/
 *.egg-info/
 .installed.cfg
 *.egg
+Pipfile
+cdk.context.json
 
 # PyInstaller
 # Usually these files are written by a python script from a template
diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index d050b0c..96debde 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,23 +1,185 @@
 """API metadata."""
+import re
+from concurrent import futures
+from datetime import datetime, timedelta
+from typing import List, Union
+
+from dateutil.relativedelta import relativedelta
 
 from covid_api.api.utils import get_zonal_stat
+from covid_api.core.config import API_VERSION_STR
+from covid_api.db.static.datasets import datasets as _datasets
+from covid_api.db.static.errors import InvalidIdentifier
+from covid_api.db.static.sites import sites
+from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
+
+from starlette.requests import Request
 
 router = APIRouter()
 
+
+# TODO: validate inputs with typing/pydantic models
+def _get_mean_median(query, url, dataset):
+
+    # format S3 URL template with spotlightId, if dataset is
+    # spotlight specific
+    if "{spotlightId}" in url:
+        if not query.spotlight_id:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
+            )
+        url = _insert_spotlight_id(url, query.spotlight_id)
+    try:
+        mean, median = get_zonal_stat(query.geojson, url)
+        return dict(mean=mean, median=median)
+
+    except Exception:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "Unable to calculate mean/median values. This is either due to a bounding box "
+                "extending beyond the edges of the COG, or to no COGs being available for the "
+                "requested date range."
+            ),
+        )
+
+
 @router.post(
     "/timelapse",
     responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=TimelapseValue,
+    response_model=Union[List[TimelapseValue], TimelapseValue],
+    response_model_exclude_none=True,
 )
-def timelapse(query: TimelapseRequest):
+def timelapse(request: Request, query: TimelapseRequest):
     """Handle /timelapse requests."""
-    if query.type == "no2":
-        url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
-    else:
-        url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
-    mean, median = get_zonal_stat(query.geojson, url)
-    return dict(mean=mean, median=median)
+
+    # get dataset metadata for the requested dataset
+    # will be used to validate other parts of the query
+    dataset = _get_dataset_metadata(request, query)
+
+    # extract S3 URL template from dataset metadata info
+    url = _extract_s3_url(dataset)
+
+    if query.date:
+
+        # format S3 URL template with date object
+        url = _insert_date(url, dataset, query.date)
+        return _get_mean_median(query, url, dataset)
+
+    if query.date_range:
+
+        if dataset.time_unit == "day":
+            # Get start and end dates
+            start = _validate_query_date(dataset, query.date_range[0])
+            end = _validate_query_date(dataset, query.date_range[1])
+
+            # Populate all days in between; add 1 so the range includes the end date as well
+            dates = [
+                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
+                for x in range(0, (end - start).days + 1)
+            ]
+
+        if dataset.time_unit == "month":
+            start = datetime.strptime(query.date_range[0], "%Y%m")
+            end = datetime.strptime(query.date_range[1], "%Y%m")
+
+            num_months = (end.year - start.year) * 12 + (end.month - start.month)
+
+            dates = [
+                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
+                for x in range(0, num_months + 1)
+            ]
+
+        with futures.ThreadPoolExecutor(max_workers=10) as executor:
+            future_stats_queries = {
+                executor.submit(
+                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
+                ): date
+                for date in dates
+            }
+
+            stats = []
+
+            for future in futures.as_completed(future_stats_queries):
+                date = future_stats_queries[future]
+                try:
+                    stats.append({"date": date, **future.result()})
+                except HTTPException as e:
+                    stats.append({"date": date, "error": e.detail})
+
+        return sorted(stats, key=lambda s: s["date"])
+
+
+def _get_dataset_metadata(request: Request, query: TimelapseRequest):
+
+    scheme = request.url.scheme
+    host = request.headers["host"]
+
+    if API_VERSION_STR:
+        host += API_VERSION_STR
+
+    dataset = list(
+        filter(
+            lambda d: d.id == query.dataset_id,
+            _datasets.get_all(api_url=f"{scheme}://{host}").datasets,
+        )
+    )
+
+    if not dataset:
+        raise HTTPException(
+            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
+        )
+
+    dataset = dataset[0]
+
+    if dataset.source.type != "raster":
+        raise HTTPException(
+            status_code=400,
+            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
+        )
+
+    return dataset
+
+
+def _extract_s3_url(dataset: Dataset):
+    url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
+    if not url_search:
+        raise HTTPException(status_code=500)
+
+    return url_search.group(1)
+
+
+def _insert_date(url: str, dataset: Dataset, date: str):
+    _validate_query_date(dataset, date)
+    return url.replace("{date}", date)
+
+
+def _validate_query_date(dataset: Dataset, date: str):
+    date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
+    try:
+        return datetime.strptime(date, date_format)
+    except ValueError:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"Invalid date format. {date} should be like "
+                f"{'YYYYMM' if dataset.time_unit == 'month' else 'YYYY_MM_DD'}"
+            ),
+        )
+
+
+def _insert_spotlight_id(url: str, spotlight_id: str):
+    if not spotlight_id:
+        raise HTTPException(status_code=400, detail="Missing spotlightId")
+    try:
+        sites.get(spotlight_id)
+    except InvalidIdentifier:
+        raise HTTPException(
+            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
+        )
+
+    return url.replace("{spotlightId}", spotlight_id)
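For reference, the date-range expansion above can be exercised on its own. A minimal sketch, assuming only the standard library plus python-dateutil; the sample dates are made up, but the timedelta/relativedelta arithmetic mirrors the endpoint:

from datetime import datetime, timedelta

from dateutil.relativedelta import relativedelta

# "day" datasets expand a date range into YYYY_MM_DD steps;
# the +1 ensures the end date itself is included
start, end = datetime(2020, 1, 30), datetime(2020, 2, 2)
days = [
    datetime.strftime(start + timedelta(days=x), "%Y_%m_%d")
    for x in range((end - start).days + 1)
]
print(days)  # ['2020_01_30', '2020_01_31', '2020_02_01', '2020_02_02']

# "month" datasets expand into YYYYMM steps via relativedelta
start, end = datetime.strptime("201912", "%Y%m"), datetime.strptime("202003", "%Y%m")
num_months = (end.year - start.year) * 12 + (end.month - start.month)
months = [
    datetime.strftime(start + relativedelta(months=+x), "%Y%m")
    for x in range(num_months + 1)
]
print(months)  # ['201912', '202001', '202002', '202003']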
diff --git a/covid_api/core/config.py b/covid_api/core/config.py
index def35db..10cc2f1 100644
--- a/covid_api/core/config.py
+++ b/covid_api/core/config.py
@@ -31,3 +31,5 @@ DT_FORMAT = "%Y-%m-%d"
 MT_FORMAT = "%Y%m"
 
 PLANET_API_KEY = os.environ.get("PLANET_API_KEY")
+
+TIMELAPSE_MAX_AREA = 200000  # km^2
diff --git a/covid_api/db/static/datasets/fb-population-density.json b/covid_api/db/static/datasets/fb-population-density.json
index 6151023..f279868 100644
--- a/covid_api/db/static/datasets/fb-population-density.json
+++ b/covid_api/db/static/datasets/fb-population-density.json
@@ -35,16 +35,15 @@
   },
   "legend": {
     "type": "gradient",
-    "min": "less",
-    "max": "more",
+    "min": "0 people/30m²",
+    "max": "69 people/30m²",
     "stops": [
-      "#99c5e0",
-      "#f9eaa9",
-      "#f7765d",
-      "#c13b72",
-      "#461070",
-      "#050308"
+      "#FFEFCB",
+      "#FBA54A",
+      "#FB9F46",
+      "#F35228",
+      "#BD0026"
     ]
   },
-  "info": "Facebook high-resolution population density: Darker areas indicate higher population density areas and lighter areas indicate lower population density areas"
+  "info": "Facebook high-resolution population density: Darker areas indicate higher population density and lighter areas indicate lower population density, at 30m² resolution"
 }
\ No newline at end of file
diff --git a/covid_api/main.py b/covid_api/main.py
index 02ac04b..c31a136 100644
--- a/covid_api/main.py
+++ b/covid_api/main.py
@@ -44,7 +44,7 @@
     CORSMiddleware,
     allow_origins=origins,
     allow_credentials=True,
-    allow_methods=["GET"],
+    allow_methods=["*"],
     allow_headers=["*"],
 )
diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 8275a1d..341009c 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -1,8 +1,19 @@
 """Tilelapse models."""
+import re
+from typing import List, Optional
+
+from area import area
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
+
+from covid_api.core import config
+
+
+def to_camel(s):
+    """Convert string s from `snake_case` to `camelCase`"""
+    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
 
 
 class PolygonFeature(Feature):
@@ -14,6 +25,7 @@ class PolygonFeature(Feature):
 class TimelapseValue(BaseModel):
     """"Timelapse values model."""
 
+    date: Optional[str]
     mean: float
     median: float
 
@@ -21,6 +33,38 @@
 class TimelapseRequest(BaseModel):
     """"Timelapse request model."""
 
-    month: str
+    # TODO: parse date/date_range into a python `datetime` object (maybe using a validator?)
+    # TODO: validate that exactly one of `date` or `date_range` is supplied
+    date: Optional[str]
+    date_range: Optional[List[str]]
     geojson: PolygonFeature
-    type: str
+    dataset_id: str
+    spotlight_id: Optional[str]
+
+    @validator("geojson")
+    def validate_query_area(cls, v, values):
+        """Ensure that the requested AOI is not larger than 200 000 km^2,
+        otherwise the query takes too long"""
+        if area(v.geometry.dict()) / (
+            1000 * 1000
+        ) > config.TIMELAPSE_MAX_AREA and values.get("date_range"):
+
+            raise ValueError(
+                "AOI cannot exceed 200 000 km^2 when queried with a date range. "
+                "To query with this AOI, please query with a single date"
+            )
+        return v
+
+    @validator("date_range")
+    def validate_date_objects(cls, v):
+        """Ensure that the date range contains exactly 2 dates"""
+        if not len(v) == 2:
+            raise ValueError("Field `dateRange` must contain exactly 2 dates")
+        return v
+
+    class Config:
+        """Generate aliases that map `camelCase` request fields to the `snake_case`
+        fields used within the code"""
+
+        alias_generator = to_camel
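As a quick illustration of the alias_generator above — a minimal, self-contained sketch (the model here is a stand-in for illustration, not the real TimelapseRequest, which also carries a GeoJSON feature):

import re
from typing import Optional

from pydantic import BaseModel


def to_camel(s):
    """Convert string s from `snake_case` to `camelCase`"""
    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)


class ExampleRequest(BaseModel):
    dataset_id: str
    spotlight_id: Optional[str]

    class Config:
        alias_generator = to_camel


# Incoming JSON uses camelCase keys; attribute access stays snake_case.
req = ExampleRequest.parse_obj({"datasetId": "no2", "spotlightId": "tk"})
print(req.dataset_id, req.spotlight_id)  # no2 tk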
diff --git a/setup.py b/setup.py
index 1d32a67..0f14a55 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@
     "rio-tiler==2.0a.11",
     "fiona",
     "shapely",
+    "area",
     "rasterio==1.1.8",
     "rasterstats",
     "geojson-pydantic",
@@ -42,7 +43,7 @@
 setup(
     name="covid_api",
-    version="0.6.1",
+    version="0.6.2",
     description=u"",
     long_description=long_description,
     long_description_content_type="text/markdown",
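The new `area` dependency backs the AOI size check in TimelapseRequest. A minimal sketch, assuming the package's documented behaviour of returning geodesic area in square metres for a GeoJSON geometry (coordinates below are illustrative):

from area import area

# 1° x 1° cell on the equator, as a GeoJSON polygon geometry
polygon = {
    "type": "Polygon",
    "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]],
}

# area() returns square metres; the validator divides by 1000 * 1000
# to compare km² against config.TIMELAPSE_MAX_AREA (200 000 km²)
size_km2 = area(polygon) / (1000 * 1000)
print(f"{size_km2:.0f} km²")  # ≈ 12 400 km², well under the cap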
diff --git a/stack/config.py b/stack/config.py
index 0e310d0..89a98d9 100644
--- a/stack/config.py
+++ b/stack/config.py
@@ -39,8 +39,10 @@
 # LAMBDA                                                                       #
 #                                                                              #
 ################################################################################
-TIMEOUT: int = 10
-MEMORY: int = 1536
+# TIMEOUT: int = 10
+TIMEOUT: int = 30
+MEMORY: int = 3008
+# MEMORY: int = 10240
 
 # stack skips setting concurrency if this value is 0
 # the stack will instead use unreserved lambda concurrency
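Taken together, a hypothetical end-to-end call against the updated endpoint might look like the sketch below. The host and path prefix are placeholders (the real prefix depends on API_VERSION_STR), and the dataset id, dates, and coordinates are illustrative:

import requests

# camelCase keys, per the alias_generator on TimelapseRequest
payload = {
    "datasetId": "no2",
    "dateRange": ["202001", "202003"],
    "geojson": {
        "type": "Feature",
        "properties": {},
        "geometry": {
            "type": "Polygon",
            "coordinates": [
                [[28.9, 40.9], [29.2, 40.9], [29.2, 41.1], [28.9, 41.1], [28.9, 40.9]]
            ],
        },
    },
}

resp = requests.post("https://covid-api.example.com/v1/timelapse", json=payload)

# With dateRange, the endpoint now returns a list of {date, mean, median}
# objects (or {date, error} for dates that failed), sorted by date.
print(resp.json())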