Skip to content

Commit

Permalink
Merge pull request #132 from NASA-IMPACT/develop
Browse files Browse the repository at this point in the history
Version 0.6.2 - `dateRange` enabled `/timelapse` endpoint
  • Loading branch information
leothomas authored Jun 11, 2021
2 parents 3539503 + cac6ecd commit 4e1ba31
Show file tree
Hide file tree
Showing 9 changed files with 238 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
command: |
if [ "${CIRCLE_BRANCH}" == "develop" ]; then
STAGE='staging' cdk deploy covid-api-dataset-metadata-generator-staging --region us-east-1 --require-approval never
STAGE='staging' VPC_ID='vpc-0fa3007e738c7bbdf' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never
STAGE='staging' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never
fi
deploy-production:
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
Pipfile

cdk.context.json

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
180 changes: 171 additions & 9 deletions covid_api/api/api_v1/endpoints/timelapse.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,185 @@
"""API metadata."""
import re
from concurrent import futures
from datetime import datetime, timedelta
from typing import List, Union

from dateutil.relativedelta import relativedelta

from covid_api.api.utils import get_zonal_stat
from covid_api.core.config import API_VERSION_STR
from covid_api.db.static.datasets import datasets as _datasets
from covid_api.db.static.errors import InvalidIdentifier
from covid_api.db.static.sites import sites
from covid_api.models.static import Dataset
from covid_api.models.timelapse import TimelapseRequest, TimelapseValue

from fastapi import APIRouter
from fastapi import APIRouter, HTTPException

from starlette.requests import Request

router = APIRouter()


# TODO: validate inputs with typing/pydantic models
def _get_mean_median(query, url, dataset):
    """Compute zonal mean/median statistics for the query's AOI.

    Substitutes the query's spotlight id into `url` when the dataset is
    spotlight specific, then runs the zonal-statistics calculation over
    the query's geojson geometry.

    Raises:
        HTTPException(400): if a required `spotlight_id` is missing, or if
            the statistics cannot be computed (e.g. AOI outside the COG's
            extent, or no COG available for the requested date).
    """
    # format S3 URL template with spotlightId, if dataset is
    # spotlight specific
    if "{spotlightId}" in url:
        if not query.spotlight_id:
            raise HTTPException(
                status_code=400,
                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
            )
        url = _insert_spotlight_id(url, query.spotlight_id)

    try:
        mean, median = get_zonal_stat(query.geojson, url)
        return dict(mean=mean, median=median)
    except Exception:
        # Surface any zonal-stats failure as a client error: the most
        # likely causes are an AOI beyond the COG's edges or a missing COG
        # for the requested date.
        raise HTTPException(
            status_code=400,
            detail=(
                "Unable to calculate mean/median values. This is either due to a bounding box "
                "extending beyond the edges of the COG or there are no COGs available for the "
                "requested date range."
            ),
        )


@router.post(
    "/timelapse",
    responses={200: {"description": "Return timelapse values for a given geometry"}},
    response_model=Union[List[TimelapseValue], TimelapseValue],
    response_model_exclude_none=True,
)
def timelapse(request: Request, query: TimelapseRequest):
    """Handle /timelapse requests.

    Returns a single mean/median value when queried with `date`, or a
    date-sorted list of per-date values when queried with `dateRange`.
    """
    # get dataset metadata for the requested dataset
    # will be used to validate other parts of the query
    dataset = _get_dataset_metadata(request, query)

    # extract S3 URL template from dataset metadata info
    url = _extract_s3_url(dataset)

    if query.date:
        # format S3 URL template with date object
        url = _insert_date(url, dataset, query.date)
        return _get_mean_median(query, url, dataset)

    if query.date_range:

        if dataset.time_unit == "day":
            # Get start and end dates
            start = _validate_query_date(dataset, query.date_range[0])
            end = _validate_query_date(dataset, query.date_range[1])

            # Populate all days in between. Add 1 to days to ensure the
            # range contains the end date as well.
            dates = [
                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
                for x in range(0, (end - start).days + 1)
            ]

        if dataset.time_unit == "month":
            start = datetime.strptime(query.date_range[0], "%Y%m")
            end = datetime.strptime(query.date_range[1], "%Y%m")

            # Whole months between start and end; +1 below makes the
            # generated range inclusive of the end month.
            num_months = (end.year - start.year) * 12 + (end.month - start.month)

            dates = [
                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
                for x in range(0, num_months + 1)
            ]

        # Stats queries are I/O bound (S3 reads), so fan out over threads.
        with futures.ThreadPoolExecutor(max_workers=10) as executor:
            future_stats_queries = {
                executor.submit(
                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
                ): date
                for date in dates
            }

            stats = []

            for future in futures.as_completed(future_stats_queries):
                date = future_stats_queries[future]
                try:
                    stats.append({"date": date, **future.result()})
                except HTTPException as e:
                    # Report per-date failures inline instead of failing
                    # the whole request.
                    stats.append({"date": date, "error": e.detail})

        return sorted(stats, key=lambda s: s["date"])


def _get_dataset_metadata(request: Request, query: TimelapseRequest):
    """Look up and validate metadata for the dataset id in the query.

    Raises:
        HTTPException(404): no dataset matches `query.dataset_id`.
        HTTPException(400): the dataset exists but is not raster-typed.
    """
    scheme = request.url.scheme
    host = request.headers["host"]

    if API_VERSION_STR:
        host += API_VERSION_STR

    api_url = f"{scheme}://{host}"
    matching = [
        d
        for d in _datasets.get_all(api_url=api_url).datasets
        if d.id == query.dataset_id
    ]

    if not matching:
        raise HTTPException(
            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
        )

    dataset = matching[0]

    if dataset.source.type != "raster":
        raise HTTPException(
            status_code=400,
            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
        )

    return dataset


def _extract_s3_url(dataset: Dataset):
    """Pull the S3 URL template out of the dataset's tile endpoint query string."""
    match = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
    if match is None:
        # A missing `url=` parameter indicates malformed dataset metadata.
        raise HTTPException(status_code=500)
    return match.group(1)


def _insert_date(url: str, dataset: Dataset, date: str):
    """Substitute a date string into the S3 URL template.

    Validation raises an HTTPException for malformed dates; the parsed
    datetime value itself is not needed here.
    """
    _validate_query_date(dataset, date)
    return url.replace("{date}", date)


def _validate_query_date(dataset: Dataset, date: str):
    """Parse `date` using the dataset's expected format, or raise 400.

    Daily datasets expect `YYYY_MM_DD`; anything else is parsed as `YYYYMM`.
    Returns the parsed `datetime` on success.
    """
    if dataset.time_unit == "day":
        fmt = "%Y_%m_%d"
    else:
        fmt = "%Y%m"

    try:
        return datetime.strptime(date, fmt)
    except ValueError:
        hint = "YYYYMM" if dataset.time_unit == "month" else "YYYY_MM_DD"
        raise HTTPException(
            status_code=400,
            detail=f"Invalid date format. {date} should be like {hint}",
        )


def _insert_spotlight_id(url: str, spotlight_id: str):
    """Substitute a validated spotlight id into the S3 URL template.

    Raises:
        HTTPException(400): no spotlight id was supplied.
        HTTPException(404): the id does not correspond to a known site.
    """
    if not spotlight_id:
        raise HTTPException(status_code=400, detail="Missing spotlightId")

    try:
        # Existence check only — the returned site object is not needed.
        sites.get(spotlight_id)
    except InvalidIdentifier:
        raise HTTPException(
            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
        )

    return url.replace("{spotlightId}", spotlight_id)
2 changes: 2 additions & 0 deletions covid_api/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@
DT_FORMAT = "%Y-%m-%d"
MT_FORMAT = "%Y%m"
PLANET_API_KEY = os.environ.get("PLANET_API_KEY")

TIMELAPSE_MAX_AREA = 200000 # km^2
17 changes: 8 additions & 9 deletions covid_api/db/static/datasets/fb-population-density.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,15 @@
},
"legend": {
"type": "gradient",
"min": "less",
"max": "more",
"min": "0 people/30m²",
"max": "69 people/30m²",
"stops": [
"#99c5e0",
"#f9eaa9",
"#f7765d",
"#c13b72",
"#461070",
"#050308"
"#FFEFCB",
"#FBA54A",
"#FB9F46",
"#F35228",
"#BD0026"
]
},
"info": "Facebook high-resolution population density: Darker areas indicate higher population density areas and lighter areas indicate lower population density areas"
"info": "Facebook high-resolution population density: Darker areas indicate higher population density areas and lighter areas indicate lower population density areas, with a 30m² resolution"
}
2 changes: 1 addition & 1 deletion covid_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["GET"],
allow_methods=["*"],
allow_headers=["*"],
)

Expand Down
50 changes: 47 additions & 3 deletions covid_api/models/timelapse.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
"""Tilelapse models."""

import re
from typing import List, Optional

from area import area
from geojson_pydantic.features import Feature
from geojson_pydantic.geometries import Polygon
from pydantic import BaseModel
from pydantic import BaseModel, validator

from covid_api.core import config


def to_camel(s):
    """Convert string s from `snake_case` to `camelCase`."""

    def _capitalize(match):
        # Drop the underscore, upper-case the letter that followed it.
        return match.group(1).upper()

    # `(?!^)` leaves a leading underscore (e.g. `_private`) untouched.
    return re.sub(r"(?!^)_([a-zA-Z])", _capitalize, s)


class PolygonFeature(Feature):
Expand All @@ -14,13 +25,46 @@ class PolygonFeature(Feature):
class TimelapseValue(BaseModel):
    """Timelapse values model.

    Zonal statistics for a single AOI/date combination.
    """

    # Date the stats apply to; optional — presumably only populated for
    # dateRange queries (TODO confirm against the endpoint handler)
    date: Optional[str]
    mean: float
    median: float


class TimelapseRequest(BaseModel):
    """Timelapse request model.

    Accepts either a single `date` or a two-element `dateRange`, an AOI
    polygon, and the dataset (plus optional spotlight) to query.
    """

    # TODO: parse date/date_range into a python `datetime` object (maybe using a validator? )
    # TODO: validate that exactly one of `date` or `date_range` is supplied
    date: Optional[str]
    # Declared before `geojson` so `validate_query_area` can read it from
    # `values` (pydantic validates fields in declaration order).
    date_range: Optional[List[str]]
    geojson: PolygonFeature
    dataset_id: str
    spotlight_id: Optional[str]

    @validator("geojson")
    def validate_query_area(cls, v, values):
        """Ensure that the requested AOI is not larger than 200 000 km^2 when
        queried with a date range, otherwise the query takes too long."""
        # `area()` returns m^2; divide by 1000^2 to compare in km^2.
        if area(v.geometry.dict()) / (
            1000 * 1000
        ) > config.TIMELAPSE_MAX_AREA and values.get("date_range"):

            raise ValueError(
                "AOI cannot exceed 200 000 km^2, when queried with a date range. "
                "To query with this AOI please query with a single date"
            )
        return v

    @validator("date_range")
    def validate_date_objects(cls, v):
        """Ensure that `dateRange`, when supplied, contains exactly 2 dates."""
        if not len(v) == 2:
            raise ValueError("Field `dateRange` must contain exactly 2 dates")
        return v

    class Config:
        """Generate alias to convert `camelCase` requests to `snake_case` fields to be used
        within the code"""

        alias_generator = to_camel
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"rio-tiler==2.0a.11",
"fiona",
"shapely",
"area",
"rasterio==1.1.8",
"rasterstats",
"geojson-pydantic",
Expand Down Expand Up @@ -42,7 +43,7 @@

setup(
name="covid_api",
version="0.6.1",
version="0.6.2",
description=u"",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
6 changes: 4 additions & 2 deletions stack/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@
# LAMBDA #
# #
################################################################################
TIMEOUT: int = 10
MEMORY: int = 1536
# TIMEOUT: int = 10
TIMEOUT: int = 30
MEMORY: int = 3008
# MEMORY: int = 10240

# stack skips setting concurrency if this value is 0
# the stack will instead use unreserved lambda concurrency
Expand Down

0 comments on commit 4e1ba31

Please sign in to comment.