Feature: abstract timelapse endpoint #113

Merged: 10 commits, Jun 10, 2021
2 changes: 2 additions & 0 deletions covid_api/api/api_v1/endpoints/detections.py
@@ -33,5 +33,7 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
)
)
# TODO: catch the specific exception that corresponds to a missing file
# and raise 404, otherwise raise a generic 500 error.
except Exception:
raise HTTPException(status_code=404, detail="Detections not found")
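The TODO above could be addressed by catching the storage client's missing-object error explicitly rather than mapping every failure to a 404. A minimal sketch, assuming the object is fetched with boto3 (the `s3_get` helper and `INDICATOR_BUCKET` names here are hypothetical):

from botocore.exceptions import ClientError

try:
    content = s3_get(
        bucket=INDICATOR_BUCKET,
        key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
    )
except ClientError as e:
    # "NoSuchKey": the requested file does not exist -> 404.
    if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
        raise HTTPException(status_code=404, detail="Detections not found")
    # Any other S3 failure is unexpected -> 500.
    raise HTTPException(status_code=500, detail="Error retrieving detections")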
177 changes: 168 additions & 9 deletions covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,23 +1,182 @@
"""API metadata."""
import re
from concurrent import futures
from datetime import datetime, timedelta
from typing import List, Union

from dateutil.relativedelta import relativedelta

from covid_api.api.utils import get_zonal_stat
from covid_api.core.config import API_VERSION_STR
from covid_api.db.static.datasets import datasets as _datasets
from covid_api.db.static.errors import InvalidIdentifier
from covid_api.db.static.sites import sites
from covid_api.models.static import Dataset
from covid_api.models.timelapse import TimelapseRequest, TimelapseValue

from fastapi import APIRouter
from fastapi import APIRouter, HTTPException

from starlette.requests import Request

router = APIRouter()


# TODO: validate inputs with typing/pydantic models
def _get_mean_median(query, url, dataset):

# format S3 URL template with spotlightId, if dataset is
# spotlight specific
if "{spotlightId}" in url:
if not query.spotlight_id:
raise HTTPException(
status_code=400,
detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
)
url = _insert_spotlight_id(url, query.spotlight_id)
try:
mean, median = get_zonal_stat(query.geojson, url)
return dict(mean=mean, median=median)

except Exception:
raise HTTPException(
status_code=400,
detail=(
"Unable to calculate mean/median values. This either due to a bounding box "
"extending beyond the edges of the COG or there are no COGs available for the "
"requested date range."
),
)


@router.post(
"/timelapse",
responses={200: {"description": "Return timelapse values for a given geometry"}},
response_model=TimelapseValue,
response_model=Union[List[TimelapseValue], TimelapseValue],
response_model_exclude_none=True,
)
def timelapse(query: TimelapseRequest):
def timelapse(request: Request, query: TimelapseRequest):
"""Handle /timelapse requests."""
if query.type == "no2":
url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
else:
url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
mean, median = get_zonal_stat(query.geojson, url)
return dict(mean=mean, median=median)

# get dataset metadata for the requested dataset
# will be used to validate other parts of the query
dataset = _get_dataset_metadata(request, query)

# extract S3 URL template from dataset metadata info
url = _extract_s3_url(dataset)

if query.date:

# format S3 URL template with date object
url = _insert_date(url, dataset, query.date)
return _get_mean_median(query, url, dataset)

# Gather a list of dates to query
if query.date_range:

if dataset.time_unit == "day":
# Get start and end dates
start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
end = datetime.strptime(query.date_range[1], "%Y_%m_%d")

# Populate all days in between. Add 1 to the day count to ensure the end date is included as well.
dates = [
datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
for x in range(0, (end - start).days + 1)
]

if dataset.time_unit == "month":
# Get start and end dates
start = datetime.strptime(query.date_range[0], "%Y%m")
end = datetime.strptime(query.date_range[1], "%Y%m")
num_months = (end.year - start.year) * 12 + (end.month - start.month)
dates = [
datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
for x in range(0, num_months + 1)
]

with futures.ThreadPoolExecutor(max_workers=15) as executor:
future_stats_queries = {
executor.submit(
_get_mean_median, query, _insert_date(url, dataset, date), dataset
): date
for date in dates
}

stats = []

for future in futures.as_completed(future_stats_queries):
date = future_stats_queries[future]
try:
stats.append({"date": date, **future.result()})
except HTTPException as e:

stats.append({"date": date, "error": e.detail})
return stats


def _get_dataset_metadata(request: Request, query: TimelapseRequest):

scheme = request.url.scheme
host = request.headers["host"]

if API_VERSION_STR:
host += API_VERSION_STR

dataset = list(
filter(
lambda d: d.id == query.dataset_id,
_datasets.get_all(api_url=f"{scheme}://{host}").datasets,
)
)

if not dataset:
raise HTTPException(
status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
)

dataset = dataset[0]

if dataset.source.type != "raster":
raise HTTPException(
status_code=400,
detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
)

return dataset


def _extract_s3_url(dataset: Dataset):
url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
if not url_search:
raise HTTPException(status_code=500)

return url_search.group(1)


def _insert_date(url: str, dataset: Dataset, date: str):
_validate_query_date(dataset, date)
return url.replace("{date}", date)


def _validate_query_date(dataset: Dataset, date: str):
date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
try:
return datetime.strptime(date, date_format)
except ValueError:
raise HTTPException(
status_code=400,
detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
)


def _insert_spotlight_id(url: str, spotlight_id: str):
if not spotlight_id:
raise HTTPException(status_code=400, detail="Missing spotlightId")
try:
sites.get(spotlight_id)
except InvalidIdentifier:
raise HTTPException(
status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
)

return url.replace("{spotlightId}", spotlight_id)
7 changes: 6 additions & 1 deletion covid_api/api/utils.py
@@ -223,8 +223,13 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
# calculate the coverage of pixels for weighting
pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])

# Create a mask of the data that filters out the tile's `nodata` value, so the
# average calculation isn't incorrectly skewed by large, negative `nodata` values.
masked_data = np.ma.masked_equal(data[0], src.nodata)

return (
np.average(data[0], weights=pctcover),
np.average(masked_data, weights=pctcover),
np.nanmedian(data),
)
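The effect of the new masking step can be checked in isolation. A small standalone example (the -9999 sentinel is only an illustrative stand-in for `src.nodata`; `np.ma.average` is used here so masked cells are dropped from both the numerator and the weight sum):

import numpy as np

data = np.array([1.0, 2.0, -9999.0, 3.0])      # -9999 stands in for the nodata sentinel
weights = np.array([0.5, 1.0, 1.0, 1.0])       # per-pixel coverage weights

print(np.average(data, weights=weights))       # approx -2855.3, skewed by the sentinel

masked = np.ma.masked_equal(data, -9999.0)
print(np.ma.average(masked, weights=weights))  # 2.2, sentinel excluded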

2 changes: 1 addition & 1 deletion covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
"name": "NO\u2082 (Diff)",
"type": "raster-timeseries",
"time_unit": "month",
"is_periodic": false,
"is_periodic": true,
"s3_location": "OMNO2d_HRMDifference",
"source": {
"type": "raster",
41 changes: 34 additions & 7 deletions covid_api/models/timelapse.py
@@ -1,8 +1,15 @@
"""Tilelapse models."""
import re
from typing import List, Optional

from geojson_pydantic.features import Feature
from geojson_pydantic.geometries import Polygon
from pydantic import BaseModel
from pydantic import BaseModel, validator


def to_camel(s):
"""Convert string s from `snake_case` to `camelCase`"""
return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)


class PolygonFeature(Feature):
Expand All @@ -12,15 +19,35 @@ class PolygonFeature(Feature):


class TimelapseValue(BaseModel):
""""Timelapse values model."""
""" "Timelapse values model."""

mean: float
median: float
mean: Optional[float]
median: Optional[float]
date: Optional[str]
error: Optional[str]


class TimelapseRequest(BaseModel):
""""Timelapse request model."""
""" "Timelapse request model."""

month: str
# TODO: parse this into a python `datetime` object (maybe using a validator?)
# TODO: validate that exactly one of `date` or `date_range` is supplied
date: Optional[str]
date_range: Optional[List[str]]
geojson: PolygonFeature
type: str
dataset_id: str
spotlight_id: Optional[str]

@validator("date_range")
def validate_date_objects(cls, v):

"""Validator"""
if not len(v) == 2:
raise ValueError("Field `dateRange` must contain exactly 2 dates")
return v

class Config:
"""Generate alias to convert `camelCase` requests to `snake_case` fields to be used
within the code"""

alias_generator = to_camel
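For reference, the alias generator maps the snake_case model fields onto the camelCase keys clients send, and pydantic then populates the fields via those aliases when parsing a request body. A quick check of `to_camel` on its own:

from covid_api.models.timelapse import to_camel

print(to_camel("date_range"))     # "dateRange"
print(to_camel("spotlight_id"))   # "spotlightId"
print(to_camel("geojson"))        # "geojson" (no underscores, unchanged)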
4 changes: 0 additions & 4 deletions lambda/dataset_metadata_generator/src/main.py
@@ -27,10 +27,6 @@
).Bucket(BUCKET_NAME)


DT_FORMAT = "%Y-%m-%d"
MT_FORMAT = "%Y%m"


def handler(event, context):
"""
Params:
1 change: 1 addition & 0 deletions setup.py
@@ -18,6 +18,7 @@
"geojson-pydantic",
"requests",
"mercantile",
"python-dateutil",
]
extra_reqs = {
"dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],