From a9ba359f15f5cf73f51104fc780d2f396f88148c Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Tue, 26 Jan 2021 16:17:09 -0500
Subject: [PATCH 01/16] implemented timelapse for arbitrary dataset + nodata
 value filtering when calculating mean/median
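
The /timelapse endpoint previously hard-coded the no2 and xco2 S3 URLs. It now
looks the requested dataset up in the metadata catalog, validates the query
date against the dataset's `time_unit`, and masks the raster's `nodata` value
before computing zonal statistics. For illustration (the ids below are sample
entries from the dataset/spotlight catalogs, not an exhaustive list), a request
body now looks roughly like:

    {
        "datasetId": "no2",
        "spotlightId": "ny",
        "date": "202005",
        "geojson": {"type": "Feature", "properties": {}, "geometry": {...}}
    }

instead of the previous {"type": "no2", "month": "202005", "geojson": ...}.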
---
 covid_api/api/api_v1/endpoints/exceptions.py | 27 ++++++
 covid_api/api/api_v1/endpoints/timelapse.py | 96 +++++++++++++++++++-
 covid_api/api/utils.py | 13 ++-
 covid_api/db/static/datasets/no2-diff.json | 2 +-
 covid_api/models/timelapse.py | 18 +++-
 5 files changed, 146 insertions(+), 10 deletions(-)
 create mode 100644 covid_api/api/api_v1/endpoints/exceptions.py

diff --git a/covid_api/api/api_v1/endpoints/exceptions.py b/covid_api/api/api_v1/endpoints/exceptions.py
new file mode 100644
index 0000000..d5a1c5f
--- /dev/null
+++ b/covid_api/api/api_v1/endpoints/exceptions.py
@@ -0,0 +1,27 @@
+"""Exceptions for the endpoints classes"""
+
+
+class NonRasterDataset(Exception):
+    """Thrown if timelapse requested for a non-raster dataset"""
+
+    pass
+
+
+class UnableToExtractS3Url(Exception):
+    """Thrown if code is not able to extract the S3 URL of the dataset """
+
+    pass
+
+
+class InvalidDateFormat(Exception):
+    """Thrown if the timelapse request query contains a date that is not correctly
+    formatted for the given dataset """
+
+    pass
+
+
+class MissingSpotlightId(Exception):
+    """Thrown if the timelapse request query is for a spotlight specific dataset,
+    but no spotlightId was supplied in the query """
+
+    pass

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index d050b0c..37fc314 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,10 +1,25 @@
 """API metadata."""
+import re
+from datetime import datetime
 
+from covid_api.api.api_v1.endpoints.exceptions import (
+    InvalidDateFormat,
+    MissingSpotlightId,
+    NonRasterDataset,
+    UnableToExtractS3Url,
+)
 from covid_api.api.utils import get_zonal_stat
+from covid_api.core.config import API_VERSION_STR, DT_FORMAT, MT_FORMAT
+from covid_api.db.static.datasets import datasets as _datasets
+from covid_api.db.static.errors import InvalidIdentifier
+from covid_api.db.static.sites import sites
+from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
 from fastapi import APIRouter
+from starlette.requests import Request
 
 router = APIRouter()
 
@@ -13,11 +28,82 @@
     responses={200: {"description": "Return timelapse values for a given geometry"}},
     response_model=TimelapseValue,
 )
-def timelapse(query: TimelapseRequest):
+def timelapse(request: Request, query: TimelapseRequest):
     """Handle /timelapse requests."""
-    if query.type == "no2":
-        url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
-    else:
-        url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
+
+    # get dataset metadata for the requested dataset
+    # will be used to validate other parts of the query
+    dataset = _get_dataset_metadata(request, query)
+
+    # extract S3 URL template from dataset metadata info
+    url = _extract_s3_url(dataset)
+
+    # format S3 URL template with date object
+    url = _insert_date(url, dataset, query.date)
+
+    # format S3 URL template with spotlightId, if dataset is
+    # spotlight specific
+    if "{spotlightId}" in url:
+        url = _insert_spotlight_id(url, query.spotlight_id)
+
     mean, median = get_zonal_stat(query.geojson, url)
     return dict(mean=mean, median=median)
+
+
+def _get_dataset_metadata(request: Request, query: TimelapseRequest):
+
+    scheme = request.url.scheme
+    host = request.headers["host"]
+
+    if API_VERSION_STR:
+        host += API_VERSION_STR
+
+    dataset = list(
+        filter(
+            lambda d: d.id == query.dataset_id,
+            _datasets.get_all(api_url=f"{scheme}://{host}").datasets,
+        )
+    )
+
+    if not dataset:
+        raise InvalidIdentifier
+
+    dataset = dataset[0]
+
+    if dataset.source.type != "raster":
+        raise NonRasterDataset
+
+    return dataset
+
+
+def _extract_s3_url(dataset: Dataset):
+    url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
+    if not url_search:
+        raise UnableToExtractS3Url
+
+    return url_search.group(1)
+
+
+def _insert_date(url: str, dataset: Dataset, date: str):
+    _validate_query_date(dataset, date)
+
+    url = url.replace("{date}", date)
+
+
+def _validate_query_date(dataset: Dataset, date: str):
+    date_format = DT_FORMAT if dataset.time_unit == "day" else MT_FORMAT
+    try:
+        datetime.strptime(date, date_format)
+    except ValueError:
+        raise InvalidDateFormat
+
+
+def _insert_spotlight_id(url: str, spotlight_id: str):
+    if not spotlight_id:
+        raise MissingSpotlightId
+    try:
+        sites.get(spotlight_id)
+    except InvalidIdentifier:
+        raise
+
+    url = url.replace("{spotlightId}", spotlight_id)

diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 177164d..8e7986c 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -213,17 +213,26 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
     """Return zonal statistics."""
     geom = shape(geojson.geometry.dict())
     with rasterio.open(raster) as src:
+        # read the raster data matching the geometry bounds
         window = bounds_window(geom.bounds, src.transform)
         # store our window information & read
         window_affine = src.window_transform(window)
         data = src.read(window=window)
-
         # calculate the coverage of pixels for weighting
         pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])
 
+        print("Data: ", data)
+        print("PctCover: ", pctcover)
+        print("Nodata val: ", src.nodata)
+
+        # Create a mask of the data that filters out the tile's `nodata` value. In order
+        # to ensure the average calculation isn't incorrectly affected by large, negative,
+        # `nodata` values.
+        masked_data = np.ma.masked_not_equal(data[0], src.nodata)
+
         return (
-            np.average(data[0], weights=pctcover),
+            np.average(masked_data, weights=pctcover),
             np.nanmedian(data),
         )

diff --git a/covid_api/db/static/datasets/no2-diff.json b/covid_api/db/static/datasets/no2-diff.json
index 62eb16d..32882bb 100644
--- a/covid_api/db/static/datasets/no2-diff.json
+++ b/covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
     "name": "NO\u2082 (Diff)",
     "type": "raster-timeseries",
     "time_unit": "month",
-    "is_periodic": false,
+    "is_periodic": true,
     "s3_location": "OMNO2d_HRMDifference",
     "source": {
         "type": "raster",

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 8275a1d..32093fb 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -1,10 +1,17 @@
 """Tilelapse models."""
+import re
+from typing import Optional
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
 from pydantic import BaseModel
 
 
+def to_camel(s):
+    """ Convert string s from `snake_case` to `camelCase` """
+    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
+
+
 class PolygonFeature(Feature):
     """Feature model."""
 
@@ -21,6 +28,13 @@ class TimelapseValue(BaseModel):
 class TimelapseRequest(BaseModel):
     """"Timelapse request model."""
 
-    month: str
+    date: str
     geojson: PolygonFeature
-    type: str
+    dataset_id: str
+    spotlight_id: Optional[str]
+
+    class Config:
+        """Generate alias to convert `camelCase` requests to `snake_case` fields to be used
+        within the code """
+
+        alias_generator = to_camel

From f3e5dc6f7b1822fda1f998a1303ddeb9bc8071cd Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Tue, 26 Jan 2021 16:44:10 -0500
Subject: [PATCH 02/16] minor fixes to masking function and helper functions
 in timelapse
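
`np.ma.masked_equal` masks the cells whose value equals `src.nodata`, which is
the intent here; `masked_not_equal` did the opposite and masked every valid
pixel. A quick sanity check (arbitrary values, with -9999.0 standing in for
the nodata value):

    >>> import numpy as np
    >>> data = np.array([1.0, 3.0, -9999.0])
    >>> np.ma.masked_equal(data, -9999.0).mean()
    2.0
    >>> np.ma.masked_not_equal(data, -9999.0).mean()
    -9999.0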
---
 covid_api/api/api_v1/endpoints/timelapse.py | 5 ++---
 covid_api/api/utils.py | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index 37fc314..bfd8c56 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -86,8 +86,7 @@ def _extract_s3_url(dataset: Dataset):
 
 def _insert_date(url: str, dataset: Dataset, date: str):
     _validate_query_date(dataset, date)
-
-    url = url.replace("{date}", date)
+    return url.replace("{date}", date)
 
 
 def _validate_query_date(dataset: Dataset, date: str):
@@ -106,4 +105,4 @@ def _insert_spotlight_id(url: str, spotlight_id: str):
     except InvalidIdentifier:
         raise
 
-    url = url.replace("{spotlightId}", spotlight_id)
+    return url.replace("{spotlightId}", spotlight_id)

diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 8e7986c..356cdfe 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -229,7 +229,7 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
         # Create a mask of the data that filters out the tile's `nodata` value. In order
         # to ensure the average calculation isn't incorrectly affected by large, negative,
         # `nodata` values.
-        masked_data = np.ma.masked_not_equal(data[0], src.nodata)
+        masked_data = np.ma.masked_equal(data[0], src.nodata)
 
         return (
             np.average(masked_data, weights=pctcover),

From ef9557afd946944214562f63d4155ce7b80d9d10 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Wed, 27 Jan 2021 10:34:29 -0500
Subject: [PATCH 03/16] modified reserved concurrency to be 50 in non-prod
 stacks

---
 stack/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stack/config.py b/stack/config.py
index 81c73f1..31934c9 100644
--- a/stack/config.py
+++ b/stack/config.py
@@ -41,7 +41,7 @@
 ################################################################################
 TIMEOUT: int = 10
 MEMORY: int = 1536
-MAX_CONCURRENT: int = 500
+MAX_CONCURRENT: int = 500 if STAGE == "prod" else 50
 
 # Cache
 CACHE_NODE_TYPE = "cache.m5.large"

From 131dc38cd5a1974c4c33e05ca36c82553dfbf610 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Wed, 27 Jan 2021 16:48:02 -0500
Subject: [PATCH 04/16] WIP

---
 lambda/dataset_metadata_generator/src/main.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lambda/dataset_metadata_generator/src/main.py b/lambda/dataset_metadata_generator/src/main.py
index 3cb95ff..b76fdfa 100644
--- a/lambda/dataset_metadata_generator/src/main.py
+++ b/lambda/dataset_metadata_generator/src/main.py
@@ -19,9 +19,6 @@
 s3 = boto3.resource("s3")
 bucket = s3.Bucket(BUCKET_NAME)
 
-DT_FORMAT = "%Y-%m-%d"
-MT_FORMAT = "%Y%m"
-
 
 def handler(event, context):
     """

From 0e826d3197db196ba3bff303c3374f5b0c5725ed Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Wed, 27 Jan 2021 21:00:25 -0500
Subject: [PATCH 05/16] fixed invalid `day` format in timelapse endpoint

---
 covid_api/api/api_v1/endpoints/timelapse.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index bfd8c56..f9ce789 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -9,7 +9,7 @@
     UnableToExtractS3Url,
 )
 from covid_api.api.utils import get_zonal_stat
-from covid_api.core.config import API_VERSION_STR, DT_FORMAT, MT_FORMAT
+from covid_api.core.config import API_VERSION_STR
 from covid_api.db.static.datasets import datasets as _datasets
 from covid_api.db.static.errors import InvalidIdentifier
 from covid_api.db.static.sites import sites
@@ -90,7 +90,7 @@ def _insert_date(url: str, dataset: Dataset, date: str):
 
 
 def _validate_query_date(dataset: Dataset, date: str):
-    date_format = DT_FORMAT if dataset.time_unit == "day" else MT_FORMAT
+    date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
     try:
         datetime.strptime(date, date_format)
     except ValueError:

From 2630cc76aac3b009e147a129db86aec6d11169ef Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Fri, 5 Feb 2021 15:30:30 -0500
Subject: [PATCH 06/16] added error handling
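
The endpoint-specific exception classes are replaced with FastAPI's
HTTPException, so clients now receive a JSON body and a meaningful status
code instead of a bare 500. For example (request/response shapes shown for
illustration only):

    POST /timelapse  {"datasetId": "some-unknown-id", ...}
    -> 404 {"detail": "No dataset found for id: some-unknown-id"}

    POST /timelapse  {"datasetId": "no2", "date": "05-2020", ...}
    -> 400 {"detail": "Invalid date format. 05-2020 should be either
       YYYY_MM_DD or YYYYMM"}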
---
 covid_api/api/api_v1/endpoints/detections.py | 2 ++
 covid_api/api/api_v1/endpoints/exceptions.py | 27 --------------
 covid_api/api/api_v1/endpoints/timelapse.py | 38 ++++++++++++--------
 covid_api/api/utils.py | 5 +--
 4 files changed, 27 insertions(+), 45 deletions(-)
 delete mode 100644 covid_api/api/api_v1/endpoints/exceptions.py

diff --git a/covid_api/api/api_v1/endpoints/detections.py b/covid_api/api/api_v1/endpoints/detections.py
index 669b29e..c6409cf 100644
--- a/covid_api/api/api_v1/endpoints/detections.py
+++ b/covid_api/api/api_v1/endpoints/detections.py
@@ -32,5 +32,7 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
             key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
         )
     )
+    # TODO: catch the specific exception that corresponds to a missing file
+    # and raise 404, otherwise raise a generic 500 error.
     except Exception:
         raise HTTPException(status_code=404, detail="Detections not found")

diff --git a/covid_api/api/api_v1/endpoints/exceptions.py b/covid_api/api/api_v1/endpoints/exceptions.py
deleted file mode 100644
index d5a1c5f..0000000
--- a/covid_api/api/api_v1/endpoints/exceptions.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""Exceptions for the endpoints classes"""
-
-
-class NonRasterDataset(Exception):
-    """Thrown if timelapse requested for a non-raster dataset"""
-
-    pass
-
-
-class UnableToExtractS3Url(Exception):
-    """Thrown if code is not able to extract the S3 URL of the dataset """
-
-    pass
-
-
-class InvalidDateFormat(Exception):
-    """Thrown if the timelapse request query contains a date that is not correctly
-    formatted for the given dataset """
-
-    pass
-
-
-class MissingSpotlightId(Exception):
-    """Thrown if the timelapse request query is for a spotlight specific dataset,
-    but no spotlightId was supplied in the query """
-
-    pass

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index f9ce789..fd72883 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -2,12 +2,6 @@
 import re
 from datetime import datetime
 
-from covid_api.api.api_v1.endpoints.exceptions import (
-    InvalidDateFormat,
-    MissingSpotlightId,
-    NonRasterDataset,
-    UnableToExtractS3Url,
-)
 from covid_api.api.utils import get_zonal_stat
 from covid_api.core.config import API_VERSION_STR
 from covid_api.db.static.datasets import datasets as _datasets
@@ -16,7 +10,7 @@
 from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
 from starlette.requests import Request
 
@@ -45,8 +39,14 @@ def timelapse(request: Request, query: TimelapseRequest):
     # spotlight specific
     if "{spotlightId}" in url:
         url = _insert_spotlight_id(url, query.spotlight_id)
+    try:
+        mean, median = get_zonal_stat(query.geojson, url)
+    except ValueError:
+        raise HTTPException(
+            status_code=400,
+            detail="Unable to calculate mean/median values. This is likely due to a bounding box extending beyond the borders of the tile.",
+        )
 
-    mean, median = get_zonal_stat(query.geojson, url)
     return dict(mean=mean, median=median)
 
 
@@ -66,12 +66,17 @@ def _get_dataset_metadata(request: Request, query: TimelapseRequest):
     )
 
     if not dataset:
-        raise InvalidIdentifier
+        raise HTTPException(
+            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
+        )
 
     dataset = dataset[0]
 
     if dataset.source.type != "raster":
-        raise NonRasterDataset
+        raise HTTPException(
+            status_code=400,
+            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
+        )
 
     return dataset
 
@@ -79,7 +84,7 @@ def _get_dataset_metadata(request: Request, query: TimelapseRequest):
 def _extract_s3_url(dataset: Dataset):
     url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
     if not url_search:
-        raise UnableToExtractS3Url
+        raise HTTPException(status_code=500)
 
     return url_search.group(1)
 
@@ -94,15 +99,20 @@ def _validate_query_date(dataset: Dataset, date: str):
     try:
         datetime.strptime(date, date_format)
     except ValueError:
-        raise InvalidDateFormat
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
+        )
 
 
 def _insert_spotlight_id(url: str, spotlight_id: str):
     if not spotlight_id:
-        raise MissingSpotlightId
+        raise HTTPException(status_code=400, detail="Missing spotlightId")
     try:
         sites.get(spotlight_id)
     except InvalidIdentifier:
-        raise
+        raise HTTPException(
+            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
+        )
 
     return url.replace("{spotlightId}", spotlight_id)

diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 356cdfe..645fa2e 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -219,13 +219,10 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
     # store our window information & read
     window_affine = src.window_transform(window)
     data = src.read(window=window)
+
     # calculate the coverage of pixels for weighting
     pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])
-    print("Data: ", data)
-    print("PctCover: ", pctcover)
-    print("Nodata val: ", src.nodata)
-
     # Create a mask of the data that filters out the tile's `nodata` value. In order
     # to ensure the average calculation isn't incorrectly affected by large, negative,
     # `nodata` values.

From 8ef2d850b2a47547cb5307062525354435e982b5 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Thu, 20 May 2021 21:52:12 -0700
Subject: [PATCH 07/16] updated fb-population-density legend stops to match
 ylorrd color scale (same as gibs population)

---
 .../static/datasets/fb-population-density.json | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/covid_api/db/static/datasets/fb-population-density.json b/covid_api/db/static/datasets/fb-population-density.json
index 6151023..f279868 100644
--- a/covid_api/db/static/datasets/fb-population-density.json
+++ b/covid_api/db/static/datasets/fb-population-density.json
@@ -35,16 +35,15 @@
     },
     "legend": {
         "type": "gradient",
-        "min": "less",
-        "max": "more",
+        "min": "0 people/30m²",
+        "max": "69 people/30m²",
         "stops": [
-            "#99c5e0",
-            "#f9eaa9",
-            "#f7765d",
-            "#c13b72",
-            "#461070",
-            "#050308"
+            "#FFEFCB",
+            "#FBA54A",
+            "#FB9F46",
+            "#F35228",
+            "#BD0026"
         ]
     },
-    "info": "Facebook high-resolution population density: Darker areas indicate higher population density areas and lighter areas indicate lower population density areas"
+    "info": "Facebook high-resolution population density: Darker areas indicate higher population density areas and lighter areas indicate lower population density areas, with a 30m² resolution"
 }
\ No newline at end of file

From 6f7617133059c5bf0aec12e4787db7e9acb8224f Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Fri, 21 May 2021 13:55:11 -0700
Subject: [PATCH 08/16] WIP - working on date range timelapse endpoint
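
A date range gets expanded into one zonal-stats query per time step, and the
individual queries are then run concurrently. Monthly datasets step through
the range with dateutil's relativedelta; for example (a standalone sketch of
the same expansion logic):

    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    start, end = datetime(2020, 11, 1), datetime(2021, 2, 1)
    num_months = (end.year - start.year) * 12 + (end.month - start.month)
    dates = [
        datetime.strftime(start + relativedelta(months=+x), "%Y%m")
        for x in range(0, num_months + 1)
    ]
    # -> ['202011', '202012', '202101', '202102']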
---
 covid_api/api/api_v1/endpoints/timelapse.py | 99 +++++++++++++++++----
 covid_api/api/utils.py | 1 -
 covid_api/models/timelapse.py | 26 ++++--
 setup.py | 1 +
 4 files changed, 100 insertions(+), 27 deletions(-)

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index fd72883..e20e3e0 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,6 +1,10 @@
 """API metadata."""
 import re
-from datetime import datetime
+from concurrent import futures
+from datetime import datetime, timedelta
+from typing import List, Union
+
+from dateutil.relativedelta import relativedelta
 
 from covid_api.api.utils import get_zonal_stat
 from covid_api.core.config import API_VERSION_STR
@@ -17,30 +21,101 @@
 router = APIRouter()
 
 
-@router.post(
-    "/timelapse",
-    responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=TimelapseValue,
-)
-def timelapse(request: Request, query: TimelapseRequest):
-    """Handle /timelapse requests."""
-
-    # get dataset metadata for the requested dataset
-    # will be used to validate other parts of the query
-    dataset = _get_dataset_metadata(request, query)
-
-    # extract S3 URL template from dataset metadata info
-    url = _extract_s3_url(dataset)
-
-    # format S3 URL template with date object
-    url = _insert_date(url, dataset, query.date)
+# TODO: validate inputs with typing/pydantic models
+def _get_mean_median(query, url, dataset):
 
     # format S3 URL template with spotlightId, if dataset is
     # spotlight specific
     if "{spotlightId}" in url:
+        if not query.spotlight_id:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
+            )
         url = _insert_spotlight_id(url, query.spotlight_id)
     try:
+        print("REQUESTING ZONAL STATS for URL", url)
         mean, median = get_zonal_stat(query.geojson, url)
+        print("DONE! ", mean, median)
     except ValueError:
         raise HTTPException(
             status_code=400,
             detail="Unable to calculate mean/median values. This is likely due to a bounding box extending beyond the borders of the tile.",
         )
 
     return dict(mean=mean, median=median)
+
+
+@router.post(
+    "/timelapse",
+    responses={200: {"description": "Return timelapse values for a given geometry"}},
+    response_model=Union[TimelapseValue, List[TimelapseValue]],
+)
+def timelapse(request: Request, query: TimelapseRequest):
+    """Handle /timelapse requests."""
+
+    # get dataset metadata for the requested dataset
+    # will be used to validate other parts of the query
+    dataset = _get_dataset_metadata(request, query)
+
+    # extract S3 URL template from dataset metadata info
+    url = _extract_s3_url(dataset)
+
+    if query.date:
+        print("SINGLE DATE IN QUERY - calculating")
+
+        # format S3 URL template with date object
+        url = _insert_date(url, dataset, query.date)
+        print("URL: ", url)
+        return _get_mean_median(query, url, dataset)
+
+    if query.date_range:
+
+        start = _validate_query_date(dataset, query.date_range[0])
+
+        end = _validate_query_date(dataset, query.date_range[1])
+
+        if dataset.time_unit == "day":
+            # Add 1 to days to ensure it contains the end date as well
+            dates = [
+                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
+                for x in range(0, (end - start).days + 1)
+            ]
+
+        if dataset.time_unit == "month":
+            num_months = (end.year - start.year) * 12 + (end.month - start.month)
+            dates = [
+                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
+                for x in range(0, num_months + 1)
+            ]
+        print("DATES TO QUERY: ", dates)
+
+        stats = []
+        with futures.ThreadPoolExecutor(max_workers=15) as executor:
+            future_stats_queries = {
+                executor.submit(
+                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
+                ): date
+                for date in dates
+            }
+            print("FUTURE stats queries: ", future_stats_queries)
+            for future in futures.as_completed(future_stats_queries):
+                date = future_stats_queries[future]
+                print("FROM FUTURE: ", date)
+                try:
+                    print("RESULT: ", future.result())
+                    stats.append({"date": date, **future.result()})
+                except HTTPException as e:
+                    stats.append({"date": date, "error": e.detail})
+        print("STATS TO BE RETURNED: ", stats)
+        return stats
+
 
 def _get_dataset_metadata(request: Request, query: TimelapseRequest):
@@ -155,7 +230,7 @@ def _insert_date(url: str, dataset: Dataset, date: str):
 def _validate_query_date(dataset: Dataset, date: str):
     date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
     try:
-        datetime.strptime(date, date_format)
+        return datetime.strptime(date, date_format)
     except ValueError:
         raise HTTPException(
             status_code=400,

diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 645fa2e..8c2d406 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -214,7 +214,6 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
     """Return zonal statistics."""
     geom = shape(geojson.geometry.dict())
     with rasterio.open(raster) as src:
-        # read the raster data matching the geometry bounds
         window = bounds_window(geom.bounds, src.transform)
         # store our window information & read

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 32093fb..22f5518 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -1,14 +1,14 @@
 """Tilelapse models."""
 import re
-from typing import Optional
+from typing import List, Optional
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
 
 
 def to_camel(s):
-    """ Convert string s from `snake_case` to `camelCase` """
+    """Convert string s from `snake_case` to `camelCase`"""
     return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
 
 
@@ -19,22 +19,34 @@ class PolygonFeature(Feature):
 
 class TimelapseValue(BaseModel):
-    """"Timelapse values model."""
+    """ "Timelapse values model."""
 
     mean: float
     median: float
+    date: Optional[str]
+    error: Optional[str]
 
 
 class TimelapseRequest(BaseModel):
-    """"Timelapse request model."""
+    """ "Timelapse request model."""
 
-    date: str
+    # TODO: parse this into a python `datetime` object (maybe using a validator? )
+    # TODO: validate that exactly one of `date` or `date_range` is supplied
+    date: Optional[str]
+    date_range: Optional[List[str]]
     geojson: PolygonFeature
     dataset_id: str
     spotlight_id: Optional[str]
 
+    @validator("date_range")
+    def validate_date_objects(cls, v):
+        """Validator"""
+        if not len(v) == 2:
+            raise ValueError("Field `dateRange` must contain exactly 2 dates")
+        return v
+
     class Config:
         """Generate alias to convert `camelCase` requests to `snake_case` fields to be used
-        within the code """
+        within the code"""
 
         alias_generator = to_camel

diff --git a/setup.py b/setup.py
index 1d32a67..4c8d413 100644
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,7 @@
     "geojson-pydantic",
     "requests",
     "mercantile",
+    "python-dateutil",
 ]
 extra_reqs = {
     "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],

From 700198424ed599bfb58b4b84ebaa44ffbb93ab40 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Fri, 21 May 2021 14:05:21 -0700
Subject: [PATCH 09/16] works with No2 - very slow with nightlights

---
 covid_api/api/api_v1/endpoints/timelapse.py | 5 ++---
 covid_api/models/timelapse.py | 1 +
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index e20e3e0..b9fd4b6 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -62,17 +62,14 @@ def timelapse(request: Request, query: TimelapseRequest):
     url = _extract_s3_url(dataset)
 
     if query.date:
-        print("SINGLE DATE IN QUERY - calculating")
 
         # format S3 URL template with date object
         url = _insert_date(url, dataset, query.date)
-        print("URL: ", url)
         return _get_mean_median(query, url, dataset)
 
     if query.date_range:
 
         start = _validate_query_date(dataset, query.date_range[0])
-
         end = _validate_query_date(dataset, query.date_range[1])
 
         if dataset.time_unit == "day":
@@ -91,7 +88,7 @@ def timelapse(request: Request, query: TimelapseRequest):
         print("DATES TO QUERY: ", dates)
 
         stats = []
-        with futures.ThreadPoolExecutor(max_workers=15) as executor:
+        with futures.ThreadPoolExecutor(max_workers=10) as executor:
             future_stats_queries = {
                 executor.submit(
                     _get_mean_median, query, _insert_date(url, dataset, date), dataset

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 22f5518..9237cda 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -40,6 +40,7 @@ class TimelapseRequest(BaseModel):
 
     @validator("date_range")
     def validate_date_objects(cls, v):
+
         """Validator"""
         if not len(v) == 2:
             raise ValueError("Field `dateRange` must contain exactly 2 dates")
         return v

From bccd41f70f43b03c63916f04d512514429585c19 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Mon, 24 May 2021 17:40:14 -0700
Subject: [PATCH 10/16] WIP - added threaded timelapse for date-range enabled
 request

---
 covid_api/api/api_v1/endpoints/timelapse.py | 44 ++++++++++++---------
 covid_api/models/timelapse.py | 4 +-
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index b9fd4b6..b2457a4 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -34,22 +34,25 @@ def _get_mean_median(query, url, dataset):
             )
         url = _insert_spotlight_id(url, query.spotlight_id)
     try:
-        print("REQUESTING ZONAL STATS for URL", url)
         mean, median = get_zonal_stat(query.geojson, url)
-        print("DONE! ", mean, median)
-    except ValueError:
+        return dict(mean=mean, median=median)
+
+    except Exception:
         raise HTTPException(
             status_code=400,
-            detail="Unable to calculate mean/median values. This is likely due to a bounding box extending beyond the borders of the tile.",
+            detail=(
+                "Unable to calculate mean/median values. This is either due to a bounding box "
+                "extending beyond the edges of the COG or there are no COGs available for the "
+                "requested date range."
+            ),
         )
 
-    return dict(mean=mean, median=median)
-
 
 @router.post(
     "/timelapse",
     responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=Union[TimelapseValue, List[TimelapseValue]],
+    response_model=Union[List[TimelapseValue], TimelapseValue],
+    response_model_exclude_none=True,
 )
 def timelapse(request: Request, query: TimelapseRequest):
     """Handle /timelapse requests."""
@@ -62,45 +65,48 @@ def timelapse(request: Request, query: TimelapseRequest):
     url = _extract_s3_url(dataset)
 
     if query.date:
 
         # format S3 URL template with date object
         url = _insert_date(url, dataset, query.date)
         return _get_mean_median(query, url, dataset)
 
+    # Gather a list of dates to query
     if query.date_range:
 
-        start = _validate_query_date(dataset, query.date_range[0])
-        end = _validate_query_date(dataset, query.date_range[1])
-
         if dataset.time_unit == "day":
+            # Get start and end dates
+            start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
+            end = datetime.strptime(query.date_range[1], "%Y_%m_%d")
+
-            # Add 1 to days to ensure it contains the end date as well
+            # Populated all days in between Add 1 to days to ensure it contains the end date as well
             dates = [
                 datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
                 for x in range(0, (end - start).days + 1)
             ]
 
         if dataset.time_unit == "month":
+            # Get start and end dates, as a
+            start = datetime.strptime(query.date_range[0], "%Y%m")
+            end = datetime.strptime(query.date_range[1], "%Y%m")
             num_months = (end.year - start.year) * 12 + (end.month - start.month)
             dates = [
                 datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
                 for x in range(0, num_months + 1)
             ]
-        print("DATES TO QUERY: ", dates)
 
-        stats = []
         with futures.ThreadPoolExecutor(max_workers=15) as executor:
             future_stats_queries = {
                 executor.submit(
                     _get_mean_median, query, _insert_date(url, dataset, date), dataset
                 ): date
                 for date in dates
             }
-            print("FUTURE stats queries: ", future_stats_queries)
-            for future in futures.as_completed(future_stats_queries):
-                date = future_stats_queries[future]
-                print("FROM FUTURE: ", date)
-                try:
-                    print("RESULT: ", future.result())
-                    stats.append({"date": date, **future.result()})
-                except HTTPException as e:
-                    stats.append({"date": date, "error": e.detail})
-        print("STATS TO BE RETURNED: ", stats)
-        return stats
+
+        stats = []
+
+        for future in futures.as_completed(future_stats_queries):
+            date = future_stats_queries[future]
+            try:
+                stats.append({"date": date, **future.result()})
+            except HTTPException as e:
+
+                stats.append({"date": date, "error": e.detail})
+        return stats

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 9237cda..1e7dfa6 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -21,8 +21,8 @@ class PolygonFeature(Feature):
 class TimelapseValue(BaseModel):
     """ "Timelapse values model."""
 
-    mean: float
-    median: float
+    mean: Optional[float]
+    median: Optional[float]
     date: Optional[str]
     error: Optional[str]

From c229aed88cf6136a6cc93e47c8eb248f1ac70568 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Thu, 10 Jun 2021 10:24:10 -0400
Subject: [PATCH 11/16] cleanup comments - added AOI area validation
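
AOI size is now validated with the `area` package, which returns the geodesic
area of a GeoJSON geometry in m²; date-range requests over polygons larger
than TIMELAPSE_MAX_AREA (200 000 km²) are rejected up front rather than left
to time out. Roughly (a sketch, with `feature` standing in for the parsed
request geojson):

    from area import area

    aoi_km2 = area(feature.geometry.dict()) / (1000 * 1000)
    if aoi_km2 > 200000 and query.date_range:
        ...  # reject with a validation error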
---
 .gitignore | 2 ++
 covid_api/api/api_v1/endpoints/timelapse.py | 33 +++++++++++----------
 covid_api/core/config.py | 2 ++
 covid_api/main.py | 2 +-
 covid_api/models/timelapse.py | 19 ++++++++++++-
 setup.py | 1 +
 stack/config.py | 6 ++--
 7 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9a55f92..1f4490c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,7 +24,9 @@ wheels/
 *.egg-info/
 .installed.cfg
 *.egg
+Pipfile
+cdk.context.json
 
 # PyInstaller
 # Usually these files are written by a python script from a template

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index b2457a4..96debde 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -70,31 +70,31 @@ def timelapse(request: Request, query: TimelapseRequest):
         url = _insert_date(url, dataset, query.date)
         return _get_mean_median(query, url, dataset)
 
-    # Gather a list of dates to query
     if query.date_range:
 
         if dataset.time_unit == "day":
             # Get start and end dates
-            start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
-            end = datetime.strptime(query.date_range[1], "%Y_%m_%d")
+            start = _validate_query_date(dataset, query.date_range[0])
+            end = _validate_query_date(dataset, query.date_range[1])
 
-            # Populated all days in between Add 1 to days to ensure it contains the end date as well
+            # Populate all days in between. Add 1 to days to ensure it contains the end date as well
             dates = [
                 datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
                 for x in range(0, (end - start).days + 1)
             ]
 
         if dataset.time_unit == "month":
-            # Get start and end dates, as a
             start = datetime.strptime(query.date_range[0], "%Y%m")
             end = datetime.strptime(query.date_range[1], "%Y%m")
+
             num_months = (end.year - start.year) * 12 + (end.month - start.month)
+
             dates = [
                 datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
                 for x in range(0, num_months + 1)
             ]
 
-        with futures.ThreadPoolExecutor(max_workers=15) as executor:
+        with futures.ThreadPoolExecutor(max_workers=10) as executor:
             future_stats_queries = {
                 executor.submit(
                     _get_mean_median, query, _insert_date(url, dataset, date), dataset
                 ): date
                 for date in dates
             }
 
-        stats = []
+            stats = []
 
-        for future in futures.as_completed(future_stats_queries):
-            date = future_stats_queries[future]
-            try:
-                stats.append({"date": date, **future.result()})
-            except HTTPException as e:
+            for future in futures.as_completed(future_stats_queries):
+                date = future_stats_queries[future]
+                try:
+                    stats.append({"date": date, **future.result()})
+                except HTTPException as e:
 
-                stats.append({"date": date, "error": e.detail})
-        return stats
+                    stats.append({"date": date, "error": e.detail})
+
+    return sorted(stats, key=lambda s: s["date"])
 
 
 def _get_dataset_metadata(request: Request, query: TimelapseRequest):
@@ -165,7 +165,10 @@ def _validate_query_date(dataset: Dataset, date: str):
     except ValueError:
         raise HTTPException(
             status_code=400,
-            detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
+            detail=(
+                f"Invalid date format. {date} should be like "
+                f"{'YYYYMM' if dataset.time_unit == 'month' else 'YYYY_MM_DD'}"
+            ),
         )

diff --git a/covid_api/core/config.py b/covid_api/core/config.py
index def35db..10cc2f1 100644
--- a/covid_api/core/config.py
+++ b/covid_api/core/config.py
@@ -31,3 +31,5 @@
 DT_FORMAT = "%Y-%m-%d"
 MT_FORMAT = "%Y%m"
 PLANET_API_KEY = os.environ.get("PLANET_API_KEY")
+
+TIMELAPSE_MAX_AREA = 200000  # km^2

diff --git a/covid_api/main.py b/covid_api/main.py
index 02ac04b..c31a136 100644
--- a/covid_api/main.py
+++ b/covid_api/main.py
@@ -44,7 +44,7 @@
     CORSMiddleware,
     allow_origins=origins,
     allow_credentials=True,
-    allow_methods=["GET"],
+    allow_methods=["*"],
     allow_headers=["*"],
 )

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 1e7dfa6..4c67a5b 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -2,10 +2,13 @@
 import re
 from typing import List, Optional
 
+from area import area
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
 from pydantic import BaseModel, validator
 
+from covid_api.core import config
+
 
 def to_camel(s):
     """Convert string s from `snake_case` to `camelCase`"""
@@ -30,7 +33,7 @@ class TimelapseValue(BaseModel):
 class TimelapseRequest(BaseModel):
     """ "Timelapse request model."""
 
-    # TODO: parse this into a python `datetime` object (maybe using a validator? )
+    # TODO: parse date/date_range into a python `datetime` object (maybe using a validator? )
     # TODO: validate that exactly one of `date` or `date_range` is supplied
     date: Optional[str]
     date_range: Optional[List[str]]
     geojson: PolygonFeature
     dataset_id: str
     spotlight_id: Optional[str]
 
+    @validator("geojson")
+    def validate_query_area(cls, v, values):
+        """Ensure that requested AOI is not larger than 200 000 km^2, otherwise
+        query takes too long"""
+        if area(v.geometry.dict()) / (
+            1000 * 1000
+        ) > config.TIMELAPSE_MAX_AREA and values.get("date_range"):
+
+            raise ValueError(
+                "AOI cannot exceed 200 000 km^2, when queried with a date range. "
+                "To query with this AOI please query with a single date"
+            )
+        return v
+
     @validator("date_range")
     def validate_date_objects(cls, v):

diff --git a/setup.py b/setup.py
index 4c8d413..8942441 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@
     "rio-tiler==2.0a.11",
     "fiona",
     "shapely",
+    "area",
     "rasterio==1.1.8",
     "rasterstats",
     "geojson-pydantic",

diff --git a/stack/config.py b/stack/config.py
index 0e310d0..89a98d9 100644
--- a/stack/config.py
+++ b/stack/config.py
@@ -39,8 +39,10 @@
 # LAMBDA #
 # #
 ################################################################################
-TIMEOUT: int = 10
-MEMORY: int = 1536
+# TIMEOUT: int = 10
+TIMEOUT: int = 30
+MEMORY: int = 3008
+# MEMORY: int = 10240
 
 # stack skips setting concurrency if this value is 0
 # the stack will instead use unreserved lambda concurrency

From 0458aaaf4b4e7cfd64967ec22fd02049bbb17412 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Thu, 10 Jun 2021 10:25:33 -0400
Subject: [PATCH 12/16] Revert "Feature: abstract timelapse endpoint"

---
 covid_api/api/api_v1/endpoints/detections.py | 2 -
 covid_api/api/api_v1/endpoints/timelapse.py | 177 +-----------------
 covid_api/api/utils.py | 7 +-
 covid_api/db/static/datasets/no2-diff.json | 2 +-
 covid_api/models/timelapse.py | 41 +----
 lambda/dataset_metadata_generator/src/main.py | 4 +
 setup.py | 1 -
 7 files changed, 22 insertions(+), 212 deletions(-)

diff --git a/covid_api/api/api_v1/endpoints/detections.py b/covid_api/api/api_v1/endpoints/detections.py
index 8170db1..f649916 100644
--- a/covid_api/api/api_v1/endpoints/detections.py
+++ b/covid_api/api/api_v1/endpoints/detections.py
@@ -33,7 +33,5 @@ def get_detection(ml_type: MLTypes, site: SiteNames, date: str):
             key=f"detections-{ml_type.value}/{site.value}/{date}.geojson",
         )
     )
-    # TODO: catch the specific exception that corresponds to a missing file
-    # and raise 404, otherwise raise a generic 500 error.
     except Exception:
         raise HTTPException(status_code=404, detail="Detections not found")

diff --git a/covid_api/api/api_v1/endpoints/timelapse.py b/covid_api/api/api_v1/endpoints/timelapse.py
index b2457a4..d050b0c 100644
--- a/covid_api/api/api_v1/endpoints/timelapse.py
+++ b/covid_api/api/api_v1/endpoints/timelapse.py
@@ -1,182 +1,23 @@
 """API metadata."""
-import re
-from concurrent import futures
-from datetime import datetime, timedelta
-from typing import List, Union
-
-from dateutil.relativedelta import relativedelta
 
 from covid_api.api.utils import get_zonal_stat
-from covid_api.core.config import API_VERSION_STR
-from covid_api.db.static.datasets import datasets as _datasets
-from covid_api.db.static.errors import InvalidIdentifier
-from covid_api.db.static.sites import sites
-from covid_api.models.static import Dataset
 from covid_api.models.timelapse import TimelapseRequest, TimelapseValue
-from fastapi import APIRouter, HTTPException
-
-from starlette.requests import Request
+from fastapi import APIRouter
 
 router = APIRouter()
 
 
-# TODO: validate inputs with typing/pydantic models
-def _get_mean_median(query, url, dataset):
-
-    # format S3 URL template with spotlightId, if dataset is
-    # spotlight specific
-    if "{spotlightId}" in url:
-        if not query.spotlight_id:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}",
-            )
-        url = _insert_spotlight_id(url, query.spotlight_id)
-    try:
-        mean, median = get_zonal_stat(query.geojson, url)
-        return dict(mean=mean, median=median)
-
-    except Exception:
-        raise HTTPException(
-            status_code=400,
-            detail=(
-                "Unable to calculate mean/median values. This is either due to a bounding box "
-                "extending beyond the edges of the COG or there are no COGs available for the "
-                "requested date range."
-            ),
-        )
-
-
 @router.post(
     "/timelapse",
     responses={200: {"description": "Return timelapse values for a given geometry"}},
-    response_model=Union[List[TimelapseValue], TimelapseValue],
-    response_model_exclude_none=True,
+    response_model=TimelapseValue,
 )
-def timelapse(request: Request, query: TimelapseRequest):
+def timelapse(query: TimelapseRequest):
     """Handle /timelapse requests."""
-
-    # get dataset metadata for the requested dataset
-    # will be used to validate other parts of the query
-    dataset = _get_dataset_metadata(request, query)
-
-    # extract S3 URL template from dataset metadata info
-    url = _extract_s3_url(dataset)
-
-    if query.date:
-
-        # format S3 URL template with date object
-        url = _insert_date(url, dataset, query.date)
-        return _get_mean_median(query, url, dataset)
-
-    # Gather a list of dates to query
-    if query.date_range:
-
-        if dataset.time_unit == "day":
-            # Get start and end dates
-            start = datetime.strptime(query.date_range[0], "%Y_%m_%d")
-            end = datetime.strptime(query.date_range[1], "%Y_%m_%d")
-
-            # Populated all days in between Add 1 to days to ensure it contains the end date as well
-            dates = [
-                datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d")
-                for x in range(0, (end - start).days + 1)
-            ]
-
-        if dataset.time_unit == "month":
-            # Get start and end dates, as a
-            start = datetime.strptime(query.date_range[0], "%Y%m")
-            end = datetime.strptime(query.date_range[1], "%Y%m")
-            num_months = (end.year - start.year) * 12 + (end.month - start.month)
-            dates = [
-                datetime.strftime((start + relativedelta(months=+x)), "%Y%m")
-                for x in range(0, num_months + 1)
-            ]
-
-        with futures.ThreadPoolExecutor(max_workers=15) as executor:
-            future_stats_queries = {
-                executor.submit(
-                    _get_mean_median, query, _insert_date(url, dataset, date), dataset
-                ): date
-                for date in dates
-            }
-
-        stats = []
-
-        for future in futures.as_completed(future_stats_queries):
-            date = future_stats_queries[future]
-            try:
-                stats.append({"date": date, **future.result()})
-            except HTTPException as e:
-
-                stats.append({"date": date, "error": e.detail})
-        return stats
-
-
-def _get_dataset_metadata(request: Request, query: TimelapseRequest):
-
-    scheme = request.url.scheme
-    host = request.headers["host"]
-
-    if API_VERSION_STR:
-        host += API_VERSION_STR
-
-    dataset = list(
-        filter(
-            lambda d: d.id == query.dataset_id,
-            _datasets.get_all(api_url=f"{scheme}://{host}").datasets,
-        )
-    )
-
-    if not dataset:
-        raise HTTPException(
-            status_code=404, detail=f"No dataset found for id: {query.dataset_id}"
-        )
-
-    dataset = dataset[0]
-
-    if dataset.source.type != "raster":
-        raise HTTPException(
-            status_code=400,
-            detail=f"Dataset {query.dataset_id} is not a raster-type dataset",
-        )
-
-    return dataset
-
-
-def _extract_s3_url(dataset: Dataset):
-    url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0])
-    if not url_search:
-        raise HTTPException(status_code=500)
-
-    return url_search.group(1)
-
-
-def _insert_date(url: str, dataset: Dataset, date: str):
-    _validate_query_date(dataset, date)
-    return url.replace("{date}", date)
-
-
-def _validate_query_date(dataset: Dataset, date: str):
-    date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m"
-    try:
-        return datetime.strptime(date, date_format)
-    except ValueError:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Invalid date format. {date} should be either YYYY_MM_DD or YYYYMM",
-        )
-
-
-def _insert_spotlight_id(url: str, spotlight_id: str):
-    if not spotlight_id:
-        raise HTTPException(status_code=400, detail="Missing spotlightId")
-    try:
-        sites.get(spotlight_id)
-    except InvalidIdentifier:
-        raise HTTPException(
-            status_code=404, detail=f"No spotlight found for id: {spotlight_id}"
-        )
-
-    return url.replace("{spotlightId}", spotlight_id)
+    if query.type == "no2":
+        url = f"s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{query.month}_Col3_V4.nc.tif"
+    else:
+        url = f"s3://covid-eo-data/xco2-mean/xco2_16day_mean.{query.month}.tif"
+    mean, median = get_zonal_stat(query.geojson, url)
+    return dict(mean=mean, median=median)

diff --git a/covid_api/api/utils.py b/covid_api/api/utils.py
index 8c2d406..1267ea0 100644
--- a/covid_api/api/utils.py
+++ b/covid_api/api/utils.py
@@ -223,13 +223,8 @@ def get_zonal_stat(geojson: Feature, raster: str) -> Tuple[float, float]:
     # calculate the coverage of pixels for weighting
     pctcover = rasterize_pctcover(geom, atrans=window_affine, shape=data.shape[1:])
 
-    # Create a mask of the data that filters out the tile's `nodata` value. In order
-    # to ensure the average calculation isn't incorrectly affected by large, negative,
-    # `nodata` values.
-    masked_data = np.ma.masked_equal(data[0], src.nodata)
-
     return (
-        np.average(masked_data, weights=pctcover),
+        np.average(data[0], weights=pctcover),
         np.nanmedian(data),
     )

diff --git a/covid_api/db/static/datasets/no2-diff.json b/covid_api/db/static/datasets/no2-diff.json
index d69f09e..987f70c 100644
--- a/covid_api/db/static/datasets/no2-diff.json
+++ b/covid_api/db/static/datasets/no2-diff.json
@@ -3,7 +3,7 @@
     "name": "NO\u2082 (Diff)",
     "type": "raster-timeseries",
     "time_unit": "month",
-    "is_periodic": true,
+    "is_periodic": false,
     "s3_location": "OMNO2d_HRMDifference",
     "source": {
         "type": "raster",

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 4c67a5b..8275a1d 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -1,15 +1,8 @@
 """Tilelapse models."""
-import re
-from typing import List, Optional
 
 from geojson_pydantic.features import Feature
 from geojson_pydantic.geometries import Polygon
-from pydantic import BaseModel, validator
-
-
-def to_camel(s):
-    """Convert string s from `snake_case` to `camelCase`"""
-    return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s)
+from pydantic import BaseModel
 
 
 class PolygonFeature(Feature):
@@ -19,35 +12,15 @@ class PolygonFeature(Feature):
 
 class TimelapseValue(BaseModel):
-    """ "Timelapse values model."""
+    """"Timelapse values model."""
 
-    mean: Optional[float]
-    median: Optional[float]
-    date: Optional[str]
-    error: Optional[str]
+    mean: float
+    median: float
 
 
 class TimelapseRequest(BaseModel):
-    """ "Timelapse request model."""
+    """"Timelapse request model."""
 
-    # TODO: parse this into a python `datetime` object (maybe using a validator? )
-    # TODO: validate that exactly one of `date` or `date_range` is supplied
-    date: Optional[str]
-    date_range: Optional[List[str]]
+    month: str
     geojson: PolygonFeature
-    dataset_id: str
-    spotlight_id: Optional[str]
-
-    @validator("date_range")
-    def validate_date_objects(cls, v):
-
-        """Validator"""
-        if not len(v) == 2:
-            raise ValueError("Field `dateRange` must contain exactly 2 dates")
-        return v
-
-    class Config:
-        """Generate alias to convert `camelCase` requests to `snake_case` fields to be used
-        within the code"""
-
-        alias_generator = to_camel
+    type: str

diff --git a/lambda/dataset_metadata_generator/src/main.py b/lambda/dataset_metadata_generator/src/main.py
index fb6a010..49abb49 100644
--- a/lambda/dataset_metadata_generator/src/main.py
+++ b/lambda/dataset_metadata_generator/src/main.py
@@ -27,6 +27,10 @@
 ).Bucket(BUCKET_NAME)
 
 
+DT_FORMAT = "%Y-%m-%d"
+MT_FORMAT = "%Y%m"
+
+
 def handler(event, context):
     """
     Params:

diff --git a/setup.py b/setup.py
index 4c8d413..1d32a67 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,6 @@
     "geojson-pydantic",
     "requests",
     "mercantile",
-    "python-dateutil",
 ]
 extra_reqs = {
     "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"],

From 6374daa2f0387e810c2ada114380df0a3bcf96f4 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Thu, 10 Jun 2021 10:57:00 -0400
Subject: [PATCH 13/16] added missing date to /timelapse endpoint output

---
 covid_api/models/timelapse.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/covid_api/models/timelapse.py b/covid_api/models/timelapse.py
index 80e6909..341009c 100644
--- a/covid_api/models/timelapse.py
+++ b/covid_api/models/timelapse.py
@@ -25,6 +25,7 @@ class PolygonFeature(Feature):
 class TimelapseValue(BaseModel):
     """"Timelapse values model."""
 
+    date: Optional[str]
     mean: float
     median: float

From faf6377606ea93586c8db11992a4329667e85a66 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Thu, 10 Jun 2021 18:13:06 -0400
Subject: [PATCH 14/16] removed VPC_ID from staging stack deployment to allow
 staging stack to use its own VPC.
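
Without an explicit VPC_ID, the CDK app falls back to provisioning a VPC of
its own. Illustrative only; this assumes the stack resolves its VPC with the
usual lookup-or-create pattern, roughly:

    vpc = (
        ec2.Vpc.from_lookup(self, "vpc", vpc_id=VPC_ID)
        if VPC_ID
        else ec2.Vpc(self, "vpc")
    )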
---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1f46ff4..2988e5d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -40,7 +40,7 @@ jobs:
       command: |
         if [ "${CIRCLE_BRANCH}" == "develop" ]; then
           STAGE='staging' cdk deploy covid-api-dataset-metadata-generator-staging --region us-east-1 --require-approval never
-          STAGE='staging' VPC_ID='vpc-0fa3007e738c7bbdf' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never
+          STAGE='staging' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never
         fi

From 3a9f34a29609981742044c0a4cf88b30cf4ae235 Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Fri, 11 Jun 2021 09:55:34 -0400
Subject: [PATCH 15/16] Bumped to version 0.7.1 - Includes fix for fb
 population legend and dateRange enabled /timelapse endpoint

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 91ff6cf..a83ca9f 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@
 setup(
     name="covid_api",
-    version="0.6.1",
+    version="0.7.1",
     description=u"",
     long_description=long_description,
     long_description_content_type="text/markdown",

From cac6ecde686b41b04067a044ce57a0a4646e059b Mon Sep 17 00:00:00 2001
From: Leo Thomas
Date: Fri, 11 Jun 2021 09:55:42 -0400
Subject: [PATCH 16/16] Bumped to version 0.6.2 - Includes fix for fb
 population legend and dateRange enabled /timelapse endpoint
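
This release includes the dateRange-enabled /timelapse queries; a ranged
request returns one entry per date, shaped roughly like this (values are
illustrative, not real data):

    [
        {"date": "2021_01_01", "mean": 0.53, "median": 0.48},
        {"date": "2021_01_02", "error": "Unable to calculate mean/median values. ..."}
    ]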
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a83ca9f..0f14a55 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@
 setup(
     name="covid_api",
-    version="0.7.1",
+    version="0.6.2",
     description=u"",
     long_description=long_description,
     long_description_content_type="text/markdown",