diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f8aa2a..c87806e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,7 @@ on: branches: - main - dev + - feat/fsspec-filecache tags: - 'v*' paths: @@ -48,13 +49,20 @@ jobs: python -m pip install pre-commit pre-commit run --all-files + - name: run tests without cache + if: ${{ matrix.python-version == env.LATEST_PY_VERSION }} + env: + TITILER_XARRAY_ENABLE_FSSPEC_CACHE: FALSE + run: python -m pytest --cov titiler.xarray --cov-report term-missing -s -vv + - name: Run tests run: python -m pytest --cov titiler.xarray --cov-report term-missing -s -vv deploy: - needs: [tests] + #needs: [tests] runs-on: ubuntu-latest - if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v') + #if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v') + if: github.ref == 'refs/heads/feat/fsspec-filecache' defaults: run: @@ -88,6 +96,16 @@ jobs: python -m pip install --upgrade pip python -m pip install -r requirements-cdk.txt + # Build and deploy to the feature environment whenever there is a push to main or dev + - name: Build & Deploy Feature2 + if: github.ref == 'refs/heads/feat/fsspec-filecache' + run: npm run cdk -- deploy titiler-xarray-feature2 --require-approval never + env: + TITILER_XARRAY_PYTHONWARNINGS: ignore + TITILER_XARRAY_DEBUG: True + STACK_ALARM_EMAIL: ${{ secrets.ALARM_EMAIL }} + STACK_STAGE: feature2 + # Build and deploy to the development environment whenever there is a push to main or dev - name: Build & Deploy Development if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' diff --git a/.gitignore b/.gitignore index ff51c59..7070c57 100644 --- a/.gitignore +++ b/.gitignore @@ -108,3 +108,6 @@ cdk.out/ node_modules cdk.context.json *.nc + +*cache* +*logs.txt diff --git a/README.md b/README.md index 121751a..917b7d9 100644 --- a/README.md +++ 
b/README.md @@ -15,7 +15,7 @@ virtualenv .venv python -m pip install -e . uvicorn source .venv/bin/activate -uvicorn titiler.xarray.main:app --reload +TITILER_XARRAY_FSSPEC_CACHE_DIRECTORY="fsspec_cache" uvicorn titiler.xarray.main:app --reload ``` To access the docs, visit http://127.0.0.1:8000/docs. diff --git a/infrastructure/aws/cdk/app.py b/infrastructure/aws/cdk/app.py index 350812f..eb54035 100644 --- a/infrastructure/aws/cdk/app.py +++ b/infrastructure/aws/cdk/app.py @@ -3,10 +3,13 @@ import os from typing import Any, Dict, List, Optional +import aws_cdk from aws_cdk import App, CfnOutput, Duration, Stack, Tags from aws_cdk import aws_apigatewayv2_alpha as apigw from aws_cdk import aws_cloudwatch as cloudwatch from aws_cdk import aws_cloudwatch_actions as cloudwatch_actions +from aws_cdk import aws_ec2 as ec2 +from aws_cdk import aws_efs as efs from aws_cdk import aws_iam as iam from aws_cdk import aws_lambda from aws_cdk import aws_logs as logs @@ -54,6 +57,44 @@ def __init__( permissions = permissions or [] environment = environment or {} + vpc = ec2.Vpc( + self, + "titiler-xarray-vpc", + max_azs=2, # Default is all AZs in the region + nat_gateways=1, + cidr="10.0.0.0/16", + # Define custom CIDR range for each subnet type + subnet_configuration=[ + ec2.SubnetConfiguration( + name="Public", subnet_type=ec2.SubnetType.PUBLIC, cidr_mask=24 + ), + ec2.SubnetConfiguration( + name="Private", + subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT, + cidr_mask=24, + ), + ], + ) + + # Create and attach a file system + file_system = efs.FileSystem( + self, + "EfsFileSystem", + vpc=vpc, + lifecycle_policy=efs.LifecyclePolicy.AFTER_7_DAYS, # Or choose another policy + performance_mode=efs.PerformanceMode.GENERAL_PURPOSE, + ) + + access_point = file_system.add_access_point( + "AccessPoint", + path="/export/lambda", + create_acl={"owner_uid": "1001", "owner_gid": "1001", "permissions": "750"}, + posix_user={ + "uid": "1001", + "gid": "1001", + }, + ) + lambda_function = aws_lambda.Function( 
self, f"{id}-lambda", @@ -69,6 +110,18 @@ def __init__( timeout=Duration.seconds(timeout), environment={**DEFAULT_ENV, **environment}, log_retention=logs.RetentionDays.ONE_WEEK, + ephemeral_storage_size=aws_cdk.Size.gibibytes(10), + vpc=vpc, + filesystem=aws_lambda.FileSystem.from_efs_access_point( + access_point, "/mnt/efs" + ), # Mounting it to /mnt/efs in Lambda + ) + + file_system.connections.allow_default_port_from(lambda_function) + file_system.grant( + lambda_function, + "elasticfilesystem:ClientMount", + "elasticfilesystem:ClientWrite", ) for perm in permissions: diff --git a/infrastructure/aws/lambda/Dockerfile b/infrastructure/aws/lambda/Dockerfile index aef3e92..4ca6db9 100644 --- a/infrastructure/aws/lambda/Dockerfile +++ b/infrastructure/aws/lambda/Dockerfile @@ -16,7 +16,7 @@ COPY titiler/ titiler/ # we have to force using old package version that seems `almost` compatible with Lambda env botocore # https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html RUN pip install --upgrade pip -RUN pip install . "rio-tiler>=5.0.0" "cftime" "mangum>=0.10.0" "pandas==1.5.3" "botocore==1.29.76" "aiobotocore==2.5.0" "s3fs==2023.4.0" "fsspec==2023.4.0" "zarr==2.14.2" "xarray==0.19.0" -t /asset --no-binary pydantic +RUN pip install . "mangum>=0.10.0" "botocore==1.29.76" "aiobotocore==2.5.0" -t /asset --no-binary pydantic # Reduce package size and remove useless files RUN cd /asset && find . -type f -name '*.pyc' | while read f; do n=$(echo $f | sed 's/__pycache__\///' | sed 's/.cpython-[0-9]*//'); cp $f $n; done; @@ -24,7 +24,10 @@ RUN cd /asset && find . -type d -a -name '__pycache__' -print0 | xargs -0 rm -rf RUN cd /asset && find . 
-type f -a -name '*.py' -print0 | xargs -0 rm -f RUN find /asset -type d -a -name 'tests' -print0 | xargs -0 rm -rf RUN rm -rdf /asset/numpy/doc/ /asset/bin /asset/geos_license /asset/Misc -RUN rm -rdf /asset/boto3* /asset/botocore* +RUN rm -rdf /asset/boto3* +RUN rm -rdf /asset/botocore* +RUN rm -rdf /asset/dask/tests* +RUN rm -rdf /asset/dask/dataframe* COPY infrastructure/aws/lambda/handler.py /asset/handler.py diff --git a/pyproject.toml b/pyproject.toml index 590bf0f..eaab1f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,18 +25,19 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ + "cftime", "h5netcdf", "xarray", "rioxarray", "zarr", "fsspec", "s3fs", - "requests", "aiohttp", + "requests", "pydantic==2.0.2", "titiler.core>=0.14.1,<0.15", - "starlette-cramjam>=0.3,<0.4", - "pydantic-settings~=2.0" + "pydantic-settings~=2.0", + "pandas==1.5.3", ] [project.optional-dependencies] diff --git a/tests/conftest.py b/tests/conftest.py index c75e63b..5f644de 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,8 @@ """titiler.xarray tests configuration.""" +import os +import shutil + import pytest from fastapi.testclient import TestClient @@ -13,3 +16,16 @@ def app(monkeypatch): with TestClient(app) as client: yield client + + +def pytest_sessionstart(session): + """Setup before tests run.""" + test_cache_dir = "fsspec_test_cache" + os.environ["TITILER_XARRAY_FSSPEC_CACHE_DIRECTORY"] = test_cache_dir + os.makedirs(test_cache_dir, exist_ok=True) + + +def pytest_sessionfinish(session, exitstatus): + """Cleanup step after all tests have been run.""" + shutil.rmtree(os.environ["TITILER_XARRAY_FSSPEC_CACHE_DIRECTORY"]) + print("\nAll tests are done! 
Cleaning up...") diff --git a/tests/fixtures/3B42_Daily.19980101.7.nc4 b/tests/fixtures/3B42_Daily.19980101.7.nc4 new file mode 100644 index 0000000..6b3e07c Binary files /dev/null and b/tests/fixtures/3B42_Daily.19980101.7.nc4 differ diff --git a/tests/fixtures/generate_test_reference.py b/tests/fixtures/generate_test_reference.py index 10c320f..8254961 100644 --- a/tests/fixtures/generate_test_reference.py +++ b/tests/fixtures/generate_test_reference.py @@ -2,12 +2,13 @@ from datetime import datetime -import fsspec import netCDF4 as nc import numpy as np from kerchunk.combine import MultiZarrToZarr from kerchunk.hdf import SingleHdf5ToZarr +import fsspec + def create_netcdf(filename, date): with nc.Dataset(filename, "w", format="NETCDF4") as ds: diff --git a/tests/test_app.py b/tests/test_app.py index 2a044eb..9ba9abd 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -7,6 +7,7 @@ test_zarr_store = os.path.join(DATA_DIR, "test_zarr_store.zarr") test_reference_store = os.path.join(DATA_DIR, "reference.json") test_netcdf_store = os.path.join(DATA_DIR, "testfile.nc") +test_transposed_netcdf_store = os.path.join(DATA_DIR, "3B42_Daily.19980101.7.nc4") test_unconsolidated_store = os.path.join(DATA_DIR, "unconsolidated.zarr") test_pyramid_store = os.path.join(DATA_DIR, "pyramid.zarr") @@ -28,6 +29,16 @@ "params": {"url": test_netcdf_store, "variable": "data", "decode_times": False}, "variables": ["data"], } + +test_transposed_netcdf_store_params = { + "params": { + "url": test_transposed_netcdf_store, + "variable": "precipitation", + "decode_times": False, + }, + "variables": ["precipitation"], +} + test_unconsolidated_store_params = { "params": { "url": test_unconsolidated_store, @@ -149,7 +160,7 @@ def test_get_tilejson_pyramid(app): def get_tile_test(app, ds_params, zoom: int = 0): response = app.get( - f"/tiles/{zoom}/0/0.png", + f"/tiles/{zoom}/0/0.png", params=ds_params["params"], ) assert response.status_code == 200 @@ -173,6 +184,17 @@ def 
test_get_tile_netcdf(app): return get_tile_test(app, test_netcdf_store_params) +def test_get_tile_transposed_netcdf_error(app): + response = app.get( + "/tiles/0/0/0.png", + params=test_transposed_netcdf_store_params["params"], + ) + assert response.status_code == 422 + assert response.json() == { + "detail": "Invalid dimension order. Expected order: ('y', 'x'). You can use `DataArray.transpose('y', 'x')` to reorder your dimensions. Data variable: precipitation" + } + + def test_get_tile_unconsolidated(app): return get_tile_test(app, test_unconsolidated_store_params) diff --git a/tests/test_remote.py b/tests/test_remote.py new file mode 100644 index 0000000..d228518 --- /dev/null +++ b/tests/test_remote.py @@ -0,0 +1,14 @@ +from test_app import get_variables_test + +test_remote_netcdf_store = "https://nex-gddp-cmip6.s3-us-west-2.amazonaws.com/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/pr/pr_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950.nc" +test_remote_netcdf_store_params = { + "params": { + "url": test_remote_netcdf_store, + "variable": "pr", + "decode_times": False, + }, + "variables": ["pr"], +} + +def test_get_variables_remote_netcdf(app): + return get_variables_test(app, test_remote_netcdf_store_params) diff --git a/titiler/xarray/factory.py b/titiler/xarray/factory.py index 95861f6..92f75dd 100644 --- a/titiler/xarray/factory.py +++ b/titiler/xarray/factory.py @@ -229,6 +229,13 @@ def tiles_endpoint( # type: ignore description="Whether to expect and open zarr store with consolidated metadata", ), ] = True, + anon: Annotated[ + Optional[bool], + Query( + title="anon", + description="Use credentials to access data when false.", + ), + ] = True, ) -> Response: """Create map tile from a dataset.""" tms = self.supported_tms.get(tileMatrixSetId) @@ -242,8 +249,8 @@ def tiles_endpoint( # type: ignore time_slice=time_slice, tms=tms, consolidated=consolidated, + anon=anon, ) as src_dst: - image = src_dst.tile( x, y, z, tilesize=scale * 256, 
nodata=src_dst.input.rio.nodata ) @@ -387,6 +394,7 @@ def tilejson_endpoint( # type: ignore decode_times=decode_times, tms=tms, consolidated=consolidated, + drop_dim=drop_dim, ) as src_dst: # see https://github.com/corteva/rioxarray/issues/645 minx, miny, maxx, maxy = zip( @@ -433,6 +441,10 @@ def histogram( description="Select a specific zarr group from a zarr hierarchy, can be for pyramids or datasets. Can be used to open a dataset in HDF5 files." ), ] = None, + drop_dim: Annotated[ + Optional[str], + Query(description="Dimension to drop"), + ] = None, ): with self.reader( url, @@ -440,6 +452,7 @@ def histogram( reference=reference, consolidated=consolidated, group=group, + drop_dim=drop_dim, ) as src_dst: boolean_mask = ~np.isnan(src_dst.input) data_values = src_dst.input.values[boolean_mask] diff --git a/titiler/xarray/main.py b/titiler/xarray/main.py index 34f679e..0df0405 100644 --- a/titiler/xarray/main.py +++ b/titiler/xarray/main.py @@ -1,15 +1,16 @@ """titiler app.""" import logging +import os +import shutil import rioxarray -import xarray import zarr from fastapi import FastAPI from starlette import status from starlette.middleware.cors import CORSMiddleware -from starlette_cramjam.middleware import CompressionMiddleware +import titiler.xarray.reader as reader from titiler.core.errors import DEFAULT_STATUS_CODES, add_exception_handlers from titiler.core.factory import AlgorithmFactory, TMSFactory from titiler.core.middleware import ( @@ -53,6 +54,7 @@ error_codes = { zarr.errors.GroupNotFoundError: status.HTTP_422_UNPROCESSABLE_ENTITY, + rioxarray.exceptions.InvalidDimensionOrder: status.HTTP_422_UNPROCESSABLE_ENTITY, } add_exception_handlers(app, error_codes) add_exception_handlers(app, DEFAULT_STATUS_CODES) @@ -67,18 +69,6 @@ allow_headers=["*"], ) -app.add_middleware( - CompressionMiddleware, - minimum_size=0, - exclude_mediatype={ - "image/jpeg", - "image/jpg", - "image/png", - "image/jp2", - "image/webp", - }, -) - app.add_middleware( 
CacheControlMiddleware, cachecontrol=api_settings.cachecontrol, @@ -91,7 +81,7 @@ app.add_middleware( ServerTimingMiddleware, calls_to_track={ - "1-xarray-open_dataset": (xarray.open_dataset,), + "1-xarray-open_dataset": (reader.xarray_open_dataset,), "2-rioxarray-reproject": (rioxarray.raster_array.RasterArray.reproject,), }, ) @@ -107,3 +97,31 @@ def ping(): """Health check.""" return {"ping": "pong!"} + + +@app.get( + "/clear_cache", + description="Clear Cache", + summary="Clear Cache.", + operation_id="clear cache", + tags=["Clear Cache"], +) +def clear_cache(): + """ + Clear the cache. + """ + print("Clearing the cache...") + cache_dir = os.path.expanduser(api_settings.fsspec_cache_directory) + if os.path.exists(cache_dir): + # Iterate over each directory and file in the root of the EFS + for root_dir, dirs, files in os.walk(cache_dir, topdown=False): + for name in files: + file_path = os.path.join(root_dir, name) + os.remove(file_path) + print(f"Deleted file: {file_path}") + + for name in dirs: + dir_path = os.path.join(root_dir, name) + shutil.rmtree(dir_path) + print(f"Deleted directory: {dir_path}") + return {"message": "cache cleared"} diff --git a/titiler/xarray/map-form.html b/titiler/xarray/map-form.html index 8bd800f..6b73f7a 100644 --- a/titiler/xarray/map-form.html +++ b/titiler/xarray/map-form.html @@ -75,6 +75,10 @@ background-color: #0056b3; } + #other-fields-list li { + margin-bottom: 0.5em; + } + /* histogram container */ #histogram-container { margin-top: 20px; @@ -123,17 +127,23 @@