From b4526042a85ff980df02d591af9793a1bc8e4400 Mon Sep 17 00:00:00 2001 From: "Benjamin P. Stewart" <ben.gis.stewart@gmail.com> Date: Fri, 22 Nov 2024 07:52:35 -0500 Subject: [PATCH] updated data misc with worldcover download --- README.md | 4 -- docs/_config.yml | 28 +++++------ notebooks/AWS_Summarize.ipynb | 95 ++++++++++++++++++++++++++++++++--- pyproject.toml | 32 ++++++------ src/GOSTrocks/dataMisc.py | 66 ++++++++++++++++++++++-- src/GOSTrocks/rasterMisc.py | 17 ++++--- 6 files changed, 190 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 4ca6a20..2d77f3e 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,6 @@ Future releases can be built from source, but pip will contain the most recent s Please refer to the World Bank's Github [Contributing](docs/CONTRIBUTING.md) guidelines. -## Code of Conduct - -The <span style="color:#3EACAD">template</span> maintains a [Code of Conduct](docs/CODE_OF_CONDUCT.md) to ensure an inclusive and respectful environment for everyone. Please adhere to it in all interactions within our community. - ## License This project is licensed under the [**Mozilla Public License**](https://www.mozilla.org/en-US/MPL). diff --git a/docs/_config.yml b/docs/_config.yml index 352813f..f9c3e6c 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1,8 +1,8 @@ # Book settings -title: +title: GOSTrocks! 
author: Geospatial Operations Support Team (GOST) logo: docs/images/logo.png -only_build_toc_files: false +only_build_toc_files: true repository: url: https://github.com/worldbank/GOSTrocks @@ -11,30 +11,23 @@ repository: ####################################################################################### # HTML-specific settings html: - home_page_in_navbar: false + home_page_in_navbar: true extra_navbar: "" use_edit_page_button: true use_repository_button: true use_issues_button: true - baseurl: https://worldbank.github.io/GOSTrocks + baseurl: https://github.com/worldbank/DECAT_Space2Stats extra_footer: | <div> - Country borders or names do not necessarily reflect the World Bank Group’s official position. All maps are for illustrative purposes and do not imply the expression of any opinion on the part of the World Bank, concerning the legal status of any country or territory or concerning the delimitation of frontiers or boundaries + <b>All content (unless otherwise specified) is subject to the <a href="https://raw.githubusercontent.com/worldbank/template/main/LICENSE">World Bank Master Community License Agreement.</a></b> </div> - <div> - <b>All content (unless otherwise specified) is subject to the <a href="https://www.mozilla.org/en-US/MPL">Mozilla Public License.</a></b> - </div> - favicon: docs/images/favicon.ico - ####################################################################################### # Execution settings execute: execute_notebooks: off - -####################################################################################### -# Bibliography settings -bibtex_bibfiles: - - docs/bibliography.bib + allow_errors: true + exclude_patterns: + - notebooks/*.ipynb ####################################################################################### # Sphinx settings @@ -42,3 +35,8 @@ sphinx: config: html_show_copyright: false html_last_updated_fmt: "%b %d, %Y" + apidoc_module_dir: ../space2stats_api/src + extra_extensions: + - 
'sphinx.ext.autodoc' + - sphinx.ext.napoleon + - sphinxcontrib.apidoc diff --git a/notebooks/AWS_Summarize.ipynb b/notebooks/AWS_Summarize.ipynb index 15324e6..a86e24c 100644 --- a/notebooks/AWS_Summarize.ipynb +++ b/notebooks/AWS_Summarize.ipynb @@ -11,18 +11,21 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import boto3\n", + "import urllib3\n", "\n", - "import pandas as pd" + "import pandas as pd\n", + "\n", + "urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": { "scrolled": true }, @@ -96,7 +99,83 @@ "Completed loop: 62\n", "Completed loop: 63\n", "Completed loop: 64\n", - "Completed loop: 65\n" + "Completed loop: 65\n", + "Completed loop: 66\n", + "Completed loop: 67\n", + "Completed loop: 68\n", + "Completed loop: 69\n", + "Completed loop: 70\n", + "Completed loop: 71\n", + "Completed loop: 72\n", + "Completed loop: 73\n", + "Completed loop: 74\n", + "Completed loop: 75\n", + "Completed loop: 76\n", + "Completed loop: 77\n", + "Completed loop: 78\n", + "Completed loop: 79\n", + "Completed loop: 80\n", + "Completed loop: 81\n", + "Completed loop: 82\n", + "Completed loop: 83\n", + "Completed loop: 84\n", + "Completed loop: 85\n", + "Completed loop: 86\n", + "Completed loop: 87\n", + "Completed loop: 88\n", + "Completed loop: 89\n", + "Completed loop: 90\n", + "Completed loop: 91\n", + "Completed loop: 92\n", + "Completed loop: 93\n", + "Completed loop: 94\n", + "Completed loop: 95\n", + "Completed loop: 96\n", + "Completed loop: 97\n", + "Completed loop: 98\n", + "Completed loop: 99\n", + "Completed loop: 100\n", + "Completed loop: 101\n", + "Completed loop: 102\n", + "Completed loop: 103\n", + "Completed loop: 104\n", + "Completed loop: 105\n", + "Completed loop: 106\n", + "Completed loop: 107\n", + "Completed loop: 108\n", + "Completed loop: 109\n", + "Completed loop: 110\n", + 
"Completed loop: 111\n", + "Completed loop: 112\n", + "Completed loop: 113\n", + "Completed loop: 114\n", + "Completed loop: 115\n", + "Completed loop: 116\n", + "Completed loop: 117\n", + "Completed loop: 118\n", + "Completed loop: 119\n", + "Completed loop: 120\n", + "Completed loop: 121\n", + "Completed loop: 122\n", + "Completed loop: 123\n", + "Completed loop: 124\n", + "Completed loop: 125\n", + "Completed loop: 126\n", + "Completed loop: 127\n", + "Completed loop: 128\n", + "Completed loop: 129\n", + "Completed loop: 130\n", + "Completed loop: 131\n", + "Completed loop: 132\n", + "Completed loop: 133\n", + "Completed loop: 134\n", + "Completed loop: 135\n", + "Completed loop: 136\n", + "Completed loop: 137\n", + "Completed loop: 138\n", + "Completed loop: 139\n", + "Completed loop: 140\n", + "Completed loop: 141\n" ] } ], @@ -104,7 +183,7 @@ "bucket = \"wbg-geography01\"\n", "prefix = \"sylvera\"\n", "region = \"us-east-1\"\n", - "s3client = boto3.client(\"s3\", region_name=region)\n", + "s3client = boto3.client(\"s3\", region_name=region, verify=False)\n", "\n", "# Loop through the S3 bucket and get all the file keys\n", "more_results = True\n", @@ -323,9 +402,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Earth Engine", + "display_name": "gostrocks", "language": "python", - "name": "ee" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -337,7 +416,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 8b0ddb7..aa2c2c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,34 +31,36 @@ dynamic = ["version"] requires-python = ">=3.7" dependencies = [ - "rasterio", - "geopandas", - "pandas", - "numexpr > 2.6.8", - "numpy", - "pyproj", - "seaborn", + "awscli", + "affine", "boto3", "botocore", "contextily", - "matplotlib", - "tqdm", - "xarray", - "osmnx", - "affine", - "PyOpenSSL >= 23.2", 
def get_worldcover(
    df,
    download_folder,
    worldcover_vrt="WorldCover.vrt",
    version="v200",
    print_command=False,
    verbose=False,
):
    """Download the ESA WorldCover tiles intersecting ``df`` and mosaic them in a VRT.

    Tiles are read from the public ``esa-worldcover`` bucket on AWS
    (https://aws.amazon.com/marketplace/pp/prodview-7oorylcamixxc) using
    unsigned (anonymous) requests. Tiles already present in
    ``download_folder`` are not downloaded again.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Data frame used to select tiles to download; tiles are selected where
        they intersect the union of all geometries in the data frame.
    download_folder : str
        Path to the folder where the tile index, the tiles and the VRT are
        written. It is created if it does not exist.
    worldcover_vrt : str, optional
        Name of the VRT file to create, by default 'WorldCover.vrt'.
    version : str, optional
        Version of WorldCover to download, by default 'v200' (2021 map); the
        other option is 'v100' (2020 map).
    print_command : bool, optional
        If True, print the awscli commands that would download the missing
        tiles instead of downloading them. If False, use boto3 to download
        the tiles, by default False.
    verbose : bool, optional
        Print more updates during processing, by default False.

    Returns
    -------
    list of str
        Local path of every selected tile, whether or not it is on disk yet
        (tiles are not downloaded when ``print_command`` is True).

    Raises
    ------
    ValueError
        If ``version`` is not a known WorldCover release.
    """
    # Each release is published under its own map year, and both the year and
    # the version appear in the object key, so neither can be hard-coded.
    release_years = {"v100": "2020", "v200": "2021"}
    if version not in release_years:
        raise ValueError(
            f"Unsupported WorldCover version {version!r}; "
            f"expected one of {sorted(release_years)}"
        )
    year = release_years[version]

    bucket = "esa-worldcover"
    esa_file_geojson = "esa_worldcover_grid.geojson"

    # boto3 does not create missing parent folders for downloads.
    os.makedirs(download_folder, exist_ok=True)

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to match the other S3 clients in this module; confirm it is still
    # required (e.g. for a corporate proxy) before relying on it.
    s3 = boto3.client("s3", verify=False, config=Config(signature_version=UNSIGNED))

    tiles_geojson = os.path.join(download_folder, esa_file_geojson)
    if not os.path.exists(tiles_geojson):
        s3.download_file(bucket, esa_file_geojson, tiles_geojson)

    in_tiles = gpd.read_file(tiles_geojson)

    # Compare geometries in the same CRS; otherwise a projected AOI silently
    # selects no (or the wrong) tiles. This rebinds the local name only.
    if df.crs is not None and in_tiles.crs is not None and df.crs != in_tiles.crs:
        df = df.to_crs(in_tiles.crs)

    # union_all() replaces the deprecated unary_union in newer GeoPandas.
    aoi = df.union_all() if hasattr(df, "union_all") else df.unary_union
    sel_tiles = in_tiles.loc[in_tiles.intersects(aoi)]

    all_tiles = []
    for tile in sel_tiles["ll_tile"]:
        cur_tile_path = (
            f"{version}/{year}/map/ESA_WorldCover_10m_{year}_{version}_{tile}_Map.tif"
        )
        # NOTE(review): the local file name does not include the version, so
        # tiles of different versions share a cache entry in the same folder.
        cur_out = os.path.join(download_folder, f"WorldCover_{tile}.tif")
        all_tiles.append(cur_out)

        if os.path.exists(cur_out):
            if verbose:
                print(f"File {cur_out} already exists")
            continue

        if print_command:
            command = (
                f"aws s3 --no-sign-request --no-verify-ssl cp "
                f"s3://{bucket}/{cur_tile_path} {cur_out}"
            )
            print(command)
        else:
            if verbose:
                print(f"Downloading {cur_tile_path} to {cur_out}")
            s3.download_file(bucket, cur_tile_path, cur_out)

    # Only mosaic tiles that are actually on disk: in print_command mode (or
    # after a partial run) some of the selected tiles do not exist yet.
    local_tiles = [tile_file for tile_file in all_tiles if os.path.exists(tile_file)]
    if local_tiles:
        out_vrt = os.path.join(download_folder, worldcover_vrt)
        vrt = gdal.BuildVRT(out_vrt, local_tiles, options=gdal.BuildVRTOptions())
        # Dropping the reference closes the dataset, which writes the VRT.
        vrt = None
    elif verbose:
        print("No WorldCover tiles on disk; skipping VRT creation")

    return all_tiles
rasterio.io import MemoryFile +from rasterio.crs import CRS from contextlib import contextmanager curPath = os.path.realpath( @@ -121,10 +122,14 @@ def project_raster(srcRst, dstCrs, output_raster=""): """project raster to destination crs Args: - srcRst (_type_): _description_ - dstCrs (_type_): _description_ - output_raster (_type_): _description_ - """ + srcRst (rasterio.datasetReader): input rasterio to reproject + dstCrs (int): crs to project to + output_raster (string): file to write to, defaults to '', which writes nothing + + """ + if dstCrs.__class__ == int: + dstCrs = CRS.from_epsg(dstCrs) + transform, width, height = calculate_default_transform( srcRst.crs, dstCrs, srcRst.width, srcRst.height, *srcRst.bounds ) @@ -132,7 +137,6 @@ def project_raster(srcRst, dstCrs, output_raster=""): kwargs.update( {"crs": dstCrs, "transform": transform, "width": width, "height": height} ) - # open destination raster dstRst = np.zeros([kwargs["count"], width, height], kwargs["dtype"]) @@ -147,9 +151,8 @@ def project_raster(srcRst, dstCrs, output_raster=""): dst_crs=dstCrs, resampling=Resampling.nearest, ) - if output_raster != "": - with rasterio.open(output_raster, "w", *kwargs) as out_raster: + with rasterio.open(output_raster, "w", **kwargs) as out_raster: out_raster.write(dstRst) return [dstRst, kwargs]