From b4526042a85ff980df02d591af9793a1bc8e4400 Mon Sep 17 00:00:00 2001
From: "Benjamin P. Stewart" <ben.gis.stewart@gmail.com>
Date: Fri, 22 Nov 2024 07:52:35 -0500
Subject: [PATCH] updated data misc with worldcover download

---
 README.md                     |  4 --
 docs/_config.yml              | 28 +++++------
 notebooks/AWS_Summarize.ipynb | 95 ++++++++++++++++++++++++++++++++---
 pyproject.toml                | 32 ++++++------
 src/GOSTrocks/dataMisc.py     | 66 ++++++++++++++++++++++--
 src/GOSTrocks/rasterMisc.py   | 17 ++++---
 6 files changed, 190 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index 4ca6a20..2d77f3e 100644
--- a/README.md
+++ b/README.md
@@ -14,10 +14,6 @@ Future releases can be built from source, but pip will contain the most recent s
 
 Please refer to the World Bank's Github [Contributing](docs/CONTRIBUTING.md) guidelines.
 
-## Code of Conduct
-
-The <span style="color:#3EACAD">template</span> maintains a [Code of Conduct](docs/CODE_OF_CONDUCT.md) to ensure an inclusive and respectful environment for everyone. Please adhere to it in all interactions within our community.
-
 ## License
 
 This project is licensed under the [**Mozilla Public License**](https://www.mozilla.org/en-US/MPL).
diff --git a/docs/_config.yml b/docs/_config.yml
index 352813f..f9c3e6c 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -1,8 +1,8 @@
 # Book settings
-title:
+title: GOSTrocks!
 author: Geospatial Operations Support Team (GOST)
 logo: docs/images/logo.png
-only_build_toc_files: false
+only_build_toc_files: true
 
 repository:
   url: https://github.com/worldbank/GOSTrocks
@@ -11,30 +11,23 @@ repository:
 #######################################################################################
 # HTML-specific settings
 html:
-  home_page_in_navbar: false
+  home_page_in_navbar: true
   extra_navbar: ""
   use_edit_page_button: true
   use_repository_button: true
   use_issues_button: true
   baseurl: https://worldbank.github.io/GOSTrocks
   extra_footer: |
     <div>
-     Country borders or names do not necessarily reflect the World Bank Group’s official position. All maps are for illustrative purposes and do not imply the expression of any opinion on the part of the World Bank, concerning the legal status of any country or territory or concerning the delimitation of frontiers or boundaries
+        <b>All content (unless otherwise specified) is subject to the <a href="https://raw.githubusercontent.com/worldbank/template/main/LICENSE">World Bank Master Community License Agreement.</a></b>
     </div>
-    <div>
-        <b>All content (unless otherwise specified) is subject to the <a href="https://www.mozilla.org/en-US/MPL">Mozilla Public License.</a></b>
-    </div>
-  favicon: docs/images/favicon.ico
-
 #######################################################################################
 # Execution settings
 execute:
   execute_notebooks: off
-
-#######################################################################################
-# Bibliography settings
-bibtex_bibfiles:
-  - docs/bibliography.bib
+  allow_errors: true
+  exclude_patterns:
+    - notebooks/*.ipynb
 
 #######################################################################################
 # Sphinx settings
@@ -42,3 +35,8 @@ sphinx:
   config:
     html_show_copyright: false
     html_last_updated_fmt: "%b %d, %Y"
+    apidoc_module_dir: ../space2stats_api/src
+  extra_extensions:
+  - 'sphinx.ext.autodoc'
+  - sphinx.ext.napoleon
+  - sphinxcontrib.apidoc
diff --git a/notebooks/AWS_Summarize.ipynb b/notebooks/AWS_Summarize.ipynb
index 15324e6..a86e24c 100644
--- a/notebooks/AWS_Summarize.ipynb
+++ b/notebooks/AWS_Summarize.ipynb
@@ -11,18 +11,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
     "import boto3\n",
+    "import urllib3\n",
     "\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "\n",
+    "urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {
     "scrolled": true
    },
@@ -96,7 +99,83 @@
       "Completed loop: 62\n",
       "Completed loop: 63\n",
       "Completed loop: 64\n",
-      "Completed loop: 65\n"
+      "Completed loop: 65\n",
+      "Completed loop: 66\n",
+      "Completed loop: 67\n",
+      "Completed loop: 68\n",
+      "Completed loop: 69\n",
+      "Completed loop: 70\n",
+      "Completed loop: 71\n",
+      "Completed loop: 72\n",
+      "Completed loop: 73\n",
+      "Completed loop: 74\n",
+      "Completed loop: 75\n",
+      "Completed loop: 76\n",
+      "Completed loop: 77\n",
+      "Completed loop: 78\n",
+      "Completed loop: 79\n",
+      "Completed loop: 80\n",
+      "Completed loop: 81\n",
+      "Completed loop: 82\n",
+      "Completed loop: 83\n",
+      "Completed loop: 84\n",
+      "Completed loop: 85\n",
+      "Completed loop: 86\n",
+      "Completed loop: 87\n",
+      "Completed loop: 88\n",
+      "Completed loop: 89\n",
+      "Completed loop: 90\n",
+      "Completed loop: 91\n",
+      "Completed loop: 92\n",
+      "Completed loop: 93\n",
+      "Completed loop: 94\n",
+      "Completed loop: 95\n",
+      "Completed loop: 96\n",
+      "Completed loop: 97\n",
+      "Completed loop: 98\n",
+      "Completed loop: 99\n",
+      "Completed loop: 100\n",
+      "Completed loop: 101\n",
+      "Completed loop: 102\n",
+      "Completed loop: 103\n",
+      "Completed loop: 104\n",
+      "Completed loop: 105\n",
+      "Completed loop: 106\n",
+      "Completed loop: 107\n",
+      "Completed loop: 108\n",
+      "Completed loop: 109\n",
+      "Completed loop: 110\n",
+      "Completed loop: 111\n",
+      "Completed loop: 112\n",
+      "Completed loop: 113\n",
+      "Completed loop: 114\n",
+      "Completed loop: 115\n",
+      "Completed loop: 116\n",
+      "Completed loop: 117\n",
+      "Completed loop: 118\n",
+      "Completed loop: 119\n",
+      "Completed loop: 120\n",
+      "Completed loop: 121\n",
+      "Completed loop: 122\n",
+      "Completed loop: 123\n",
+      "Completed loop: 124\n",
+      "Completed loop: 125\n",
+      "Completed loop: 126\n",
+      "Completed loop: 127\n",
+      "Completed loop: 128\n",
+      "Completed loop: 129\n",
+      "Completed loop: 130\n",
+      "Completed loop: 131\n",
+      "Completed loop: 132\n",
+      "Completed loop: 133\n",
+      "Completed loop: 134\n",
+      "Completed loop: 135\n",
+      "Completed loop: 136\n",
+      "Completed loop: 137\n",
+      "Completed loop: 138\n",
+      "Completed loop: 139\n",
+      "Completed loop: 140\n",
+      "Completed loop: 141\n"
      ]
     }
    ],
@@ -104,7 +183,7 @@
     "bucket = \"wbg-geography01\"\n",
     "prefix = \"sylvera\"\n",
     "region = \"us-east-1\"\n",
-    "s3client = boto3.client(\"s3\", region_name=region)\n",
+    "s3client = boto3.client(\"s3\", region_name=region, verify=False)\n",
     "\n",
     "# Loop through the S3 bucket and get all the file keys\n",
     "more_results = True\n",
@@ -323,9 +402,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Earth Engine",
+   "display_name": "gostrocks",
    "language": "python",
-   "name": "ee"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -337,7 +416,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.4"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
diff --git a/pyproject.toml b/pyproject.toml
index 8b0ddb7..aa2c2c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,34 +31,36 @@ dynamic = ["version"]
 requires-python = ">=3.7"
 
 dependencies = [
-	"rasterio",
-	"geopandas",
-	"pandas",
-	"numexpr > 2.6.8",
-	"numpy",
-	"pyproj",
-	"seaborn",
+	"awscli",
+	"affine",
 	"boto3",
 	"botocore",
 	"contextily",
-	"matplotlib",
-	"tqdm",
-	"xarray",
-	"osmnx",
-	"affine",
-	"PyOpenSSL >= 23.2",
 	"click",
-	"Sphinx",
 	"coverage",
-	"awscli",
 	"flake8",
+	"geopandas",
+	"matplotlib",
+	"numexpr > 2.6.8",
+	"numpy",
+	"osmnx",
+	"gdal",
+	"pandas",
+	"pyproj",
+	"PyOpenSSL >= 23.2",
 	"python-dotenv>=0.5.1",
+	"rasterio",
+	"s3fs",
+	"seaborn",	
+	"tqdm",
+	"xarray"
 ]
 
 [project.optional-dependencies]
 docs = [
 	"docutils==0.17.1",    # https://jupyterbook.org/en/stable/content/citations.html?highlight=docutils#citations-and-bibliographies
 	"jupyter-book >=1,<2",
+	"sphinx"
 ]
 
 [project.urls]
diff --git a/src/GOSTrocks/dataMisc.py b/src/GOSTrocks/dataMisc.py
index 17c8cd4..353e672 100644
--- a/src/GOSTrocks/dataMisc.py
+++ b/src/GOSTrocks/dataMisc.py
@@ -2,6 +2,7 @@
 import json
 import urllib
 import boto3
+import boto3.session
 import rasterio
 
 import pandas as pd
@@ -9,8 +10,9 @@
 
 from botocore.config import Config
 from botocore import UNSIGNED
+from osgeo import gdal
 
 from . import rasterMisc as rMisc
 
 
 def download_WSF(
@@ -58,9 +60,9 @@ def aws_search_ntl(
     :type verbose: bool, optional
     """
     if unsigned:
-        s3client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
+        s3client = boto3.client("s3", verify=False, config=Config(signature_version=UNSIGNED))
     else:
-        s3client = boto3.client("s3")
+        s3client = boto3.client("s3", verify=False)
 
     # Loop through the S3 bucket and get all the keys for files that are .tif
     more_results = True
@@ -148,3 +150,61 @@ def get_fathom_vrts(return_df=False):
         vrt_pd["PATH"] = all_vrts
         return vrt_pd
     return all_vrts
+
+def get_worldcover(df, download_folder, worldcover_vrt='WorldCover.vrt',
+                   version='v200',
+                   print_command=False, verbose=False):
+    """ Download ESA WorldCover tiles from AWS (https://aws.amazon.com/marketplace/pp/prodview-7oorylcamixxc)
+
+    Parameters
+    ----------
+    df : geopandas.GeoDataFrame
+        Data frame used to select tiles to download; selects tiles based on the data frame unary_union
+    download_folder : string
+        path to folder to download tiles
+    worldcover_vrt : str, optional
+        name of the VRT file to create, by default 'WorldCover.vrt'
+    version : str, optional
+        version of WorldCover to download, by default 'v200' (2021 map); other option is 'v100' (2020 map)
+    print_command : bool, optional
+        if true, print the awscli commands to download the tiles. If false, uses boto3
+        to download the tiles, by default False
+    verbose : bool, optional
+        Print more updates during processing, by default False
+
+    Returns
+    -------
+    list of str
+        local paths of every selected tile, whether or not it has been downloaded yet
+    """
+    # Each release is a different map year, and the year is part of both the key prefix and the file name
+    year = {'v100': '2020', 'v200': '2021'}.get(version)
+    if year is None:
+        raise ValueError(f"Unsupported WorldCover version '{version}'; expected 'v100' or 'v200'")
+    tile_path = "{version}/{year}/map/ESA_WorldCover_10m_{year}_{version}_{tile}_Map.tif"
+    bucket = 'esa-worldcover'
+    esa_file_geojson = 'esa_worldcover_grid.geojson'
+    s3 = boto3.client('s3', verify=False, config=Config(signature_version=UNSIGNED))  # NOTE(review): verify=False skips TLS certificate checks
+    tiles_geojson = os.path.join(download_folder, esa_file_geojson)
+    if not os.path.exists(tiles_geojson):
+        s3.download_file(bucket, esa_file_geojson, tiles_geojson)
+    in_tiles = gpd.read_file(tiles_geojson)
+    sel_tiles = in_tiles.loc[in_tiles.intersects(df.unary_union)]
+    all_tiles = []
+    for tile in sel_tiles['ll_tile']:
+        cur_tile_path = tile_path.format(tile=tile, version=version, year=year)
+        cur_out = os.path.join(download_folder, f"WorldCover_{tile}.tif")
+        all_tiles.append(cur_out)
+        if os.path.exists(cur_out):
+            if verbose:
+                print(f"File {cur_out} already exists")
+        elif print_command:
+            print(f"aws s3 --no-sign-request --no-verify-ssl cp s3://{bucket}/{cur_tile_path} {cur_out}")
+        else:
+            if verbose:
+                print(f"Downloading {cur_tile_path} to {cur_out}")
+            s3.download_file(bucket, cur_tile_path, cur_out)
+    gdal.BuildVRT(os.path.join(download_folder, worldcover_vrt), all_tiles, options=gdal.BuildVRTOptions())
+
+    return all_tiles
+
diff --git a/src/GOSTrocks/rasterMisc.py b/src/GOSTrocks/rasterMisc.py
index 62e5056..f24c91f 100644
--- a/src/GOSTrocks/rasterMisc.py
+++ b/src/GOSTrocks/rasterMisc.py
@@ -18,6 +18,7 @@
 from rasterio.warp import reproject, Resampling, calculate_default_transform
 from rasterio.merge import merge
 from rasterio.io import MemoryFile
+from rasterio.crs import CRS
 from contextlib import contextmanager
 
 curPath = os.path.realpath(
@@ -121,10 +122,14 @@ def project_raster(srcRst, dstCrs, output_raster=""):
     """project raster to destination crs
 
     Args:
-        srcRst (_type_): _description_
-        dstCrs (_type_): _description_
-        output_raster (_type_): _description_
-    """
+        srcRst (rasterio.datasetReader): input rasterio to reproject
+        dstCrs (int): crs to project to
+        output_raster (string): file to write to, defaults to '', which writes nothing
+
+    """    
+    if dstCrs.__class__ == int:
+        dstCrs = CRS.from_epsg(dstCrs)
+
     transform, width, height = calculate_default_transform(
         srcRst.crs, dstCrs, srcRst.width, srcRst.height, *srcRst.bounds
     )
@@ -132,7 +137,6 @@ def project_raster(srcRst, dstCrs, output_raster=""):
     kwargs.update(
         {"crs": dstCrs, "transform": transform, "width": width, "height": height}
     )
-
     # open destination raster
     dstRst = np.zeros([kwargs["count"], width, height], kwargs["dtype"])
 
@@ -147,9 +151,8 @@ def project_raster(srcRst, dstCrs, output_raster=""):
             dst_crs=dstCrs,
             resampling=Resampling.nearest,
         )
-
     if output_raster != "":
-        with rasterio.open(output_raster, "w", *kwargs) as out_raster:
+        with rasterio.open(output_raster, "w", **kwargs) as out_raster:
             out_raster.write(dstRst)
 
     return [dstRst, kwargs]