From b525e4bc84eb9a36f4aca29a8fdc9c50346d95ad Mon Sep 17 00:00:00 2001 From: mans4178 Date: Thu, 28 Mar 2024 17:39:05 +0000 Subject: [PATCH 01/20] dem and lulc rules added --- Snakefile | 6 ++++ environment.yml | 1 + metadata/copernicus_dem.yml | 21 +++++++++++++ metadata/copernicus_lulc.yml | 21 +++++++++++++ rules/copernicus_dem.smk | 25 ++++++++++++++++ rules/copernicus_lulc.smk | 43 +++++++++++++++++++++++++++ src/irv_datapkg/__init__.py | 57 ++++++++++++++++++++++++++++++++++++ 7 files changed, 174 insertions(+) mode change 100644 => 100755 Snakefile create mode 100755 metadata/copernicus_dem.yml create mode 100755 metadata/copernicus_lulc.yml create mode 100755 rules/copernicus_dem.smk create mode 100755 rules/copernicus_lulc.smk mode change 100644 => 100755 src/irv_datapkg/__init__.py diff --git a/Snakefile b/Snakefile old mode 100644 new mode 100755 index aab754e..b3311e8 --- a/Snakefile +++ b/Snakefile @@ -19,6 +19,8 @@ BOUNDARY_LU = BOUNDARIES.set_index("CODE_A3") envvars: "ZENODO_TOKEN", + "COPERNICUS_CDS_URL", + "COPERNICUS_CDS_API_KEY" def boundary_geom(iso3): @@ -73,6 +75,8 @@ rule checksums: "data/{ISO3}/openstreetmap/openstreetmap_roads-tertiary__{ISO3}.gpkg", "data/{ISO3}/storm.csv", "data/{ISO3}/wri_powerplants/wri-powerplants__{ISO3}.gpkg", + "data/{ISO3}/copernicus_lulc/copernicus_lulc__{ISO3}.tif", + "data/{ISO3}/copernicus_dem/copernicus_dem__{ISO3}.tif", output: checksums="data/{ISO3}/md5sum.txt", shell: @@ -113,4 +117,6 @@ include: "rules/jrc_ghsl.smk" include: "rules/openstreetmap.smk" include: "rules/storm.smk" include: "rules/wri_powerplants.smk" +include: "rules/copernicus_lulc.smk" +include: "rules/copernicus_dem.smk" include: "rules/zenodo.smk" diff --git a/environment.yml b/environment.yml index c74e410..9a85626 100644 --- a/environment.yml +++ b/environment.yml @@ -8,6 +8,7 @@ dependencies: - pip - pip: - zenodo_get>=1.5.1 # download from Zenodo + - cdsapi # copernicus data api - -e . # irv_datapkg helper - black # Python formatting - gdal>=3.3 # command-line tools for spatial data diff --git a/metadata/copernicus_dem.yml b/metadata/copernicus_dem.yml new file mode 100755 index 0000000..f608682 --- /dev/null +++ b/metadata/copernicus_dem.yml @@ -0,0 +1,21 @@ +# yaml-language-server: $schema=irv-datapkg-schema.json +name: copernicus_dem +description: Copernicus Digital Elevation Model +version: x +dataset_name: copernicus_dem +data_author: x +data_title: x +data_title_long: x +data_summary: > + x + +data_citation: > + x + +data_license: + name: x + title: x + path: x + +data_origin_url: x +data_formats: ["GeoTIFF"] diff --git a/metadata/copernicus_lulc.yml b/metadata/copernicus_lulc.yml new file mode 100755 index 0000000..41d1b8a --- /dev/null +++ b/metadata/copernicus_lulc.yml @@ -0,0 +1,21 @@ +# yaml-language-server: $schema=irv-datapkg-schema.json +name: copernicus_lulc +description: Copernicus Land Cover +version: x +dataset_name: copernicus_lulc +data_author: x +data_title: x +data_title_long: x +data_summary: > + x + +data_citation: > + x + +data_license: + name: x + title: x + path: x + +data_origin_url: x +data_formats: ["GeoTIFF"] diff --git a/rules/copernicus_dem.smk b/rules/copernicus_dem.smk new file mode 100755 index 0000000..4db8b8e --- /dev/null +++ b/rules/copernicus_dem.smk @@ -0,0 +1,25 @@ +# +# Copernicus DEM +# +rule download_dem: + output: + dir=directory("incoming_data/copernicus_dem/glo-90"), + shell: + """ + mkdir -p incoming_data/copernicus_dem + cd incoming_data/copernicus_dem + aws s3 sync s3://copernicus-dem-90m/ --no-sign-request . + """ + +rule convert_dem: + input: + dir="incoming_data/copernicus_dem/glo-90", + output: + tiff="incoming_data/copernicus_dem/copernicus_dem.tif", + shell: + """ + cd incoming_data/copernicus_dem/glo-90 + gdalbuildvrt -input_file_list tiffs.txt copernicus_dsm_cog_30_DEM.vrt + gdal_translate -co "COMPRESS=LZW" -co "TILED=yes" -co "BIGTIFF=YES" -of "GTiff" copernicus_dsm_cog_30_DEM.vrt copernicus_dsm_cog_30_DEM.tif + """ + diff --git a/rules/copernicus_lulc.smk b/rules/copernicus_lulc.smk new file mode 100755 index 0000000..2ab4285 --- /dev/null +++ b/rules/copernicus_lulc.smk @@ -0,0 +1,43 @@ +# +# Copernicus LULC +# + +rule download_lulc: + output: + zip="incoming_data/copernicus_lulc/archive.zip", + run: + path = os.path.join("incoming_data","copernicus_lulc") + if not os.path.isdir(path): + os.mkdir(path) + + from irv_datapkg import download_from_CDS + download_from_CDS( + "satellite-land-cover", + "all", + "zip", + "v2.1.1", + "2020", + "incoming_data/copernicus_lulc/archive.zip") + +rule convert_lulc: + input: + zip="incoming_data/copernicus_lulc/archive.zip", + output: + tif = "incoming_data/copernicus_lulc/copernicus_lulc.tif", + shell: + """ + cd incoming_data/copernicus_lulc + + unzip archive.zip + + gdalwarp \ + -of Gtiff \ + -co COMPRESS=LZW \ + -ot Byte \ + -te -180.0000000 -90.0000000 180.0000000 90.0000000 \ + -tr 0.002777777777778 0.002777777777778 \ + -t_srs EPSG:4326 \ + NETCDF:C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.nc:lccs_class \ + C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.tif + + """ \ No newline at end of file diff --git a/src/irv_datapkg/__init__.py b/src/irv_datapkg/__init__.py old mode 100644 new mode 100755 index 10e7e77..d33dab2 --- a/src/irv_datapkg/__init__.py +++ b/src/irv_datapkg/__init__.py @@ -3,10 +3,13 @@ import subprocess from pathlib import Path from typing import Optional +import argparse +import os import geopandas import shapely import pyproj +import cdsapi from osgeo import gdal from shapely.ops import transform @@ -83,3 +86,57 @@ def crop_raster( cmd = cmd + f" -co {creation_option}" subprocess.run(shlex.split(cmd), check=True) + +def download_from_CDS(dataset_name: str, variable: str, file_format: str, version: str, year: str, output_path: str) -> None: + """ + Download a resource from the Copernicus CDS API, given appropriate credentials. + + Requires COPERNICUS_CDS_URL and COPERNICUS_CDS_API_KEY to be in the environment. + For more details see: https://cds.climate.copernicus.eu/api-how-to + + Args: + dataset_name: Name of dataset to download + variable: Name of variable to request + file_format: Desired file format e.g. zip + version: Version of dataset + year: Year of dataset applicability + output_path: Where to save the downloaded file + """ + + client = cdsapi.Client( + url=os.environ.get("COPERNICUS_CDS_URL"), + key=os.environ.get("COPERNICUS_CDS_API_KEY") + ) + + # N.B. Files are covered by licences which need to be manually accepted, e.g. + # https://cds.climate.copernicus.eu/cdsapp/#!/terms/satellite-land-cover + # https://cds.climate.copernicus.eu/cdsapp/#!/terms/vito-proba-v + # + # Ideally we could programmatically accept the necessary licence conditions + # the below code is an attempt at that, but fails with an HTTP 403, not + # logged in when trying to simulate a user acceptance + # + # API_URL = os.environ.get("COPERNICUS_CDS_URL") + # payloads = [ + # [{"terms_id":"vito-proba-v","revision":1}], + # [{"terms_id":"satellite-land-cover","revision":1}], + # ] + # for payload in payloads: + # client._api( + # url=f"{API_URL.rstrip('/')}.ui/user/me/terms-and-conditions", + # request=payload, + # method="post" + # ) + # + # See https://github.com/ecmwf/cdsapi/blob/master/cdsapi/api.py + + client.retrieve( + dataset_name, + { + 'variable': variable, + 'format': file_format, + 'version': version, + 'year': year, + }, + output_path + ) From 9eb91fbfffcce113f2c3c5844b40931e6a94d003 Mon Sep 17 00:00:00 2001 From: mans4178 Date: Thu, 11 Jul 2024 15:03:45 +0000 Subject: [PATCH 02/20] added pop and fixed dem/lulc --- rules/copernicus_dem.smk | 2 +- rules/copernicus_lulc.smk | 2 +- rules/ghs_pop.smk | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100755 rules/ghs_pop.smk diff --git a/rules/copernicus_dem.smk b/rules/copernicus_dem.smk index 4db8b8e..7b8a677 100755 --- a/rules/copernicus_dem.smk +++ b/rules/copernicus_dem.smk @@ -15,7 +15,7 @@ rule convert_dem: input: dir="incoming_data/copernicus_dem/glo-90", output: - tiff="incoming_data/copernicus_dem/copernicus_dem.tif", + tiff="incoming_data/copernicus_dem/glo-90/copernicus_dsm_cog_30_DEM.tif", shell: """ cd incoming_data/copernicus_dem/glo-90 diff --git a/rules/copernicus_lulc.smk b/rules/copernicus_lulc.smk index 2ab4285..b799854 100755 --- a/rules/copernicus_lulc.smk +++ b/rules/copernicus_lulc.smk @@ -23,7 +23,7 @@ rule convert_lulc: input: zip="incoming_data/copernicus_lulc/archive.zip", output: - tif = "incoming_data/copernicus_lulc/copernicus_lulc.tif", + tif = "incoming_data/copernicus_lulc/C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.tif", shell: """ cd incoming_data/copernicus_lulc diff --git a/rules/ghs_pop.smk b/rules/ghs_pop.smk new file mode 100755 index 0000000..cea3113 --- /dev/null +++ b/rules/ghs_pop.smk @@ -0,0 +1,32 @@ +# +# Download GHS population data (epoch: 2020, resolution: 3 arcsec, coordinate system: WGS84) +# + +rule pop_ghs_download: + output: + zip="incoming_data/ghs_pop/GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif" + shell: + """ + mkdir -p incoming_data/ghs_pop + cd incoming_data/ghs_pop + wget https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2023A/GHS_POP_E2020_GLOBE_R2023A_4326_3ss/V1-0/GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.zip" + unzip GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.zip + """ + +rule pop_ghs_clip: + input: + tif="incoming_data/ghs_pop/GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", + bounds="data/{ISO3}/boundaries__{ISO3}.gpkg" + output: + tif="data/{ISO3}/ghs_pop__{ISO3}.tif", + shell: + """ + gdalwarp \ + -co COMPRESS=LZW \ + -cutline {input.bounds} \ + -cl boundaries__{wildcards.ISO3} \ + -crop_to_cutline \ + {input.tif} \ + {output.tif} + """ + From 2747375d030c937ed736d33dcb2c5128413d5e12 Mon Sep 17 00:00:00 2001 From: mans4178 Date: Thu, 11 Jul 2024 15:18:14 +0000 Subject: [PATCH 03/20] added gadm --- Snakefile | 2 ++ rules/gadm.smk | 13 +++++++++++++ rules/ghs_pop.smk | 32 -------------------------------- 3 files changed, 15 insertions(+), 32 deletions(-) create mode 100755 rules/gadm.smk delete mode 100755 rules/ghs_pop.smk diff --git a/Snakefile b/Snakefile index b3311e8..30503d0 100755 --- a/Snakefile +++ b/Snakefile @@ -77,6 +77,7 @@ rule checksums: "data/{ISO3}/wri_powerplants/wri-powerplants__{ISO3}.gpkg", "data/{ISO3}/copernicus_lulc/copernicus_lulc__{ISO3}.tif", "data/{ISO3}/copernicus_dem/copernicus_dem__{ISO3}.tif", + "data/{ISO3}/gadm__{ISO3}.tif", output: checksums="data/{ISO3}/md5sum.txt", shell: @@ -119,4 +120,5 @@ include: "rules/storm.smk" include: "rules/wri_powerplants.smk" include: "rules/copernicus_lulc.smk" include: "rules/copernicus_dem.smk" +include: "rules/gadm.smk" include: "rules/zenodo.smk" diff --git a/rules/gadm.smk b/rules/gadm.smk new file mode 100755 index 0000000..522dd7e --- /dev/null +++ b/rules/gadm.smk @@ -0,0 +1,13 @@ +# +# Download GADM country admin boundaries +# + + +rule gadm: + output: + gpkg="data/{ISO3}/gadm__{ISO3}.gpkg", + shell: + """ + cd data/{wildcards.ISO3} + wget https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_{wildcards.ISO3}.gpkg --output-document=gadm__{wildcards.ISO3}.gpkg + """ \ No newline at end of file diff --git a/rules/ghs_pop.smk b/rules/ghs_pop.smk deleted file mode 100755 index cea3113..0000000 --- a/rules/ghs_pop.smk +++ /dev/null @@ -1,32 +0,0 @@ -# -# Download GHS population data (epoch: 2020, resolution: 3 arcsec, coordinate system: WGS84) -# - -rule pop_ghs_download: - output: - zip="incoming_data/ghs_pop/GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif" - shell: - """ - mkdir -p incoming_data/ghs_pop - cd incoming_data/ghs_pop - wget https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2023A/GHS_POP_E2020_GLOBE_R2023A_4326_3ss/V1-0/GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.zip" - unzip GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.zip - """ - -rule pop_ghs_clip: - input: - tif="incoming_data/ghs_pop/GHS_POP_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", - bounds="data/{ISO3}/boundaries__{ISO3}.gpkg" - output: - tif="data/{ISO3}/ghs_pop__{ISO3}.tif", - shell: - """ - gdalwarp \ - -co COMPRESS=LZW \ - -cutline {input.bounds} \ - -cl boundaries__{wildcards.ISO3} \ - -crop_to_cutline \ - {input.tif} \ - {output.tif} - """ - From d4ab53d756dc427411102bfc9997ac0d5c9351a3 Mon Sep 17 00:00:00 2001 From: mans4178 Date: Tue, 16 Jul 2024 10:57:21 +0000 Subject: [PATCH 04/20] fixing filepaths --- rules/copernicus_dem.smk | 18 ++++++++++++------ rules/copernicus_lulc.smk | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/rules/copernicus_dem.smk b/rules/copernicus_dem.smk index 7b8a677..8934220 100755 --- a/rules/copernicus_dem.smk +++ b/rules/copernicus_dem.smk @@ -3,7 +3,7 @@ # rule download_dem: output: - dir=directory("incoming_data/copernicus_dem/glo-90"), + dir=directory("incoming_data/copernicus_dem"), shell: """ mkdir -p incoming_data/copernicus_dem @@ -13,13 +13,19 @@ rule download_dem: rule convert_dem: input: - dir="incoming_data/copernicus_dem/glo-90", + txt="incoming_data/copernicus_dem/tileList.txt", output: - tiff="incoming_data/copernicus_dem/glo-90/copernicus_dsm_cog_30_DEM.tif", + tiff="incoming_data/copernicus_dem/copernicus_dem.tif", shell: """ - cd incoming_data/copernicus_dem/glo-90 - gdalbuildvrt -input_file_list tiffs.txt copernicus_dsm_cog_30_DEM.vrt - gdal_translate -co "COMPRESS=LZW" -co "TILED=yes" -co "BIGTIFF=YES" -of "GTiff" copernicus_dsm_cog_30_DEM.vrt copernicus_dsm_cog_30_DEM.tif + cd incoming_data/copernicus_dem + gdalbuildvrt -input_file_list tileList.txt copernicus_dsm_cog_30_DEM.vrt + gdal_translate \ + -co "COMPRESS=LZW" \ + -co "TILED=yes" \ + -co "BIGTIFF=YES" \ + -of "GTiff" \ + copernicus_dsm_cog_30_DEM.vrt \ + copernicus_dem.tif """ diff --git a/rules/copernicus_lulc.smk b/rules/copernicus_lulc.smk index b799854..b8a8422 100755 --- a/rules/copernicus_lulc.smk +++ b/rules/copernicus_lulc.smk @@ -23,7 +23,7 @@ rule convert_lulc: input: zip="incoming_data/copernicus_lulc/archive.zip", output: - tif = "incoming_data/copernicus_lulc/C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.tif", + tif = "incoming_data/copernicus_lulc/copernicus_lulc.tif", shell: """ cd incoming_data/copernicus_lulc @@ -38,6 +38,6 @@ rule convert_lulc: -tr 0.002777777777778 0.002777777777778 \ -t_srs EPSG:4326 \ NETCDF:C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.nc:lccs_class \ - C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.tif + copernicus_lulc.tif """ \ No newline at end of file From 96f5822e66e42a870df4c47db470635c551be91c Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 9 Aug 2024 12:15:25 +0100 Subject: [PATCH 05/20] Include awscli dependency --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 9a85626..c55e019 100644 --- a/environment.yml +++ b/environment.yml @@ -10,6 +10,7 @@ dependencies: - zenodo_get>=1.5.1 # download from Zenodo - cdsapi # copernicus data api - -e . # irv_datapkg helper + - awscli # connect to AWS, download from S3 - black # Python formatting - gdal>=3.3 # command-line tools for spatial data - geopandas>=0.14.0 # geospatial dataframes From a651ca435897a5a8b438df95818e359bb1ecbd93 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 9 Aug 2024 12:15:41 +0100 Subject: [PATCH 06/20] Include cdsapi dependency --- pyproject.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 99e8121..4ceb127 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,13 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] -dependencies = ["geopandas>=0.13", "shapely>=2.0", "pyproj", "GDAL>=3.3"] +dependencies = [ + "geopandas>=0.13", + "shapely>=2.0", + "pyproj", + "GDAL>=3.3", + "cdsapi" +] [project.urls] "Homepage" = "https://github.com/nismod/irv-datapkg" From c3d3703abeddc5a7e5d26146c5be7a43faf5a601 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 9 Aug 2024 12:17:16 +0100 Subject: [PATCH 07/20] Rework cds download to thin layer over API - CDSAPI_URL and CDSAPI_KEY are used as environment variables by cdsapi for auth - request dictionary should be more flexible --- Snakefile | 8 +++---- src/irv_datapkg/__init__.py | 46 +++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/Snakefile b/Snakefile index 30503d0..08170ff 100755 --- a/Snakefile +++ b/Snakefile @@ -19,8 +19,8 @@ BOUNDARY_LU = BOUNDARIES.set_index("CODE_A3") envvars: "ZENODO_TOKEN", - "COPERNICUS_CDS_URL", - "COPERNICUS_CDS_API_KEY" + "CDSAPI_URL", + "CDSAPI_KEY" def boundary_geom(iso3): @@ -119,6 +119,6 @@ include: "rules/openstreetmap.smk" include: "rules/storm.smk" include: "rules/wri_powerplants.smk" include: "rules/copernicus_lulc.smk" -include: "rules/copernicus_dem.smk" -include: "rules/gadm.smk" +include: "rules/copernicus_dem.smk" +include: "rules/gadm.smk" include: "rules/zenodo.smk" diff --git a/src/irv_datapkg/__init__.py b/src/irv_datapkg/__init__.py index d33dab2..6db2762 100755 --- a/src/irv_datapkg/__init__.py +++ b/src/irv_datapkg/__init__.py @@ -1,15 +1,14 @@ -from dataclasses import dataclass +import os import shlex import subprocess +from dataclasses import dataclass from pathlib import Path from typing import Optional -import argparse -import os +import cdsapi import geopandas -import shapely import pyproj -import cdsapi +import shapely from osgeo import gdal from shapely.ops import transform @@ -87,26 +86,28 @@ def crop_raster( subprocess.run(shlex.split(cmd), check=True) -def download_from_CDS(dataset_name: str, variable: str, file_format: str, version: str, year: str, output_path: str) -> None: + +def download_from_CDS( + dataset_name: str, + request: dict, + output_path: str, +) -> None: """ Download a resource from the Copernicus CDS API, given appropriate credentials. - Requires COPERNICUS_CDS_URL and COPERNICUS_CDS_API_KEY to be in the environment. + Requires CDSAPI_URL and CDSAPI_KEY to be in the environment. For more details see: https://cds.climate.copernicus.eu/api-how-to Args: dataset_name: Name of dataset to download - variable: Name of variable to request - file_format: Desired file format e.g. zip - version: Version of dataset - year: Year of dataset applicability + request: Dictionary defining request, could include: + variable: Name of variable to request + file_format: Desired file format e.g. zip + version: Version of dataset + year: Year of dataset applicability output_path: Where to save the downloaded file """ - - client = cdsapi.Client( - url=os.environ.get("COPERNICUS_CDS_URL"), - key=os.environ.get("COPERNICUS_CDS_API_KEY") - ) + client = cdsapi.Client() # N.B. Files are covered by licences which need to be manually accepted, e.g. # https://cds.climate.copernicus.eu/cdsapp/#!/terms/satellite-land-cover @@ -114,9 +115,9 @@ def download_from_CDS(dataset_name: str, variable: str, file_format: str, versio # # Ideally we could programmatically accept the necessary licence conditions # the below code is an attempt at that, but fails with an HTTP 403, not - # logged in when trying to simulate a user acceptance + # logged in when trying to simulate a user acceptance # - # API_URL = os.environ.get("COPERNICUS_CDS_URL") + # API_URL = os.environ.get("CDSAPI_URL") # payloads = [ # [{"terms_id":"vito-proba-v","revision":1}], # [{"terms_id":"satellite-land-cover","revision":1}], @@ -132,11 +133,6 @@ def download_from_CDS(dataset_name: str, variable: str, file_format: str, versio client.retrieve( dataset_name, - { - 'variable': variable, - 'format': file_format, - 'version': version, - 'year': year, - }, - output_path + request, + output_path, ) From a14b79553a3af1ebb7409b01885b79f8fdc58186 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 12:27:22 +0000 Subject: [PATCH 08/20] Ignore backups, api files --- .gitignore | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c8da3fc..c1e1558 100755 --- a/.gitignore +++ b/.gitignore @@ -6,14 +6,16 @@ data incoming_data zenodo *.qgz +*.bak # Secrets *_TOKEN +CDSAPI_* # Python __pycache__ *.py[c,i] *.egg-info -# Mac -._* \ No newline at end of file +# Mac +._* From 0ceace48011d2be999e635d46211df295838d5fe Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 12:27:46 +0000 Subject: [PATCH 09/20] Add notes on CDS access --- README.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index dec664f..93b9330 100644 --- a/README.md +++ b/README.md @@ -36,19 +36,28 @@ micromamba activate datapkg The data packages are produced using a [`snakemake`](https://snakemake.readthedocs.io/) workflow. -The workflow expects `ZENODO_TOKEN` to be set as an environment variable - this -must be set before running any workflow steps. +The workflow expects `ZENODO_TOKEN`, `CDSAPI_KEY` and `CDSAPI_URL` to be set as +environment variables - these must be set before running any workflow steps. -If not interacting with Zenodo, this can be a dummy string: +If not interacting with Zenodo or the Copernicus Climate Data Store, these can +be dummy strings: ```bash echo "placeholder" > ZENODO_TOKEN +echo "https://cds-beta.climate.copernicus.eu/api" > CDSAPI_URL +echo "test" > CDSAPI_KEY ``` +See [Climate Data Store API +docs](https://cds-beta.climate.copernicus.eu/how-to-api#use-the-cds-api-client-for-data-access) +and [Zenodo API docs](https://developers.zenodo.org/#introduction) for access details. + Export from the file to the environment: ```bash export ZENODO_TOKEN=$(cat ZENODO_TOKEN) +export CDSAPI_KEY=$(cat CDSAPI_KEY) +export CDSAPI_URL=$(cat CDSAPI_URL) ``` Check what will be run, if we ask for everything produced by the rule `all`, From b30e3aff5143bbfd96dbee3da21135fb6901f286 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 12:28:32 +0000 Subject: [PATCH 10/20] Update CDS DEM and LULC rules --- metadata/copernicus_dem.yml | 32 ++++++++------ metadata/copernicus_lulc.yml | 66 +++++++++++++++++++++++----- rules/copernicus_dem.smk | 83 ++++++++++++++++++++++++++++-------- rules/copernicus_lulc.smk | 32 +++++++------- 4 files changed, 156 insertions(+), 57 deletions(-) diff --git a/metadata/copernicus_dem.yml b/metadata/copernicus_dem.yml index f608682..47252d8 100755 --- a/metadata/copernicus_dem.yml +++ b/metadata/copernicus_dem.yml @@ -1,21 +1,29 @@ # yaml-language-server: $schema=irv-datapkg-schema.json name: copernicus_dem -description: Copernicus Digital Elevation Model -version: x -dataset_name: copernicus_dem -data_author: x -data_title: x -data_title_long: x +description: Copernicus 90m Digital Elevation Model +version: 2023_1 +dataset_name: copernicus_glo90 +data_author: European Union and ESA +data_title: Copernicus GLO-90 +data_title_long: Copernicus Global Digital Elevation Model (WorldDEM GLO-90) + data_summary: > - x + The Copernicus DEM is a Digital Surface Model (DSM) which represents the + surface of the Earth including buildings, infrastructure and vegetation. + GLO-90 provides worldwide coverage at 90 meters. Note that ocean areas do not + have tiles, there one can assume height values equal to zero. Data is provided + as Cloud Optimized GeoTIFFs and comes from Copernicus DEM 2021 release. data_citation: > - x + Copernicus DEM - Global Digital Elevation Model (2021) + https://doi.org/10.5270/ESA-c5d3d65 (produced using Copernicus WorldDEM™-90 © + DLR e.V. 2010-2014 and © Airbus Defence and Space GmbH 2014-2018 provided + under COPERNICUS by the European Union and ESA; all rights reserved) data_license: - name: x - title: x - path: x + name: Copernicus-DEM-EULA + title: Licence for Copernicus DEM instance COP-DEM-GLO-90-F Global 90m Full, Free & Open + path: https://esa.maps.eox.at/copernicus-dem_eula.pdf -data_origin_url: x +data_origin_url: https://dataspace.copernicus.eu/explore-data/data-collections/copernicus-contributing-missions/collections-description/COP-DEM data_formats: ["GeoTIFF"] diff --git a/metadata/copernicus_lulc.yml b/metadata/copernicus_lulc.yml index 41d1b8a..796e1bc 100755 --- a/metadata/copernicus_lulc.yml +++ b/metadata/copernicus_lulc.yml @@ -1,21 +1,65 @@ # yaml-language-server: $schema=irv-datapkg-schema.json name: copernicus_lulc -description: Copernicus Land Cover -version: x +description: Copernicus Land Cover Classification +version: v2.1.1 dataset_name: copernicus_lulc -data_author: x -data_title: x -data_title_long: x +data_author: Copernicus Climate Change Service, Climate Data Store +data_title: Copernicus Land Cover Classification +data_title_long: Land cover classification gridded maps from 1992 to present derived from satellite observations data_summary: > - x + This dataset provides global maps describing the land surface into 22 classes, + which have been defined using the United Nations Food and Agriculture + Organization's (UN FAO) Land Cover Classification System (LCCS). In addition + to the land cover (LC) maps, four quality flags are produced to document the + reliability of the classification and change detection. + + In order to ensure continuity, these land cover maps are consistent with the + series of global annual LC maps from the 1990s to 2015 produced by the + European Space Agency (ESA) Climate Change Initiative (CCI), which are also + available on the ESA CCI LC viewer. + + To produce this dataset, the entire Medium Resolution Imaging Spectrometer + (MERIS) Full and Reduced Resolution archive from 2003 to 2012 was first + classified into a unique 10-year baseline LC map. This is then back- and + up-dated using change detected from (i) Advanced Very-High-Resolution + Radiometer (AVHRR) time series from 1992 to 1999, (ii) SPOT-Vegetation + (SPOT-VGT) time series from 1998 to 2012 and (iii) PROBA-Vegetation (PROBA-V) + and Sentinel-3 OLCI (S3 OLCI) time series from 2013. + + Beyond the climate-modelling communities, this dataset's long-term + consistency, yearly updates, and high thematic detail on a global scale have + made it attractive for a multitude of applications such as land accounting, + forest monitoring and desertification, in addition to scientific research. + + The products are made available to the public by ESA and the consortium. You + may use one or several CCI-LC products land cover map for educational and/or + scientific purposes, without any fee on the condition that you credit the ESA + Climate Change Initiative and in particular its Land Cover project as the + source of the CCI-LC database. Should you write any scientific publication on + the results of research activities that use one or several CCI-LC products as + input, you shall acknowledge the ESA CCI Land Cover project in the text of the + publication and provide the project with an electronic copy of the publication + (contact@esa-landcover-cci.org). If you wish to use one or several CCI-LC + products in advertising or in any commercial promotion, you shall acknowledge + the ESA CCI Land Cover project and you must submit the layout to the project + for approval beforehand (contact@esa-landcover-cci.org). + + © ESA Climate Change Initiative - Land Cover led by UCLouvain (2017) + + Generated using Copernicus Climate Change Service information [2024]. Neither + the European Commission nor ECMWF is responsible for any use that may be made + of the Copernicus information or data it contains. data_citation: > - x + Copernicus Climate Change Service, Climate Data Store, (2019): Land cover + classification gridded maps from 1992 to present derived from satellite + observation. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). + DOI: 10.24381/cds.006f2c9a (Accessed on 09-AUG-2024) data_license: - name: x - title: x - path: x + name: ESA-CCI + title: ESA CCI Land Cover licence + path: https://object-store.os-api.cci2.ecmwf.int/cci2-prod-catalogue/licences/satellite-land-cover/satellite-land-cover_8423d13d3dfd95bbeca92d9355516f21de90d9b40083a915ead15a189d6120fa.pdf -data_origin_url: x +data_origin_url: https://cds-beta.climate.copernicus.eu/datasets/satellite-land-cover?tab=overview data_formats: ["GeoTIFF"] diff --git a/rules/copernicus_dem.smk b/rules/copernicus_dem.smk index 8934220..fa02f11 100755 --- a/rules/copernicus_dem.smk +++ b/rules/copernicus_dem.smk @@ -1,31 +1,80 @@ # # Copernicus DEM # -rule download_dem: +rule list_dem_glo90: output: - dir=directory("incoming_data/copernicus_dem"), + txt="incoming_data/copernicus_dem/COP-DEM_GLO-90-DGED__2023_1.txt", shell: """ - mkdir -p incoming_data/copernicus_dem - cd incoming_data/copernicus_dem - aws s3 sync s3://copernicus-dem-90m/ --no-sign-request . + out_dir = $(dirname {output.txt}) + mkdir -p $out_dir + + curl -k -H "accept: csv" \ + https://prism-dem-open.copernicus.eu/pd-desk-open-access/publicDemURLs/COP-DEM_GLO-90-DGED__2023_1 \ + > {output.txt} """ -rule convert_dem: +rule download_dem_glo90: input: - txt="incoming_data/copernicus_dem/tileList.txt", + txt=rules.list_dem_glo90.output.txt output: - tiff="incoming_data/copernicus_dem/copernicus_dem.tif", + dir=directory("incoming_data/copernicus_dem/archive"), shell: """ - cd incoming_data/copernicus_dem - gdalbuildvrt -input_file_list tileList.txt copernicus_dsm_cog_30_DEM.vrt - gdal_translate \ - -co "COMPRESS=LZW" \ - -co "TILED=yes" \ - -co "BIGTIFF=YES" \ - -of "GTiff" \ - copernicus_dsm_cog_30_DEM.vrt \ - copernicus_dem.tif + mkdir -p {output.dir}.tmp + pushd {output.dir}.tmp + + cat ../COP-DEM_GLO-90-DGED__2023_1.txt | parallel 'wget --no-clobber {{}}' + + popd + mv {output.dir}.tmp {output.dir} + """ + +rule create_protected_dir: + output: + dir=protected(directory("incoming_data/copernicus_dem/test")), + shell: + """ + mkdir -p {output.dir}.tmp + pushd {output.dir}.tmp + + cat ../COP-DEM_GLO-90-DGED__2023_1.txt | parallel 'echo {{}} >> $(basename {{}})' + + popd + ln -s {output.dir}.tmp {output.dir} """ +rule convert_dem_glo90: + input: + dir=rules.download_dem_glo90.output.dir, + output: + tiff="incoming_data/copernicus_dem/copernicus_dem.tif", + shell: + """ + pushd {input.dir}/.. + mkdir tiles + + # Extract + find -type f -name '*.tar' | sed 's/\.\/archive\///'' | sed 's/.tar//' | parallel + tar xvf \ + archive/{{}}.tar \ + --strip-components=2 \ + -C tiles/ \ + {{}}/DEM/{{}}_DEM.tif + + # Build list + find -type f -name '*.tif' > tileList.txt + + # Build VRT + gdalbuildvrt -input_file_list tileList.txt copernicus_dem.vrt + + # Combine to big TIFF + gdal_translate \ + -co "COMPRESS=LZW" \ + -co "TILED=yes" \ + -co "BIGTIFF=YES" \ + -of "GTiff" \ + copernicus_dem.vrt \ + copernicus_dem.tif + popd + """ diff --git a/rules/copernicus_lulc.smk b/rules/copernicus_lulc.smk index b8a8422..fd6973d 100755 --- a/rules/copernicus_lulc.smk +++ b/rules/copernicus_lulc.smk @@ -4,31 +4,30 @@ rule download_lulc: output: - zip="incoming_data/copernicus_lulc/archive.zip", + archive="incoming_data/copernicus_lulc/archive.tgz", run: - path = os.path.join("incoming_data","copernicus_lulc") - if not os.path.isdir(path): - os.mkdir(path) - from irv_datapkg import download_from_CDS download_from_CDS( "satellite-land-cover", - "all", - "zip", - "v2.1.1", - "2020", - "incoming_data/copernicus_lulc/archive.zip") + { + 'variable': 'all', + 'year': ['2022'], + 'version': ['v2_1_1'], + 'format': 'tgz' + }, + output.archive + ) rule convert_lulc: input: - zip="incoming_data/copernicus_lulc/archive.zip", + archive=rules.download_lulc.output.archive, output: tif = "incoming_data/copernicus_lulc/copernicus_lulc.tif", shell: - """ + """ cd incoming_data/copernicus_lulc - - unzip archive.zip + + tar xvzf $(basename {input.archive}) gdalwarp \ -of Gtiff \ @@ -37,7 +36,6 @@ rule convert_lulc: -te -180.0000000 -90.0000000 180.0000000 90.0000000 \ -tr 0.002777777777778 0.002777777777778 \ -t_srs EPSG:4326 \ - NETCDF:C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.nc:lccs_class \ + NETCDF:C3S-LC-L4-LCCS-Map-300m-P1Y-2022-v2.1.1.nc:lccs_class \ copernicus_lulc.tif - - """ \ No newline at end of file + """ From e3f865abd00fab749529f2bbd1666a2456da2c15 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 15:49:35 +0000 Subject: [PATCH 11/20] Bump dependency versions --- environment.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/environment.yml b/environment.yml index c55e019..5e1a6c5 100644 --- a/environment.yml +++ b/environment.yml @@ -1,21 +1,21 @@ name: datapkg channels: + - nodefaults - bioconda # for snakemake - conda-forge # for most other packages - - defaults dependencies: - - python=3.11 + - python=3.12 - pip - pip: - - zenodo_get>=1.5.1 # download from Zenodo - - cdsapi # copernicus data api + - zenodo_get>=1.6.1 # download from Zenodo + - cdsapi>=0.7.2 # copernicus data api - -e . # irv_datapkg helper - awscli # connect to AWS, download from S3 - black # Python formatting - gdal>=3.3 # command-line tools for spatial data - - geopandas>=0.14.0 # geospatial dataframes + - geopandas>=1.0.1 # geospatial dataframes - osmium-tool==1.16.0 # openstreetmap extracts - pyyaml # read YAML files - pyogrio # faster geospatial i/o - - snakemake==7.32.4 # workflow management + - snakemake==8.25.5 # workflow management - snakefmt # Snakefile formatting From 7a1ba86cde09a3c958689e544ce0407242bdbad6 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 15:50:03 +0000 Subject: [PATCH 12/20] Skip GADM --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 08170ff..ba8d744 100755 --- a/Snakefile +++ b/Snakefile @@ -77,7 +77,7 @@ rule checksums: "data/{ISO3}/wri_powerplants/wri-powerplants__{ISO3}.gpkg", "data/{ISO3}/copernicus_lulc/copernicus_lulc__{ISO3}.tif", "data/{ISO3}/copernicus_dem/copernicus_dem__{ISO3}.tif", - "data/{ISO3}/gadm__{ISO3}.tif", + # "data/{ISO3}/gadm__{ISO3}.gpkg", output: checksums="data/{ISO3}/md5sum.txt", shell: From 5da993930dfd11678791bec81cf8ebeffdd433dc Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 15:50:17 +0000 Subject: [PATCH 13/20] Fix escape pattern --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index ba8d744..7b36af0 100755 --- a/Snakefile +++ b/Snakefile @@ -83,7 +83,7 @@ rule checksums: shell: """ cd data/{wildcards.ISO3} - md5sum **/*.* | grep "tif\|gpkg" | sort -k 2 > md5sum.txt + md5sum **/*.* | grep "tif\\|gpkg" | sort -k 2 > md5sum.txt """ From a679a0a08bf1c141d336227f57dec15d95f2f889 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 15:50:41 +0000 Subject: [PATCH 14/20] Separate extract/convert rules --- rules/copernicus_dem.smk | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/rules/copernicus_dem.smk b/rules/copernicus_dem.smk index fa02f11..7bf15bb 100755 --- a/rules/copernicus_dem.smk +++ b/rules/copernicus_dem.smk @@ -30,38 +30,39 @@ rule download_dem_glo90: mv {output.dir}.tmp {output.dir} """ -rule create_protected_dir: +rule extract_dem_glo90: + input: + dir=rules.download_dem_glo90.output.dir, output: - dir=protected(directory("incoming_data/copernicus_dem/test")), + dir=directory("incoming_data/copernicus_dem/tiles"), shell: """ - mkdir -p {output.dir}.tmp - pushd {output.dir}.tmp - - cat ../COP-DEM_GLO-90-DGED__2023_1.txt | parallel 'echo {{}} >> $(basename {{}})' + pushd incoming_data/copernicus_dem + mkdir -p tiles + # Extract + find -type f -name '*.tar' | \ + head | \ + sed 's/.\\/archive\\///' | \ + sed 's/.tar//' | \ + parallel -j 1 \ + tar xvf \ + {{}}.tar \ + --skip-old-files \ + --strip-components=2 \ + -C ./tiles/ \ + {{}}/DEM/{{}}_DEM.tif popd - ln -s {output.dir}.tmp {output.dir} """ rule convert_dem_glo90: input: - dir=rules.download_dem_glo90.output.dir, + dir=rules.extract_dem_glo90.output.dir, output: tiff="incoming_data/copernicus_dem/copernicus_dem.tif", shell: """ - pushd {input.dir}/.. - mkdir tiles - - # Extract - find -type f -name '*.tar' | sed 's/\.\/archive\///'' | sed 's/.tar//' | parallel - tar xvf \ - archive/{{}}.tar \ - --strip-components=2 \ - -C tiles/ \ - {{}}/DEM/{{}}_DEM.tif - + pushd incoming_data/copernicus_dem # Build list find -type f -name '*.tif' > tileList.txt From e2ff37269f1d02ad24bc3ca67065f7163c5dcf61 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 15:50:53 +0000 Subject: [PATCH 15/20] Draft zenodo update --- rules/zenodo.smk | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/rules/zenodo.smk b/rules/zenodo.smk index 3e9f72b..fedb6f3 100644 --- a/rules/zenodo.smk +++ b/rules/zenodo.smk @@ -27,8 +27,22 @@ rule create_deposition: deposition = r.json() # Save details - with open(output.json, "w") as fh: - json.dump(deposition, fh, indent=2) + write_deposition(output.json, deposition) + +def get_deposition(deposition_id): + params = {"access_token": os.environ["ZENODO_TOKEN"]} + r = requests.get(f"https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}", params=params) + r.raise_for_status() + deposition = r.json() + return deposition + +def log_deposition(iso3, deposition, deposition_id): + with open(f"zenodo/{iso3}.deposition.{deposition_id}.{datetime.now().isoformat()}.json", "w") as fh: + json.dump(deposition, fh, indent=2) + +def write_deposition(fname, deposition): + with open(fname, "w") as fh: + json.dump(deposition, fh, indent=2) rule deposit: @@ -48,6 +62,24 @@ rule deposit: datapackage = json.load(fh) deposition_id = deposition["id"] + + # Check and create a new version if the last one was submitted + + # Get latest deposition + deposition = get_deposition(deposition_id) + + log_deposition(wildcards.ISO3, deposition, deposition_id) + + if deposition["submitted"]: + r = requests.post(f'https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}/actions/newversion', params=params) + r.raise_for_status() + response = r.json() + deposition_id = response["latest_draft"].split("/")[-1] + deposition = get_deposition(deposition_id) + log_deposition(wildcards.ISO3, deposition, deposition_id) + # NOTE overwriting an input file (should be okay, it's marked as ancient) + write_deposition(input.deposition, deposition) + bucket_url = deposition["links"]["bucket"] # Upload files @@ -62,7 +94,7 @@ rule deposit: print(r.json()) r.raise_for_status() - # Set up metadata + # Set up metadata centroid = boundary_geom(wildcards.ISO3).centroid place_name = BOUNDARY_LU.loc[wildcards.ISO3, "NAME"] From 7c19e6ea0f2d5b7bc49e21737d17e4a885b0e5ab Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 15:51:05 +0000 Subject: [PATCH 16/20] Bump version and sandbox while testing --- Snakefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Snakefile b/Snakefile index 7b36af0..2c1f5f7 100755 --- a/Snakefile +++ b/Snakefile @@ -9,9 +9,9 @@ import pandas import requests import shapely -DATAPKG_VERSION = "0.1.0" -# ZENODO_URL = "sandbox.zenodo.org" -ZENODO_URL = "zenodo.org" +DATAPKG_VERSION = "0.2.0" +ZENODO_URL = "sandbox.zenodo.org" +# ZENODO_URL = "zenodo.org" BOUNDARIES = irv_datapkg.read_boundaries(Path(".")) BOUNDARY_LU = BOUNDARIES.set_index("CODE_A3") From 668c65808b086dc9edebe90b0c237c01174feb7b Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 17:38:22 +0000 Subject: [PATCH 17/20] Remove GADM rule - data is not redistributable --- Snakefile | 1 - rules/gadm.smk | 13 ------------- 2 files changed, 14 deletions(-) delete mode 100755 rules/gadm.smk diff --git a/Snakefile b/Snakefile index 2c1f5f7..7b77629 100755 --- a/Snakefile +++ b/Snakefile @@ -77,7 +77,6 @@ rule checksums: "data/{ISO3}/wri_powerplants/wri-powerplants__{ISO3}.gpkg", "data/{ISO3}/copernicus_lulc/copernicus_lulc__{ISO3}.tif", "data/{ISO3}/copernicus_dem/copernicus_dem__{ISO3}.tif", - # "data/{ISO3}/gadm__{ISO3}.gpkg", output: checksums="data/{ISO3}/md5sum.txt", shell: diff --git a/rules/gadm.smk b/rules/gadm.smk deleted file mode 100755 index 522dd7e..0000000 --- a/rules/gadm.smk +++ /dev/null @@ -1,13 +0,0 @@ -# -# Download GADM country admin boundaries -# - - -rule gadm: - output: - gpkg="data/{ISO3}/gadm__{ISO3}.gpkg", - shell: - """ - cd data/{wildcards.ISO3} - wget https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_{wildcards.ISO3}.gpkg --output-document=gadm__{wildcards.ISO3}.gpkg - """ \ No newline at end of file From bc28cd710b2cc6ecf0ad76dc46581cdd0d0bdc0e Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 17:39:27 +0000 Subject: [PATCH 18/20] Zenodo update if available --- Snakefile | 1 + rules/zenodo.smk | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index 7b77629..e42534c 100755 --- a/Snakefile +++ b/Snakefile @@ -1,5 +1,6 @@ import json import shutil +from datetime import datetime from pathlib import Path from glob import glob diff --git a/rules/zenodo.smk b/rules/zenodo.smk index fedb6f3..5d395f9 100644 --- a/rules/zenodo.smk +++ b/rules/zenodo.smk @@ -54,6 +54,7 @@ rule deposit: touch("zenodo/{ISO3}.deposited"), run: params = {"access_token": os.environ["ZENODO_TOKEN"]} + headers = {'Authorization': f"Bearer {os.environ["ZENODO_TOKEN"]}"} with open(input.deposition, "r") as fh: deposition = json.load(fh) @@ -71,15 +72,33 @@ rule deposit: log_deposition(wildcards.ISO3, deposition, deposition_id) if deposition["submitted"]: - r = requests.post(f'https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}/actions/newversion', params=params) + # Request a new deposition to draft a new version + + # NOTE: this seems to fail if there's already a draft - workaround is to search for the draft and discard it manually + # POST /api/deposit/depositions/:id/actions/newversion + r = requests.post(f'https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}/actions/newversion', headers=headers) r.raise_for_status() response = r.json() - deposition_id = response["latest_draft"].split("/")[-1] + + # Find draft deposition ID in response + deposition_id = response["links"]["latest_draft"].split("/")[-1] deposition = get_deposition(deposition_id) log_deposition(wildcards.ISO3, deposition, deposition_id) # NOTE overwriting an input file (should be okay, it's marked as ancient) write_deposition(input.deposition, deposition) + # List files + # GET /api/deposit/depositions/:id/files + r = requests.get(f"https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}/files", headers=headers) + r.raise_for_status() + files = r.json() + + # Delete each file + # DELETE /api/deposit/depositions/:id/files/:file_id + for file_ in files: + r = requests.delete(f"https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}/files/{file_["id"]}", headers=headers) + r.raise_for_status() + bucket_url = deposition["links"]["bucket"] # Upload files From 45bff312887b4b3d872dc4fdfa0a969a27e6f136 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 17:43:26 +0000 Subject: [PATCH 19/20] Drop GADM rule from Snakefile --- Snakefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Snakefile b/Snakefile index e42534c..38bf703 100755 --- a/Snakefile +++ b/Snakefile @@ -120,5 +120,4 @@ include: "rules/storm.smk" include: "rules/wri_powerplants.smk" include: "rules/copernicus_lulc.smk" include: "rules/copernicus_dem.smk" -include: "rules/gadm.smk" include: "rules/zenodo.smk" From a6c8ab078d6817999e679d8c159adf9ffe7eea2f Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 17 Dec 2024 17:43:39 +0000 Subject: [PATCH 20/20] Add notes on Zenodo draft versions --- rules/zenodo.smk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rules/zenodo.smk b/rules/zenodo.smk index 5d395f9..342d214 100644 --- a/rules/zenodo.smk +++ b/rules/zenodo.smk @@ -74,8 +74,10 @@ rule deposit: if deposition["submitted"]: # Request a new deposition to draft a new version - # NOTE: this seems to fail if there's already a draft - workaround is to search for the draft and discard it manually # POST /api/deposit/depositions/:id/actions/newversion + # NOTE: this seems to fail if there's already a draft - workaround is to search for the draft and discard it manually + # could search all depositions for unsubmitted and discard? + # or could search for unsubmitted matching "conceptdoi" and use it? r = requests.post(f'https://{ZENODO_URL}/api/deposit/depositions/{deposition_id}/actions/newversion', headers=headers) r.raise_for_status() response = r.json()