From d7e17e7191cd0b7e218de108041b31d5aa50a57d Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer Date: Mon, 14 Nov 2022 19:07:22 +0100 Subject: [PATCH 1/7] added initial cru recipe --- recipes/cru-ts/recipe.py | 149 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 recipes/cru-ts/recipe.py diff --git a/recipes/cru-ts/recipe.py b/recipes/cru-ts/recipe.py new file mode 100644 index 0000000000..451f37753b --- /dev/null +++ b/recipes/cru-ts/recipe.py @@ -0,0 +1,149 @@ +# The authentication is directly copied from https://github.com/cedadev/opendap-python-example/blob/master/simple_file_downloader.py +# +import os +import datetime +import ssl +from getpass import getpass + + +# Import third-party libraries +from cryptography import x509 +from cryptography.hazmat.backends import default_backend + +# from contrail.security.online_ca_client import OnlineCaClient +# pip install ContrailOnlineCAClient +from contrail.security.onlineca.client import OnlineCaClient + +# Credentials defaults +DODS_FILE_CONTENTS = """HTTP.COOKIEJAR=./dods_cookies +HTTP.SSL.CERTIFICATE=./credentials.pem +HTTP.SSL.KEY=./credentials.pem +HTTP.SSL.CAPATH=./ca-trustroots +""" + +DODS_FILE_PATH = os.path.expanduser('~/.dodsrc') +CERTS_DIR = os.path.expanduser('~/.certs') + +if not os.path.isdir(CERTS_DIR): + os.makedirs(CERTS_DIR) + +TRUSTROOTS_DIR = os.path.join(CERTS_DIR, 'ca-trustroots') +CREDENTIALS_FILE_PATH = os.path.join(CERTS_DIR, 'credentials.pem') + +TRUSTROOTS_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/trustroots/' +CERT_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/certificate/' + + +def write_dods_file_contents(): + + DODS_FILE_CONTENTS = """ + HTTP.COOKIEJAR=./dods_cookies + HTTP.SSL.CERTIFICATE={credentials_file_path} + HTTP.SSL.KEY={credentials_file_path} + HTTP.SSL.CAPATH={trustroots_dir} + """.format(credentials_file_path=CREDENTIALS_FILE_PATH, trustroots_dir=TRUSTROOTS_DIR) + + with open(DODS_FILE_PATH, 'w') as dods_file: + 
dods_file.write(DODS_FILE_CONTENTS) + + +def cert_is_valid(cert_file, min_lifetime=0): + """ + Returns boolean - True if the certificate is in date. + Optional argument min_lifetime is the number of seconds + which must remain. + :param cert_file: certificate file path. + :param min_lifetime: minimum lifetime (seconds) + :return: boolean + """ + try: + with open(cert_file, 'rb') as f: + crt_data = f.read() + except IOError: + return False + + try: + cert = x509.load_pem_x509_certificate(crt_data, default_backend()) + except ValueError: + return False + + now = datetime.datetime.now() + + return (cert.not_valid_before <= now + and cert.not_valid_after > now + datetime.timedelta(0, min_lifetime)) + + + +def setup_credentials(force=False): + """ + Download and create required credentials files. + Return True if credentials were set up. + Return False is credentials were already set up. + :param force: boolean + :return: boolean + """ + # Test for DODS_FILE and only re-get credentials if it doesn't + # exist AND `force` is True AND certificate is in-date. 
+ if os.path.isfile(DODS_FILE_PATH) and not force and cert_is_valid(CREDENTIALS_FILE_PATH): + print('[INFO] Security credentials already set up.') + return CREDENTIALS_FILE_PATH + + onlineca_client = OnlineCaClient() + onlineca_client.ca_cert_dir = TRUSTROOTS_DIR + + # Set up trust roots + trustroots = onlineca_client.get_trustroots( + TRUSTROOTS_SERVICE, + bootstrap=True, + write_to_ca_cert_dir=True) + + #username = input("CEDA username") + #password = getpass("CEDA password") + username = os.environ['CEDA_USERNAME'] + password = os.environ['CEDA_PASSWORD'] + + + # Write certificate credentials file + key_pair, certs = onlineca_client.get_certificate( + username, + password, + CERT_SERVICE, + pem_out_filepath=CREDENTIALS_FILE_PATH) + + # Write the dodsrc credentials file + write_dods_file_contents() + + print('[INFO] Security credentials set up.') + return CREDENTIALS_FILE_PATH + + + + + +### +++++++++ here comes the actual recipe definition +++++++++++++++++++++++ +from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim +from pangeo_forge_recipes.recipes import XarrayZarrRecipe +from pangeo_forge_recipes.recipes import setup_logging + + +def get_ssl(): + sslcontext = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH) + sslcontext.load_cert_chain(setup_credentials()) + return sslcontext + + +def make_url(time, variable, version="4.05"): + ## there is some peculiarities with 4.06 where the file pattern has an exception for cld + ## the version for cld is 4.06.01 and 4.06 for all other variables... 
+ return f'https://dap.ceda.ac.uk/badc/cru/data/cru_ts/cru_ts_{version}/data/{variable}/cru_ts{version}.1901.1910.{variable}.dat.nc.gz' + +# This is inspired by the EOBS feedstock: https://github.com/pangeo-forge/EOBS-feedstock/blob/main/feedstock/recipe.py +pattern = FilePattern( + make_url, + ConcatDim('time', keys=['']), + MergeDim(name='variable', keys=["cld", "dtr", "frs", "pet", "pre", "tmn", "tmp", "tmx", "vap", "wet"]), + fsspec_open_kwargs={'compression':'gzip', 'ssl': get_ssl()}, file_type="netcdf3" +) + + +recipe = XarrayZarrRecipe(pattern, target_chunks={'time': 40}) \ No newline at end of file From 16380c852b202b786dd05808f4aa927fc5ce53f1 Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer Date: Mon, 14 Nov 2022 19:30:01 +0100 Subject: [PATCH 2/7] added meta.yaml --- recipes/cru-ts/meta.yaml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 recipes/cru-ts/meta.yaml diff --git a/recipes/cru-ts/meta.yaml b/recipes/cru-ts/meta.yaml new file mode 100644 index 0000000000..23d128c3a8 --- /dev/null +++ b/recipes/cru-ts/meta.yaml @@ -0,0 +1,39 @@ +title: "CRU-TS dataset" +description: | + The gridded Climatic Research Unit (CRU) Time-series (TS) data version 4.05 data are month-by-month variations in climate over the period 1901-2020, + provided on high-resolution (0.5x0.5 degree) grids, produced by CRU at the University of East Anglia and funded by the UK National Centre for + Atmospheric Science (NCAS), a NERC collaborative centre. + + The CRU TS4.05 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET), + precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2020. + + The CRU TS4.05 data were produced using angular-distance weighting (ADW) interpolation. All versions prior to 4.00 used triangulation routines in IDL. 
+ Please see the release notes for full details of this version update. + +pangeo_forge_version: "0.9.0" +recipes: + - id: cru-ts-4.05 + object: "recipe:recipe" +provenance: + providers: + - name: "CEDA" + description: "Centre for Environmental Data Analysis" + roles: + - provider + url: https://catalogue.ceda.ac.uk/uuid/c26a65020a5e4b80b20018f148556681 + - name: "CRU" + description: "Climatic Research Unit" + roles: + - producer + - licensor + url: https://crudata.uea.ac.uk/cru/data/hrg + license: "Open Government License" + license_link: https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/ +maintainers: + - name: "Lars Buntemeyer" + orcid: "0000-0002-0849-2404" + github: larsbuntemeyer + +bakery: + + id: "pangeo-ldeo-nsf-earthcube" From 3602a4c75975dadbe5513d2477411f3b40c88cd0 Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer Date: Mon, 14 Nov 2022 19:49:35 +0100 Subject: [PATCH 3/7] run pre-commit --- recipes/cru-ts/meta.yaml | 35 +++++++++-------- recipes/cru-ts/recipe.py | 81 ++++++++++++++++++++-------------------- 2 files changed, 57 insertions(+), 59 deletions(-) diff --git a/recipes/cru-ts/meta.yaml b/recipes/cru-ts/meta.yaml index 23d128c3a8..f116bf91bd 100644 --- a/recipes/cru-ts/meta.yaml +++ b/recipes/cru-ts/meta.yaml @@ -1,39 +1,38 @@ -title: "CRU-TS dataset" +title: 'CRU-TS dataset' description: | - The gridded Climatic Research Unit (CRU) Time-series (TS) data version 4.05 data are month-by-month variations in climate over the period 1901-2020, - provided on high-resolution (0.5x0.5 degree) grids, produced by CRU at the University of East Anglia and funded by the UK National Centre for + The gridded Climatic Research Unit (CRU) Time-series (TS) data version 4.05 data are month-by-month variations in climate over the period 1901-2020, + provided on high-resolution (0.5x0.5 degree) grids, produced by CRU at the University of East Anglia and funded by the UK National Centre for Atmospheric Science (NCAS), a NERC collaborative centre. 
- - The CRU TS4.05 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET), + + The CRU TS4.05 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET), precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2020. - The CRU TS4.05 data were produced using angular-distance weighting (ADW) interpolation. All versions prior to 4.00 used triangulation routines in IDL. - Please see the release notes for full details of this version update. + The CRU TS4.05 data were produced using angular-distance weighting (ADW) interpolation. All versions prior to 4.00 used triangulation routines in IDL. + Please see the release notes for full details of this version update. -pangeo_forge_version: "0.9.0" +pangeo_forge_version: '0.9.0' recipes: - id: cru-ts-4.05 - object: "recipe:recipe" + object: 'recipe:recipe' provenance: providers: - - name: "CEDA" - description: "Centre for Environmental Data Analysis" + - name: 'CEDA' + description: 'Centre for Environmental Data Analysis' roles: - provider url: https://catalogue.ceda.ac.uk/uuid/c26a65020a5e4b80b20018f148556681 - - name: "CRU" - description: "Climatic Research Unit" + - name: 'CRU' + description: 'Climatic Research Unit' roles: - producer - licensor url: https://crudata.uea.ac.uk/cru/data/hrg - license: "Open Government License" + license: 'Open Government License' license_link: https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/ maintainers: - - name: "Lars Buntemeyer" - orcid: "0000-0002-0849-2404" + - name: 'Lars Buntemeyer' + orcid: '0000-0002-0849-2404' github: larsbuntemeyer bakery: - - id: "pangeo-ldeo-nsf-earthcube" + id: 'pangeo-ldeo-nsf-earthcube' diff --git a/recipes/cru-ts/recipe.py b/recipes/cru-ts/recipe.py index 451f37753b..d3c4758848 100644 --- 
a/recipes/cru-ts/recipe.py +++ b/recipes/cru-ts/recipe.py @@ -1,18 +1,19 @@ -# The authentication is directly copied from https://github.com/cedadev/opendap-python-example/blob/master/simple_file_downloader.py +# The authentication is directly copied from: +# +# https://github.com/cedadev/opendap-python-example/blob/master/simple_file_downloader.py # -import os import datetime +import os import ssl -from getpass import getpass - - -# Import third-party libraries -from cryptography import x509 -from cryptography.hazmat.backends import default_backend # from contrail.security.online_ca_client import OnlineCaClient # pip install ContrailOnlineCAClient from contrail.security.onlineca.client import OnlineCaClient +from cryptography import x509 +from cryptography.hazmat.backends import default_backend + +from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim +from pangeo_forge_recipes.recipes import XarrayZarrRecipe # Credentials defaults DODS_FILE_CONTENTS = """HTTP.COOKIEJAR=./dods_cookies @@ -41,7 +42,9 @@ def write_dods_file_contents(): HTTP.SSL.CERTIFICATE={credentials_file_path} HTTP.SSL.KEY={credentials_file_path} HTTP.SSL.CAPATH={trustroots_dir} - """.format(credentials_file_path=CREDENTIALS_FILE_PATH, trustroots_dir=TRUSTROOTS_DIR) + """.format( + credentials_file_path=CREDENTIALS_FILE_PATH, trustroots_dir=TRUSTROOTS_DIR + ) with open(DODS_FILE_PATH, 'w') as dods_file: dods_file.write(DODS_FILE_CONTENTS) @@ -59,7 +62,7 @@ def cert_is_valid(cert_file, min_lifetime=0): try: with open(cert_file, 'rb') as f: crt_data = f.read() - except IOError: + except OSError: return False try: @@ -69,11 +72,11 @@ def cert_is_valid(cert_file, min_lifetime=0): now = datetime.datetime.now() - return (cert.not_valid_before <= now - and cert.not_valid_after > now + datetime.timedelta(0, min_lifetime)) - + return cert.not_valid_before <= now and cert.not_valid_after > now + datetime.timedelta( + 0, min_lifetime + ) + - def setup_credentials(force=False): """ 
Download and create required credentials files. @@ -92,23 +95,17 @@ def setup_credentials(force=False): onlineca_client.ca_cert_dir = TRUSTROOTS_DIR # Set up trust roots - trustroots = onlineca_client.get_trustroots( - TRUSTROOTS_SERVICE, - bootstrap=True, - write_to_ca_cert_dir=True) - - #username = input("CEDA username") - #password = getpass("CEDA password") + onlineca_client.get_trustroots(TRUSTROOTS_SERVICE, bootstrap=True, write_to_ca_cert_dir=True) + + # username = input("CEDA username") + # password = getpass("CEDA password") username = os.environ['CEDA_USERNAME'] password = os.environ['CEDA_PASSWORD'] - # Write certificate credentials file key_pair, certs = onlineca_client.get_certificate( - username, - password, - CERT_SERVICE, - pem_out_filepath=CREDENTIALS_FILE_PATH) + username, password, CERT_SERVICE, pem_out_filepath=CREDENTIALS_FILE_PATH + ) # Write the dodsrc credentials file write_dods_file_contents() @@ -117,13 +114,7 @@ def setup_credentials(force=False): return CREDENTIALS_FILE_PATH - - - -### +++++++++ here comes the actual recipe definition +++++++++++++++++++++++ -from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim -from pangeo_forge_recipes.recipes import XarrayZarrRecipe -from pangeo_forge_recipes.recipes import setup_logging +# +++++++++ here comes the actual recipe definition +++++++++++++++++++++++ def get_ssl(): @@ -132,18 +123,26 @@ def get_ssl(): return sslcontext -def make_url(time, variable, version="4.05"): - ## there is some peculiarities with 4.06 where the file pattern has an exception for cld - ## the version for cld is 4.06.01 and 4.06 for all other variables... - return f'https://dap.ceda.ac.uk/badc/cru/data/cru_ts/cru_ts_{version}/data/{variable}/cru_ts{version}.1901.1910.{variable}.dat.nc.gz' +def make_url(time, variable, version='4.05'): + # there is some peculiarities with 4.06 where the file pattern has an exception for cld + # the version for cld is 4.06.01 and 4.06 for all other variables... 
+ return ( + f'https://dap.ceda.ac.uk/badc/cru/data/cru_ts/cru_ts_{version}/' + 'data/{variable}/cru_ts{version}.1901.2020.{variable}.dat.nc.gz' + ) + -# This is inspired by the EOBS feedstock: https://github.com/pangeo-forge/EOBS-feedstock/blob/main/feedstock/recipe.py +# This is inspired by the EOBS feedstock: +# https://github.com/pangeo-forge/EOBS-feedstock/blob/main/feedstock/recipe.py pattern = FilePattern( make_url, ConcatDim('time', keys=['']), - MergeDim(name='variable', keys=["cld", "dtr", "frs", "pet", "pre", "tmn", "tmp", "tmx", "vap", "wet"]), - fsspec_open_kwargs={'compression':'gzip', 'ssl': get_ssl()}, file_type="netcdf3" + MergeDim( + name='variable', keys=['cld', 'dtr', 'frs', 'pet', 'pre', 'tmn', 'tmp', 'tmx', 'vap', 'wet'] + ), + fsspec_open_kwargs={'compression': 'gzip', 'ssl': get_ssl()}, + file_type='netcdf3', ) -recipe = XarrayZarrRecipe(pattern, target_chunks={'time': 40}) \ No newline at end of file +recipe = XarrayZarrRecipe(pattern, target_chunks={'time': 40}) From 21d4ef04795a8d013b3b0e65d145cc8bedc108a4 Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer Date: Mon, 14 Nov 2022 20:02:14 +0100 Subject: [PATCH 4/7] added requirements.txt --- recipes/cru-ts/requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 recipes/cru-ts/requirements.txt diff --git a/recipes/cru-ts/requirements.txt b/recipes/cru-ts/requirements.txt new file mode 100644 index 0000000000..63700a69cb --- /dev/null +++ b/recipes/cru-ts/requirements.txt @@ -0,0 +1 @@ +ContrailOnlineCAClient From f0775207a0e3e24bbcae320ecc9c5425685e91e4 Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer Date: Tue, 15 Nov 2022 12:40:50 +0100 Subject: [PATCH 5/7] updates to CRU-TS 4.06 --- recipes/cru-ts/meta.yaml | 8 ++++---- recipes/cru-ts/recipe.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/recipes/cru-ts/meta.yaml b/recipes/cru-ts/meta.yaml index f116bf91bd..039b50e7cf 100644 --- a/recipes/cru-ts/meta.yaml +++ b/recipes/cru-ts/meta.yaml @@ 
-1,18 +1,18 @@ title: 'CRU-TS dataset' description: | - The gridded Climatic Research Unit (CRU) Time-series (TS) data version 4.05 data are month-by-month variations in climate over the period 1901-2020, + The gridded Climatic Research Unit (CRU) Time-series (TS) data version 4.06 data are month-by-month variations in climate over the period 1901-2021, provided on high-resolution (0.5x0.5 degree) grids, produced by CRU at the University of East Anglia and funded by the UK National Centre for Atmospheric Science (NCAS), a NERC collaborative centre. - The CRU TS4.05 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET), + The CRU TS4.06 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET), precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2020. - The CRU TS4.05 data were produced using angular-distance weighting (ADW) interpolation. All versions prior to 4.00 used triangulation routines in IDL. + The CRU TS4.06 data were produced using angular-distance weighting (ADW) interpolation. All versions prior to 4.00 used triangulation routines in IDL. Please see the release notes for full details of this version update. 
pangeo_forge_version: '0.9.0' recipes: - - id: cru-ts-4.05 + - id: cru-ts-4.06 object: 'recipe:recipe' provenance: providers: diff --git a/recipes/cru-ts/recipe.py b/recipes/cru-ts/recipe.py index d3c4758848..8f06920c80 100644 --- a/recipes/cru-ts/recipe.py +++ b/recipes/cru-ts/recipe.py @@ -123,12 +123,18 @@ def get_ssl(): return sslcontext -def make_url(time, variable, version='4.05'): +# for testing, set last_year=1910 for only quick download +def make_url(time, variable, last_year=None, version='4.06'): # there is some peculiarities with 4.06 where the file pattern has an exception for cld # the version for cld is 4.06.01 and 4.06 for all other variables... + if last_year is None: + last_year = 2015 + int(version[-1]) + var_version = version + if variable == 'cld' and version == '4.06': + var_version = version + '.01' return ( f'https://dap.ceda.ac.uk/badc/cru/data/cru_ts/cru_ts_{version}/' - 'data/{variable}/cru_ts{version}.1901.2020.{variable}.dat.nc.gz' + f'data/{variable}/cru_ts{var_version}.1901.{last_year}.{variable}.dat.nc.gz' ) @@ -145,4 +151,4 @@ def make_url(time, variable, version='4.05'): ) -recipe = XarrayZarrRecipe(pattern, target_chunks={'time': 40}) +recipe = XarrayZarrRecipe(pattern, target_chunks={'time': 120}) From 1429eaeba6d64e4fa2473540232982d25c18f53c Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer <5659125+larsbuntemeyer@users.noreply.github.com> Date: Tue, 15 Nov 2022 13:27:05 +0100 Subject: [PATCH 6/7] Update meta.yaml --- recipes/cru-ts/meta.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/recipes/cru-ts/meta.yaml b/recipes/cru-ts/meta.yaml index 039b50e7cf..64f16f394f 100644 --- a/recipes/cru-ts/meta.yaml +++ b/recipes/cru-ts/meta.yaml @@ -5,10 +5,9 @@ description: | Atmospheric Science (NCAS), a NERC collaborative centre. 
The CRU TS4.06 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET), - precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2020. + precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2021. - The CRU TS4.06 data were produced using angular-distance weighting (ADW) interpolation. All versions prior to 4.00 used triangulation routines in IDL. - Please see the release notes for full details of this version update. + The CRU TS4.06 data were produced using angular-distance weighting (ADW) interpolation. pangeo_forge_version: '0.9.0' recipes: From 1155f1e0a38ea3401b681a1fc3a4c662239ccefd Mon Sep 17 00:00:00 2001 From: Lars Buntemeyer Date: Tue, 15 Nov 2022 14:00:54 +0100 Subject: [PATCH 7/7] added decode_times keyword to recipe --- recipes/cru-ts/recipe.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipes/cru-ts/recipe.py b/recipes/cru-ts/recipe.py index 8f06920c80..885bf08f4f 100644 --- a/recipes/cru-ts/recipe.py +++ b/recipes/cru-ts/recipe.py @@ -151,4 +151,6 @@ def make_url(time, variable, last_year=None, version='4.06'): ) -recipe = XarrayZarrRecipe(pattern, target_chunks={'time': 120}) +recipe = XarrayZarrRecipe( + pattern, target_chunks={'time': 120}, xarray_open_kwargs={'decode_times': False} +)