diff --git a/recipes/cru-ts/meta.yaml b/recipes/cru-ts/meta.yaml
new file mode 100644
index 0000000000..64f16f394f
--- /dev/null
+++ b/recipes/cru-ts/meta.yaml
@@ -0,0 +1,37 @@
+title: 'CRU-TS dataset'
+description: |
+  The gridded Climatic Research Unit (CRU) Time-series (TS) data version 4.06 data are month-by-month variations in climate over the period 1901-2021,
+  provided on high-resolution (0.5x0.5 degree) grids, produced by CRU at the University of East Anglia and funded by the UK National Centre for
+  Atmospheric Science (NCAS), a NERC collaborative centre.
+
+  The CRU TS4.06 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET),
+  precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2021.
+
+  The CRU TS4.06 data were produced using angular-distance weighting (ADW) interpolation.
+
+pangeo_forge_version: '0.9.0'
+recipes:
+  - id: cru-ts-4.06
+    object: 'recipe:recipe'
+provenance:
+  providers:
+    - name: 'CEDA'
+      description: 'Centre for Environmental Data Analysis'
+      roles:
+        - provider
+      url: https://catalogue.ceda.ac.uk/uuid/c26a65020a5e4b80b20018f148556681
+    - name: 'CRU'
+      description: 'Climatic Research Unit'
+      roles:
+        - producer
+        - licensor
+      url: https://crudata.uea.ac.uk/cru/data/hrg
+  license: 'Open Government License'
+  license_link: https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/
+maintainers:
+  - name: 'Lars Buntemeyer'
+    orcid: '0000-0002-0849-2404'
+    github: larsbuntemeyer
+
+bakery:
+  id: 'pangeo-ldeo-nsf-earthcube'
diff --git a/recipes/cru-ts/recipe.py b/recipes/cru-ts/recipe.py
new file mode 100644
index 0000000000..885bf08f4f
--- /dev/null
+++ b/recipes/cru-ts/recipe.py
@@ -0,0 +1,185 @@
+# The authentication is directly copied from:
+#
+# https://github.com/cedadev/opendap-python-example/blob/master/simple_file_downloader.py
+#
+import datetime
+import os
+import ssl
+
+# from contrail.security.online_ca_client import OnlineCaClient
+# pip install ContrailOnlineCAClient
+from contrail.security.onlineca.client import OnlineCaClient
+from cryptography import x509
+from cryptography.hazmat.backends import default_backend
+
+from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim
+from pangeo_forge_recipes.recipes import XarrayZarrRecipe
+
+# Credentials defaults
+DODS_FILE_CONTENTS = """HTTP.COOKIEJAR=./dods_cookies
+HTTP.SSL.CERTIFICATE=./credentials.pem
+HTTP.SSL.KEY=./credentials.pem
+HTTP.SSL.CAPATH=./ca-trustroots
+"""
+
+DODS_FILE_PATH = os.path.expanduser('~/.dodsrc')
+CERTS_DIR = os.path.expanduser('~/.certs')
+
+# exist_ok=True avoids a race between checking for and creating the directory.
+os.makedirs(CERTS_DIR, exist_ok=True)
+
+TRUSTROOTS_DIR = os.path.join(CERTS_DIR, 'ca-trustroots')
+CREDENTIALS_FILE_PATH = os.path.join(CERTS_DIR, 'credentials.pem')
+
+TRUSTROOTS_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/trustroots/'
+CERT_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/certificate/'
+
+
+def write_dods_file_contents():
+    """
+    Write the dodsrc file (DODS_FILE_PATH), replacing any existing one.
+    The file refers to the credentials file and the trust roots directory
+    by the paths in CREDENTIALS_FILE_PATH and TRUSTROOTS_DIR.
+    """
+    # Build the content line by line so that every key starts in column 0;
+    # an indented triple-quoted literal leaks its indentation into the file.
+    dods_lines = [
+        'HTTP.COOKIEJAR=./dods_cookies',
+        f'HTTP.SSL.CERTIFICATE={CREDENTIALS_FILE_PATH}',
+        f'HTTP.SSL.KEY={CREDENTIALS_FILE_PATH}',
+        f'HTTP.SSL.CAPATH={TRUSTROOTS_DIR}',
+    ]
+
+    with open(DODS_FILE_PATH, 'w') as dods_file:
+        dods_file.write('\n'.join(dods_lines) + '\n')
+
+
+def cert_is_valid(cert_file, min_lifetime=0):
+    """
+    Returns boolean - True if the certificate is in date.
+    Optional argument min_lifetime is the number of seconds
+    which must remain.
+    A missing, unreadable or unparsable certificate file counts as invalid.
+    :param cert_file: certificate file path.
+    :param min_lifetime: minimum lifetime (seconds)
+    :return: boolean
+    """
+    try:
+        with open(cert_file, 'rb') as f:
+            crt_data = f.read()
+    except OSError:
+        return False
+
+    try:
+        cert = x509.load_pem_x509_certificate(crt_data, default_backend())
+    except ValueError:
+        return False
+
+    # The validity bounds of the certificate are naive datetimes in UTC, so
+    # compare them with a naive UTC "now" instead of the local wall-clock time.
+    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
+    required_until = now + datetime.timedelta(seconds=min_lifetime)
+
+    return cert.not_valid_before <= now and cert.not_valid_after > required_until
+
+
+def setup_credentials(force=False):
+    """
+    Download and create required credentials files.
+    Existing credentials are reused if the dodsrc file exists, the stored
+    certificate is in date and `force` is False; otherwise the trust roots
+    and a new certificate are requested from the CEDA online CA services.
+    The login is read from the environment variables CEDA_USERNAME and
+    CEDA_PASSWORD.
+    :param force: boolean - renew the credentials even if they are still valid
+    :return: path of the credentials file (CREDENTIALS_FILE_PATH)
+    :raises KeyError: if CEDA_USERNAME or CEDA_PASSWORD is not set
+    """
+    # Only re-get the credentials if the dodsrc file doesn't exist OR
+    # `force` is True OR the certificate is missing, invalid or out of date.
+    if not force and os.path.isfile(DODS_FILE_PATH) and cert_is_valid(CREDENTIALS_FILE_PATH):
+        print('[INFO] Security credentials already set up.')
+        return CREDENTIALS_FILE_PATH
+
+    # Read the login first so that a missing variable fails before any request.
+    username = os.environ['CEDA_USERNAME']
+    password = os.environ['CEDA_PASSWORD']
+
+    onlineca_client = OnlineCaClient()
+    onlineca_client.ca_cert_dir = TRUSTROOTS_DIR
+
+    # Set up trust roots
+    onlineca_client.get_trustroots(TRUSTROOTS_SERVICE, bootstrap=True, write_to_ca_cert_dir=True)
+
+    # Write certificate credentials file; the return value is not needed because
+    # everything downstream reads the PEM file.
+    onlineca_client.get_certificate(
+        username, password, CERT_SERVICE, pem_out_filepath=CREDENTIALS_FILE_PATH
+    )
+
+    # Write the dodsrc credentials file
+    write_dods_file_contents()
+
+    print('[INFO] Security credentials set up.')
+    return CREDENTIALS_FILE_PATH
+
+
+# +++++++++ here comes the actual recipe definition +++++++++++++++++++++++
+
+
+def get_ssl():
+    """
+    Create the SSL context used to open the files on the CEDA server.
+    The credentials are set up first (see `setup_credentials`) and loaded
+    into the context as client certificate chain.
+    :return: ssl.SSLContext
+    """
+    sslcontext = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH)
+    sslcontext.load_cert_chain(setup_credentials())
+    return sslcontext
+
+
+# for testing, set last_year=1910 for only quick download
+def make_url(time, variable, last_year=None, version='4.06'):
+    """
+    Create the url of the file of one variable.
+    :param time: key of the `time` dimension of the file pattern (not used,
+        the url does not depend on it)
+    :param variable: name of the variable, e.g. 'tmp'
+    :param last_year: last year in the file name, derived from `version` if None
+    :param version: version of the CRU TS dataset
+    :return: url string
+    """
+    # there are some peculiarities with 4.06 where the file pattern has an exception for cld
+    # the version for cld is 4.06.01 and 4.06 for all other variables...
+    if last_year is None:
+        # The last year is taken to be 2015 plus the minor version number
+        # (4.06 -> 2021); use the whole minor number, not only its last digit.
+        last_year = 2015 + int(version.split('.')[1])
+    var_version = version
+    if variable == 'cld' and version == '4.06':
+        var_version = version + '.01'
+    return (
+        f'https://dap.ceda.ac.uk/badc/cru/data/cru_ts/cru_ts_{version}/'
+        f'data/{variable}/cru_ts{var_version}.1901.{last_year}.{variable}.dat.nc.gz'
+    )
+
+
+# This is inspired by the EOBS feedstock:
+# https://github.com/pangeo-forge/EOBS-feedstock/blob/main/feedstock/recipe.py
+# NOTE: get_ssl() is called when this module is imported, so importing the
+# recipe needs CEDA_USERNAME/CEDA_PASSWORD unless valid credentials exist.
+pattern = FilePattern(
+    make_url,
+    ConcatDim('time', keys=['']),
+    MergeDim(
+        name='variable', keys=['cld', 'dtr', 'frs', 'pet', 'pre', 'tmn', 'tmp', 'tmx', 'vap', 'wet']
+    ),
+    fsspec_open_kwargs={'compression': 'gzip', 'ssl': get_ssl()},
+    file_type='netcdf3',
+)
+
+
+recipe = XarrayZarrRecipe(
+    pattern, target_chunks={'time': 120}, xarray_open_kwargs={'decode_times': False}
+)
diff --git a/recipes/cru-ts/requirements.txt b/recipes/cru-ts/requirements.txt
new file mode 100644
index 0000000000..63700a69cb
--- /dev/null
+++ b/recipes/cru-ts/requirements.txt
@@ -0,0 +1 @@
+ContrailOnlineCAClient