Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CRU-TS 4.06 #220

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions recipes/cru-ts/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
title: 'CRU-TS dataset'
description: |
The gridded Climatic Research Unit (CRU) Time-series (TS) version 4.06 data are month-by-month variations in climate over the period 1901-2021,
provided on high-resolution (0.5x0.5 degree) grids, produced by CRU at the University of East Anglia and funded by the UK National Centre for
Atmospheric Science (NCAS), a NERC collaborative centre.

The CRU TS4.06 variables are cloud cover, diurnal temperature range, frost day frequency, wet day frequency, potential evapotranspiration (PET),
precipitation, daily mean temperature, monthly average daily maximum and minimum temperature, and vapour pressure for the period January 1901 - December 2021.

The CRU TS4.06 data were produced using angular-distance weighting (ADW) interpolation.

pangeo_forge_version: '0.9.0'
recipes:
- id: cru-ts-4.06
object: 'recipe:recipe'
provenance:
providers:
- name: 'CEDA'
description: 'Centre for Environmental Data Analysis'
roles:
- provider
url: https://catalogue.ceda.ac.uk/uuid/c26a65020a5e4b80b20018f148556681
- name: 'CRU'
description: 'Climatic Research Unit'
roles:
- producer
- licensor
url: https://crudata.uea.ac.uk/cru/data/hrg
license: 'Open Government Licence'
license_link: https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/
maintainers:
- name: 'Lars Buntemeyer'
orcid: '0000-0002-0849-2404'
github: larsbuntemeyer

bakery:
id: 'pangeo-ldeo-nsf-earthcube'
156 changes: 156 additions & 0 deletions recipes/cru-ts/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# The authentication is directly copied from:
#
# https://github.com/cedadev/opendap-python-example/blob/master/simple_file_downloader.py
#
import datetime
import os
import ssl

# from contrail.security.online_ca_client import OnlineCaClient
# pip install ContrailOnlineCAClient
from contrail.security.onlineca.client import OnlineCaClient
from cryptography import x509
from cryptography.hazmat.backends import default_backend

from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim
from pangeo_forge_recipes.recipes import XarrayZarrRecipe

# Credentials defaults
#
# Default ``.dodsrc`` contents using paths relative to the working directory.
# NOTE: write_dods_file_contents() builds its own local template with absolute
# paths, so this module-level constant is not what ends up in the dodsrc file.
DODS_FILE_CONTENTS = """HTTP.COOKIEJAR=./dods_cookies
HTTP.SSL.CERTIFICATE=./credentials.pem
HTTP.SSL.KEY=./credentials.pem
HTTP.SSL.CAPATH=./ca-trustroots
"""

# Locations (inside the user's home directory) of the dodsrc file and of the
# directory that holds the certificate material.
DODS_FILE_PATH = os.path.expanduser('~/.dodsrc')
CERTS_DIR = os.path.expanduser('~/.certs')

# Create the certificate directory at import time. ``exist_ok=True`` avoids the
# check-then-create race of a separate ``os.path.isdir`` test (e.g. when several
# workers import this module at the same time).
os.makedirs(CERTS_DIR, exist_ok=True)

# Trust roots directory and the combined certificate/key PEM file.
TRUSTROOTS_DIR = os.path.join(CERTS_DIR, 'ca-trustroots')
CREDENTIALS_FILE_PATH = os.path.join(CERTS_DIR, 'credentials.pem')

# CEDA online CA endpoints used to fetch the trust roots and the certificate.
TRUSTROOTS_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/trustroots/'
CERT_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/certificate/'


def write_dods_file_contents():
    """
    Write the dodsrc configuration file to DODS_FILE_PATH.

    The file points the certificate and key entries at CREDENTIALS_FILE_PATH
    and the CA path at TRUSTROOTS_DIR. Any existing file is overwritten.
    """
    # The template deliberately starts with a newline, exactly as written to disk.
    template = """
HTTP.COOKIEJAR=./dods_cookies
HTTP.SSL.CERTIFICATE={credentials_file_path}
HTTP.SSL.KEY={credentials_file_path}
HTTP.SSL.CAPATH={trustroots_dir}
"""
    rendered = template.format(
        credentials_file_path=CREDENTIALS_FILE_PATH,
        trustroots_dir=TRUSTROOTS_DIR,
    )

    with open(DODS_FILE_PATH, 'w') as handle:
        handle.write(rendered)


def cert_is_valid(cert_file, min_lifetime=0):
    """
    Return True if the certificate in ``cert_file`` is currently in date.

    A file that cannot be read, or whose contents cannot be parsed as a PEM
    X.509 certificate, is reported as not valid rather than raising.

    :param cert_file: certificate file path.
    :param min_lifetime: minimum number of seconds the certificate must
        still be valid for, counted from now.
    :return: boolean
    """
    try:
        with open(cert_file, 'rb') as f:
            crt_data = f.read()
    except OSError:
        return False

    try:
        cert = x509.load_pem_x509_certificate(crt_data, default_backend())
    except ValueError:
        return False

    # ``not_valid_before`` / ``not_valid_after`` are naive datetimes expressed
    # in UTC, so "now" must be naive UTC as well. Using local time would shift
    # the validity window by the machine's UTC offset.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    must_outlive = now + datetime.timedelta(seconds=min_lifetime)

    return cert.not_valid_before <= now and cert.not_valid_after > must_outlive


def setup_credentials(force=False):
    """
    Download and create the required credentials files.

    Existing credentials are reused when the dodsrc file exists, ``force`` is
    False and the certificate at CREDENTIALS_FILE_PATH is still in date.
    Otherwise the trust roots and a new certificate are fetched from the
    CEDA online CA services and the dodsrc file is rewritten.

    The CEDA login is read from the ``CEDA_USERNAME`` and ``CEDA_PASSWORD``
    environment variables.

    :param force: boolean - fetch new credentials even if valid ones exist.
    :return: path of the PEM credentials file (CREDENTIALS_FILE_PATH),
        whether the credentials were reused or newly created.
    :raises KeyError: if ``CEDA_USERNAME`` or ``CEDA_PASSWORD`` is not set
        when new credentials have to be fetched.
    """
    # Reuse the existing credentials only if the dodsrc file exists AND
    # `force` is False AND the certificate is in-date; otherwise re-get them.
    if os.path.isfile(DODS_FILE_PATH) and not force and cert_is_valid(CREDENTIALS_FILE_PATH):
        print('[INFO] Security credentials already set up.')
        return CREDENTIALS_FILE_PATH

    onlineca_client = OnlineCaClient()
    onlineca_client.ca_cert_dir = TRUSTROOTS_DIR

    # Set up trust roots
    onlineca_client.get_trustroots(TRUSTROOTS_SERVICE, bootstrap=True, write_to_ca_cert_dir=True)

    # Credentials come from the environment so the recipe can run unattended.
    username = os.environ['CEDA_USERNAME']
    password = os.environ['CEDA_PASSWORD']

    # Write certificate credentials file. The returned key pair and
    # certificates are not needed here because the client writes them to
    # `pem_out_filepath`.
    onlineca_client.get_certificate(
        username, password, CERT_SERVICE, pem_out_filepath=CREDENTIALS_FILE_PATH
    )

    # Write the dodsrc credentials file
    write_dods_file_contents()

    print('[INFO] Security credentials set up.')
    return CREDENTIALS_FILE_PATH


# +++++++++ here comes the actual recipe definition +++++++++++++++++++++++


def get_ssl():
    """
    Build the SSL context used to authenticate against the data server.

    A default server-authentication context is created and the client
    certificate chain returned by ``setup_credentials()`` is loaded into it.

    :return: the configured ``ssl.SSLContext``
    """
    context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH)
    credentials_pem = setup_credentials()
    context.load_cert_chain(credentials_pem)
    return context


# For testing, pass last_year=1910 to make_url for a quick download.
def make_url(time, variable, last_year=None, version='4.06'):
    """
    Build the download URL of the gzipped NetCDF file for one variable.

    :param time: key of the time dimension; not used to build the URL.
    :param variable: short variable name used in the path and file name
        (e.g. ``'tmp'``, ``'cld'``).
    :param last_year: last year in the file name. When None it is derived
        as 2015 plus the last digit of ``version`` (2021 for ``'4.06'``).
    :param version: dataset version string used in the directory name.
    :return: the URL as a string
    """
    final_year = 2015 + int(version[-1]) if last_year is None else last_year

    # Release 4.06 has one exception in its file naming: the 'cld' files carry
    # version 4.06.01, while all other variables use plain 4.06.
    needs_patch_suffix = variable == 'cld' and version == '4.06'
    file_version = f'{version}.01' if needs_patch_suffix else version

    directory = (
        f'https://dap.ceda.ac.uk/badc/cru/data/cru_ts/cru_ts_{version}/'
        f'data/{variable}/'
    )
    file_name = f'cru_ts{file_version}.1901.{final_year}.{variable}.dat.nc.gz'
    return directory + file_name


# This is inspired by the EOBS feedstock:
# https://github.com/pangeo-forge/EOBS-feedstock/blob/main/feedstock/recipe.py
# File pattern describing the input files: one URL per variable, produced by
# make_url (which ignores its `time` argument).
# NOTE: get_ssl() is evaluated when this statement runs, i.e. when the module
# is imported, so importing the recipe calls setup_credentials().
pattern = FilePattern(
make_url,
# Single (empty) key on the concat dimension.
# NOTE(review): presumably each file holds the full time series - confirm.
ConcatDim('time', keys=['']),
# One input per variable short name; these are the values passed to
# make_url as `variable`.
MergeDim(
name='variable', keys=['cld', 'dtr', 'frs', 'pet', 'pre', 'tmn', 'tmp', 'tmx', 'vap', 'wet']
),
# The URLs end in `.gz`; the SSL context carries the client certificate.
fsspec_open_kwargs={'compression': 'gzip', 'ssl': get_ssl()},
file_type='netcdf3',
)


# Recipe object built from `pattern`; meta.yaml refers to it as 'recipe:recipe'.
# Target chunks hold 120 steps along `time`, and time decoding is switched off
# when the inputs are opened with xarray.
# NOTE(review): with monthly data 120 steps would be ten years per chunk - confirm.
recipe = XarrayZarrRecipe(
pattern, target_chunks={'time': 120}, xarray_open_kwargs={'decode_times': False}
)
1 change: 1 addition & 0 deletions recipes/cru-ts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ContrailOnlineCAClient