You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# python3.8
import concurrent.futures
import datetime
import json
import os

import boto3
import rio_stac
from pypgstac import pypgstac
# tqdm provides progress bars
from tqdm import tqdm

# GDAL attempts to list the directory when it opens a file, in order to
# find "sidecar" files. This setting tells GDAL to assume the directory
# is empty when opening a file, saving both on S3 LIST costs and
# runtime. See: https://github.com/OSGeo/gdal/issues/909
os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "EMPTY_DIR"

# use `profile_name: str` param `Session()` or default AWS profile to ensure
# correct access
BUCKET = boto3.Session().resource("s3").Bucket("climatedashboard-data")
# Collection ids (CMIP6 variable names) accepted by `create_stac_item`.
# Filled in by `main()` from `cmip6-collections.json`; it is defined here so
# that the function fails with a clear validation error, instead of a
# NameError, when the module is imported rather than run as a script.
_vars: list = []

# Scenario names accepted in the filename.
_VALID_SSPS = ("ssp245", "ssp585")

# Accepted year range for the YYYYMM date in the filename. The lower bound
# matches the original check (> 195000). The original upper bound (2102101)
# was a 7-digit typo that accepted every 6-digit value; 2100 is taken from the
# temporal extent of the collections -- NOTE(review): confirm the last year.
_MIN_YEAR = 1950
_MAX_YEAR = 2100

_TEMP_ITEMS_FILE = "cmip6-monthly-ensemble-items-temp.json"


def _parse_item_filename(key, valid_vars):
    """Extract and validate (variable, ssp, datetime) from an S3 object key.

    The last path component is split on "_": the first field is the variable,
    the fourth field is the SSP and the last field (minus ".tif") is the date
    in YYYYMM form.

    :param key: S3 object key of the data file
    :param valid_vars: collection of accepted variable names
    :returns: tuple of (variable name, ssp, datetime.datetime of the month)
    :raises ValueError: if the ssp, date or variable cannot be extracted
    """
    filename = key.split("/")[-1]
    fields = filename.split("_")

    # A filename with fewer than four fields has no SSP field at all.
    if len(fields) < 4 or fields[3] not in _VALID_SSPS:
        raise ValueError(f"Unable to extract ssp from filename: {key}")
    ssp = fields[3]

    date = fields[-1].replace(".tif", "")
    item_datetime = None
    # Require exactly six digits: strptime alone would also accept "20151".
    if len(date) == 6 and date.isdigit():
        try:
            item_datetime = datetime.datetime.strptime(date, "%Y%m")
        except ValueError:
            # e.g. month 13 or year 0000; reported by the check below
            item_datetime = None
    if item_datetime is None or not _MIN_YEAR <= item_datetime.year <= _MAX_YEAR:
        raise ValueError(f"Unable to extract date from filename: {key}")

    var = fields[0]
    if var not in valid_vars:
        raise ValueError(f"Unable to extract variable from filename: {key}")

    return var, ssp, item_datetime


# The annotations are strings on purpose: `Union` and `pystac` are not
# imported by this script and `boto3.resources.factory.s3` is not a real
# attribute, so evaluating them would fail as soon as the function is defined.
def create_stac_item(
    obj: "boto3.resources.factory.s3.ObjectSummary",
) -> "Union[pystac.Item, str]":
    """
    Generates a STAC Item object from a CMIP6 data file in S3

    :param obj: The S3 object summary of the file for which to create a
        STAC Item
    :returns: the STAC Item, or the string "FAILED:<key>" if STAC Item
        generation failed
    :raises ValueError: if unable to extract variable name, date or SSP
        from the filename
    """
    var, ssp, item_datetime = _parse_item_filename(obj.key, _vars)

    try:
        return rio_stac.stac.create_stac_item(
            source=f"s3://{obj.bucket_name}/{obj.key}",
            collection=var,
            properties={
                "cmip6:model": "ensemble",
                "cmip6:ssp": ssp,
                "cmip6:variable": var,
            },
            input_datetime=item_datetime,
            # extensions=None  # TODO: official cmip6 extension?
            # collection_url=None  # TODO
            with_proj=True,
            with_raster=True,
        )
    except Exception:
        # Best effort: report the failure to the caller as a sentinel string
        # so one bad file does not stop the other worker threads.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit through.
        return f"FAILED:{obj.key}"


def main() -> None:
    """Generate STAC Items for the monthly CMIP6 ensemble files and load
    them into pgstac, one year at a time."""
    # see below for `cmip6-collections.json` content
    with open("./cmip6-collections.json", "r") as f:
        # one JSON document per line; tolerate blank/trailing lines
        collections = [json.loads(line) for line in f if line.strip()]

    # mutate in place so `create_stac_item` sees the same list object
    _vars[:] = [c["id"] for c in collections]

    # S3 prefix for searching
    prefix = "cmip6/monthly/CMIP6_ensemble_median"

    # collects ensemble data across all variables
    objs = [
        obj
        for var in _vars
        for obj in BUCKET.objects.filter(Prefix=f"{prefix}/{var}/")
        if "historical" not in obj.key
    ]

    # Executes in ~20 minutes
    with concurrent.futures.ThreadPoolExecutor(max_workers=25) as executor:
        results = list(
            tqdm(
                executor.map(create_stac_item, objs),
                total=len(objs),  # sets total length of progressbar
            )
        )

    # Verify no failures:
    failed = [x for x in results if isinstance(x, str) and x.startswith("FAILED:")]
    if failed:
        for entry in failed:
            print(entry)
        print(f"FAILED: {len(failed)} (of {len(results)}). Aborting...")
        # non-zero exit status so callers can detect the failure
        raise SystemExit(1)

    # sort by date (to optimize loading) and dump to file for safekeeping
    items = sorted(
        (x.to_dict() for x in results),
        key=lambda item: item["properties"]["datetime"],
    )
    with open("cmip6-monthly-ensemble-items-sorted.json", "w") as f:
        f.write("\n".join(json.dumps(item) for item in items))

    # if loading from file, replace `items` above with:
    # with open("cmip6-monthly-ensemble-items-sorted.json", "r") as f:
    #     items = [json.loads(x) for x in f.readlines()]

    # chunk items to load into year long chunks to avoid memory issues
    # since pgstac uses staging tables and transactions to pre-process
    # and then insert the data.
    # Grouping by the year prefix of the datetime string (instead of scanning
    # a fixed range of years) skips years without items and drops nothing.
    items_by_year = {}
    for item in items:
        year = item["properties"]["datetime"][:4]
        items_by_year.setdefault(year, []).append(item)

    for year in sorted(items_by_year):
        chunk = items_by_year[year]
        print(
            f"Timerange: {chunk[0]['properties']['datetime']} - {chunk[-1]['properties']['datetime']}, Count: {len(chunk)}"
        )

        try:
            # Write to temp file (since pypgstac only loads from file)
            with open(_TEMP_ITEMS_FILE, "w") as f:
                f.write("\n".join(json.dumps(item) for item in chunk))

            pypgstac.load(
                table="items",
                file=_TEMP_ITEMS_FILE,
                # placeholder DSN: fill in real credentials before running
                dsn="postgres://[USERNAME]:[PASSWORD]@[HOST]/postgis",
                method="insert_ignore",  # use insert_ignore to avoid overwriting existing items
            )
        finally:
            # remove temp file, even when the load fails
            if os.path.exists(_TEMP_ITEMS_FILE):
                os.remove(_TEMP_ITEMS_FILE)


if __name__ == "__main__":
    main()
cmip6-collections.json:
{"id":"hurs","title":"Near-Surface Relative Humidity","description":"Near-Surface Relative Humidity, %","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"huss","title":"Near-Surface Specific Humidity","description":"Near-Surface Specific Humidity dimensionless ratio (kg/kg)","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"pr","title":"Precipitation","description":"Precipitation (mean of the daily precipitation rate) mm/month","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"rlds","title":"Surface Downwelling Longwave Radiation","description":"Surface Downwelling Longwave Radiation W m⁻²","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"rsds","title":"Surface Downwelling Shortwave Radiation","description":"Surface Downwelling Shortwave Radiation W m⁻²","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"sfcWind","title":"Daily-Mean Near-Surface Wind Speed","description":"Daily-Mean Near-Surface Wind Speed m s⁻¹","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"tas","title":"Daily Near-Surface Air Temperature","description":"Daily Near-Surface Air Temperature Degrees Kelvin (convert to C)","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"tasmax","title":"Maximum Daily Near-Surface Air Temperature","description":"Maximum Daily Near-Surface Air Temperature Degrees Kelvin (convert to C)","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
{"id":"tasmin","title":"Minimum Daily Near-Surface Air Temperature","description":"Surface Downwelling Longwave Radiation W m⁻²","stac_version":"1.0.0","license":"public-domain","links":[],"extent":{"spatial":{"bbox":[[-180,-60,180,90]]},"temporal":{"interval":[["1950-01-01T00:00:00Z","2100-01-01T00:00:00Z"]]}}}
Usage:
TODO
The text was updated successfully, but these errors were encountered:
STAC Items for the monthly ensemble averages of the CMIP6 dataset have been generated and uploaded to the staging STAC API. See: https://bgp41fmlrh.execute-api.us-east-1.amazonaws.com/collections/huss/items?limit=10
Code walk through:
cmip6-collections.json:
Usage:
TODO
The text was updated successfully, but these errors were encountered: