diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 8617b748..275d4591 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -208,22 +208,24 @@ jobs: runs-on: ubuntu-latest needs: publish-new-datasets steps: - - name: Use output from dataset-publication-and-configuration + - name: Checkout code + uses: actions/checkout@v4 + - name: Use output from dataset-publication-and-configuration run: | - echo "The output from the previous step is: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}" + echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}" # Creates a slim dataset mdx file for each collection based on the dataset config json - name: Create dataset mdx for given collections env: - PUBLISHED_COLLECTION_FILES: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }} - run: echo "NO-OP step" - # run: | - # pip install -r scripts/requirements.txt - # for file in "${PUBLISHED_COLLECTION_FILES[@]}" - # do - # python3 scripts/mdx.py "$file" - # done + PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }} + run: | + echo $PUBLISHED_COLLECTION_FILES + pip install -r ./scripts/requirements.txt + for file in "${PUBLISHED_COLLECTION_FILES[@]}" + do + python3 ./scripts/generate-mdx.py "$file" + done open-veda-config-pr: runs-on: ubuntu-latest @@ -238,7 +240,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Publish to production on PR merge run: echo "NO-OP. This step runs when a PR is merged." 
diff --git a/ingestion-data/testing/dataset-config/test-2.json b/ingestion-data/testing/dataset-config/test-2.json new file mode 100644 index 00000000..47e80ade --- /dev/null +++ b/ingestion-data/testing/dataset-config/test-2.json @@ -0,0 +1,30 @@ +{ + "collection": "modis-ndvi-diff-2015-2022-TEST", + "title": "Camp Fire Domain: MODIS NDVI Difference", + "spatial_extent": { + "xmin": -122.21, + "ymin": 39.33, + "xmax": -120.91, + "ymax": 40.22 + }, + "temporal_extent": { + "startdate": "2022-12-31T00:00:00Z", + "enddate": "2022-12-31T23:59:59Z" + }, + "data_type": "cog", + "license": "CC0-1.0", + "description": "MODIS NDVI difference from a three-year average of 2015 to 2018 subtracted from a three-year average of 2019-2022. These three-year averages represent periods before and after the fire.", + "is_periodic": true, + "time_density": "year", + "sample_files": [ + "s3://veda-data-store-staging/modis-ndvi-diff-2015-2022/campfire_ndvi_difference_2015_2022.tif" + ], + "discovery_items": [ + { + "discovery": "s3", + "prefix": "modis-ndvi-diff-2015-2022/", + "bucket": "veda-data-store-staging", + "filename_regex": "(.*)campfire_ndvi_difference_2015_2022.tif$" + } + ] +} \ No newline at end of file diff --git a/ingestion-data/testing/dataset-config/test.json b/ingestion-data/testing/dataset-config/test.json index 0e3cb1e1..37c5244c 100644 --- a/ingestion-data/testing/dataset-config/test.json +++ b/ingestion-data/testing/dataset-config/test.json @@ -1,5 +1,5 @@ { - "collection": "hls-swir-falsecolor-composite-TEST", + "collection": "hls-swir-falsecolor-composite-SECOND-TEST", "title": "HLS SWIR FalseColor Composite", "spatial_extent": { "xmin": -156.75, diff --git a/scripts/dataset.mdx b/scripts/dataset.mdx new file mode 100644 index 00000000..e5851fbd --- /dev/null +++ b/scripts/dataset.mdx @@ -0,0 +1,5 @@ + + + [[REPLACE WITH RELEVANT DATASET INFORMATION]] + + \ No newline at end of file diff --git a/scripts/generate-mdx.py b/scripts/generate-mdx.py new file mode 
100644 index 00000000..8ecc478d --- /dev/null +++ b/scripts/generate-mdx.py @@ -0,0 +1,127 @@ +#! /usr/bin/env python +""" +This file creates a minimal .data.mdx file +from the input dataset config json file +Dependency: `dataset.mdx` file +""" + +import yaml +import os +import json +import sys + + +def create_frontmatter(input_data): + """ + Creates json based on input dataset config + """ + collection_id = input_data["collection"] + + json_data = { + "id": collection_id, + "name": input_data.get("title", "Dataset Title"), + "featured": False, + "description": input_data.get("description", "Dataset Description"), + "media": { + "src": "https://bootstrap-cheatsheet.themeselection.com/assets/images/bs-images/img-2x1.png", + "alt": "Placeholder image", + "author": {"name": "Media author", "url": ""}, + }, + "taxonomy": [ + {"name": "Source", "values": ["NASA"]}, + ], + "infoDescription": """::markdown + - **Temporal Extent:** 2015 - 2100 + - **Temporal Resolution:** Annual + - **Spatial Extent:** Global + - **Spatial Resolution:** 0.25 degrees x 0.25 degrees + - **Data Units:** Days (Days per year above 90°F or 110°F) + - **Data Type:** Research + """, + "layers": [], + } + + for asset_id, asset in input_data.get("item_assets", {}).items(): + layer = { + "id": f"{collection_id}-{asset_id}", + "stacCol": collection_id, + "name": asset.get("title", "Asset Title"), + "type": "raster", + "description": asset.get("description", "Asset Description"), + "zoomExtent": [0, 4], + "sourceParams": { + "assets": asset_id, + "resampling_method": "bilinear", + "colormap_name": "wistia", + "rescale": "0,365", + "maxzoom": 4, + }, + "compare": { + "datasetId": collection_id, + "layerId": asset_id, + "mapLabel": ( + "::js ({ dateFns, datetime, compareDatetime }) " + "=> {if (dateFns && datetime && compareDatetime)" + "return `${dateFns.format(datetime, 'yyyy')} " + "VS ${dateFns.format(compareDatetime, 'yyyy')}`;}" + ), + }, + "analysis": {"exclude": False, "metrics": ["mean"]}, + 
"legend": { + "unit": {"label": "Days"}, + "type": "gradient", + "min": 0, + "max": 365, + "stops": [ + "#E4FF7A", + "#FAED2D", + "#FFCE0A", + "#FFB100", + "#FE9900", + "#FC7F00", + ], + }, + "info": { + "source": "NASA", + "spatialExtent": "Global", + "temporalResolution": "Annual", + "unit": "Days", + }, + } + json_data["layers"].append(layer) + + # Convert json to yaml for frontmatter + yaml_data = yaml.dump(json_data, sort_keys=False) + + return yaml_data + + +def safe_open_w(path): + """Open "path" for writing, creating any parent directories as needed.""" + os.makedirs(os.path.dirname(path), exist_ok=True) + return open(path, "w") + + +if __name__ == "__main__": + input_data = json.load(open(sys.argv[1])) + dataset_config = create_frontmatter(input_data) + front_matter = f"---\n{dataset_config}---\n" + + # Path to the existing file + curr_directory = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(curr_directory, "dataset.mdx") + + # Read the existing content of the file + with open(file_path, "r") as file: + existing_content = file.read() + + # Combine front matter and existing content + new_content = front_matter + existing_content + + # Write the combined content back to the file + output_filepath = os.path.join( + curr_directory, + f"../ingestion-data/dataset-mdx/{input_data['collection']}.data.mdx", + ) + with safe_open_w(output_filepath) as ofile: + ofile.write(new_content) diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..4818cc54 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +pyyaml \ No newline at end of file