fix: add mdx generation (#188)
* fix: add mdx generation

* fix: add test branch

* fix: trigger action

* fix: update test.json

* fix: add another test json file

* fix: add debugging steps

* fix: add more debugging, change filename reference

* fix: attempt to fix file path

* fix: check scripts directory

* fix: add checkout to step

* fix: add debugging

* fix: update dependency reference

* fix: update directory to write file

* fix: remove debugging steps

* fix: remove more debugging steps, update dataset.mdx

* Update scripts/generate-mdx.py

Co-authored-by: Alexandra Kirk <[email protected]>

* Update scripts/generate-mdx.py

Co-authored-by: Alexandra Kirk <[email protected]>

---------

Co-authored-by: Alexandra Kirk <[email protected]>
botanical and anayeaye authored Dec 10, 2024
1 parent 93f64c5 commit 4a3ad2e
Showing 6 changed files with 177 additions and 12 deletions.
24 changes: 13 additions & 11 deletions .github/workflows/pr.yml
@@ -208,22 +208,24 @@ jobs:
     runs-on: ubuntu-latest
     needs: publish-new-datasets
     steps:
-      - name: Use output from dataset-publication-and-configuration
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Use output from dataset-publication-and-configuration
         run: |
-          echo "The output from the previous step is: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}"
+          echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"
       # Creates a slim dataset mdx file for each collection based on the dataset config json
       - name: Create dataset mdx for given collections
         env:
-          PUBLISHED_COLLECTION_FILES: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}
-        run: echo "NO-OP step"
-        # run: |
-        #   pip install -r scripts/requirements.txt
-        #   for file in "${PUBLISHED_COLLECTION_FILES[@]}"
-        #   do
-        #     python3 scripts/mdx.py "$file"
-        #   done
+          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
+        run: |
+          echo $PUBLISHED_COLLECTION_FILES
+          pip install -r ./scripts/requirements.txt
+          for file in "${PUBLISHED_COLLECTION_FILES[@]}"
+          do
+            python3 ./scripts/generate-mdx.py "$file"
+          done

   open-veda-config-pr:
     runs-on: ubuntu-latest
@@ -238,7 +240,7 @@
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

       - name: Publish to production on PR merge
         run: echo "NO-OP. This step runs when a PR is merged."
30 changes: 30 additions & 0 deletions ingestion-data/testing/dataset-config/test-2.json
@@ -0,0 +1,30 @@
{
    "collection": "modis-ndvi-diff-2015-2022-TEST",
    "title": "Camp Fire Domain: MODIS NDVI Difference",
    "spatial_extent": {
        "xmin": -122.21,
        "ymin": 39.33,
        "xmax": -120.91,
        "ymax": 40.22
    },
    "temporal_extent": {
        "startdate": "2022-12-31T00:00:00Z",
        "enddate": "2022-12-31T23:59:59Z"
    },
    "data_type": "cog",
    "license": "CC0-1.0",
    "description": "MODIS NDVI difference from a three-year average of 2015 to 2018 subtracted from a three-year average of 2019-2022. These tri-annual averages represent periods before and after the fire.",
    "is_periodic": true,
    "time_density": "year",
    "sample_files": [
        "s3://veda-data-store-staging/modis-ndvi-diff-2015-2022/campfire_ndvi_difference_2015_2022.tif"
    ],
    "discovery_items": [
        {
            "discovery": "s3",
            "prefix": "modis-ndvi-diff-2015-2022/",
            "bucket": "veda-data-store-staging",
            "filename_regex": "(.*)campfire_ndvi_difference_2015_2022.tif$"
        }
    ]
}
2 changes: 1 addition & 1 deletion ingestion-data/testing/dataset-config/test.json
@@ -1,5 +1,5 @@
 {
-  "collection": "hls-swir-falsecolor-composite-TEST",
+  "collection": "hls-swir-falsecolor-composite-SECOND-TEST",
   "title": "HLS SWIR FalseColor Composite",
   "spatial_extent": {
     "xmin": -156.75,
5 changes: 5 additions & 0 deletions scripts/dataset.mdx
@@ -0,0 +1,5 @@
<Block>
  <Prose>
    [[REPLACE WITH RELEVANT DATASET INFORMATION]]
  </Prose>
</Block>
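Combined with the front matter emitted by scripts/generate-mdx.py, a generated <collection>.data.mdx would begin roughly like this (a sketch for the test-2.json config above; field order follows the script's json_data, and the exact YAML quoting is an assumption):

---
id: modis-ndvi-diff-2015-2022-TEST
name: 'Camp Fire Domain: MODIS NDVI Difference'
featured: false
# ...description, media, taxonomy, infoDescription omitted here...
layers: []
---
<Block>
  <Prose>
    [[REPLACE WITH RELEVANT DATASET INFORMATION]]
  </Prose>
</Block>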
127 changes: 127 additions & 0 deletions scripts/generate-mdx.py
@@ -0,0 +1,127 @@
#! /usr/bin/env python
"""
This file creates a minimal <collection>.data.mdx file
from the input dataset config json file
Dependency: `dataset.mdx` file
"""

import yaml
import os
import json
import sys


def create_frontmatter(input_data):
    """
    Creates json based on input dataset config
    """
    collection_id = input_data["collection"]

    json_data = {
        "id": collection_id,
        "name": input_data.get("title", "Dataset Title"),
        "featured": False,
        "description": input_data.get("description", "Dataset Description"),
        "media": {
            "src": "https://bootstrap-cheatsheet.themeselection.com/assets/images/bs-images/img-2x1.png",
            "alt": "Placeholder image",
            "author": {"name": "Media author", "url": ""},
        },
        "taxonomy": [
            {"name": "Source", "values": ["NASA"]},
        ],
        "infoDescription": """::markdown
- **Temporal Extent:** 2015 - 2100
- **Temporal Resolution:** Annual
- **Spatial Extent:** Global
- **Spatial Resolution:** 0.25 degrees x 0.25 degrees
- **Data Units:** Days (Days per year above 90°F or 110°F)
- **Data Type:** Research
        """,
        "layers": [],
    }

    for asset_id, asset in input_data.get("item_assets", {}).items():
        layer = {
            "id": f"{collection_id}-{asset_id}",
            "stacCol": collection_id,
            "name": asset.get("title", "Asset Title"),
            "type": "raster",
            "description": asset.get("description", "Asset Description"),
            "zoomExtent": [0, 4],
            "sourceParams": {
                "assets": asset_id,
                "resampling_method": "bilinear",
                "colormap_name": "wistia",
                "rescale": "0,365",
                "maxzoom": 4,
            },
            "compare": {
                "datasetId": collection_id,
                "layerId": asset_id,
                "mapLabel": (
                    "::js ({ dateFns, datetime, compareDatetime }) "
                    "=> {if (dateFns && datetime && compareDatetime)"
                    "return `${dateFns.format(datetime, 'yyyy')} "
                    "VS ${dateFns.format(compareDatetime, 'yyyy')}`;}"
                ),
            },
            "analysis": {"exclude": False, "metrics": ["mean"]},
            "legend": {
                "unit": {"label": "Days"},
                "type": "gradient",
                "min": 0,
                "max": 365,
                "stops": [
                    "#E4FF7A",
                    "#FAED2D",
                    "#FFCE0A",
                    "#FFB100",
                    "#FE9900",
                    "#FC7F00",
                ],
            },
            "info": {
                "source": "NASA",
                "spatialExtent": "Global",
                "temporalResolution": "Annual",
                "unit": "Days",
            },
        }
        json_data["layers"].append(layer)

    # Convert json to yaml for frontmatter
    yaml_data = yaml.dump(json_data, sort_keys=False)

    return yaml_data


def safe_open_w(path):
    """Open "path" for writing, creating any parent directories as needed."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    return open(path, "w")


if __name__ == "__main__":
    input_data = json.load(open(sys.argv[1]))
    dataset_config = create_frontmatter(input_data)
    front_matter = f"---\n{dataset_config}---\n"

    # Path to the existing file
    curr_directory = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(curr_directory, "dataset.mdx")

    # Read the existing content of the file
    with open(file_path, "r") as file:
        existing_content = file.read()

    # Combine front matter and existing content
    new_content = front_matter + existing_content

    # Write the combined content to the collection's .data.mdx output file
    output_filepath = os.path.join(
        curr_directory,
        f"../ingestion-data/dataset-mdx/{input_data['collection']}.data.mdx",
    )
    with safe_open_w(output_filepath) as ofile:
        ofile.write(new_content)
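As a quick sanity check on a generated file (a hypothetical helper, not part of this commit), the front matter can be split back out and parsed with pyyaml:

import yaml

def read_frontmatter(mdx_path):
    """Parse the YAML front matter from a generated .data.mdx file."""
    with open(mdx_path) as f:
        text = f.read()
    # generate-mdx.py writes "---\n<yaml>---\n<template>", so the YAML
    # sits between the first two "---" delimiters.
    _, frontmatter, _body = text.split("---", 2)
    return yaml.safe_load(frontmatter)

# Example path assumes the test-2.json config above was processed.
fm = read_frontmatter("ingestion-data/dataset-mdx/modis-ndvi-diff-2015-2022-TEST.data.mdx")
assert fm["id"] == "modis-ndvi-diff-2015-2022-TEST"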
1 change: 1 addition & 0 deletions scripts/requirements.txt
@@ -0,0 +1 @@
pyyaml
