feat: update pr.yml to open veda-config pr (#216)
* feat: update pr.yml to open veda-config pr and publish collections

* feat: add test dataset-config

* fix: add debugging

* fix: add more debugging statements

* fix: add environment to job

* fix: testing

* fix: add debugging for directory

* fix: add github debugging

* fix: add logs to create-mdx-files step

* fix: add step to output files

* fix: update ls path

* fix: replace git clone with checkout

* fix: typo

* fix: merge steps

* fix: add environment

* fix: try to copy generated file to new directory in new repo

* fix: add modifications for copying untracked file

* fix: create source directory

* fix: update copying commands

* fix: update command

* fix: add debugging for copying file

* fix: add more logging, remove extraneous copy

* fix: remove ls path

* fix: attempt to create pr

* fix: update pr creation command to specify org/repo directly

* fix: add debugging

* fix: check branch

* fix: change token reference

* fix: add branch debugging

* fix: add debugging token access step

* fix: try another syntax for token supplying

* fix: update token commands

* fix: more debugging target repo

* fix: try checking out target repo another way

* fix: typo

* fix: remove prev debugging token step

* fix: change cloning step

* fix: cd into cloned repo

* fix: simplify

* fix: update untracked file copying step

* fix: typo

* fix: cd into cloned repo

* fix: reorder steps, see if it fixes issue

* fix: print working directory

* fix: condense steps

* fix: add commands to add and commit file

* fix: modify commit message, add logs

* fix: authenticate git

* fix: add other steps to update comment in veda-data pr

* fix: add debugging to new step

* fix: update env var setting

* fix: update comment-id references

* fix: update output to include commentId

* fix: echo comment_id

* fix: update comment id extraction

* fix: add anayeaye changes

* fix: clean up logging

* fix: update step name

* fix: update to include github actor and try to extract collection

* fix: reformat file

* fix: update to handle list of collections

* fix: escape special chars

* fix: add more cleaning to collection id

* fix: add debugging for collection id extraction

* fix: try another approach for updating gh body

* fix: update pr_url

* fix: cleanup by extracting pr title and body

* fix: use -e flag for echo to fix newlines
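
The last fix in the list is worth a note: the PR body below is assembled with literal `\n` escape sequences, and bash's plain `echo` passes those through unexpanded, so the rendered PR description collapsed onto one line. `echo -e` expands the escapes into real newlines. A minimal sketch of the difference:

```bash
body="### Title\n\n- first-collection-id"  # hypothetical PR body
echo "$body"     # prints the \n sequences literally, on a single line
echo -e "$body"  # expands \n into real newlines before handing the body to gh pr create
```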
botanical authored Dec 20, 2024
1 parent 6efe7c7 commit a744b05
Showing 3 changed files with 157 additions and 14 deletions.
137 changes: 123 additions & 14 deletions .github/workflows/pr.yml
```diff
@@ -30,6 +30,7 @@ jobs:
     environment: staging
     outputs:
       publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
+      commentId: ${{ steps.init-comment.outputs.COMMENT_ID }}
     steps:
       - uses: actions/checkout@v4
 
@@ -128,16 +129,17 @@ jobs:
           # Track successful publications
           all_failed=true
-          success_collections=()
+          declare -a success_collections=()
 
           status_message='### Collection Publication Status
           '
 
-          for file in "${ADDED_FILES[@]}"; do
+          for file in ${ADDED_FILES}; do
             echo $file
             if [ -f "$file" ]; then
               dataset_config=$(jq '.' "$file")
               collection_id=$(jq -r '.collection' "$file")
+              echo "Publishing $collection_id"
 
               response=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
                 -H "Content-Type: application/json" \
                 -H "Authorization: Bearer $AUTH_TOKEN" \
@@ -171,7 +173,7 @@ jobs:
           fi
 
           # Output only successful collections to be used in subsequent steps
-          echo "success_collections=$(IFS=','; echo "${success_collections[*]}")" >> $GITHUB_OUTPUT
+          echo "success_collections=${success_collections[*]}" >> $GITHUB_OUTPUT
 
           # Update PR comment
           CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
@@ -203,14 +205,15 @@ jobs:
           ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
           gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
 
-  create-mdx-files:
+  create-mdx-files-and-open-pr:
     runs-on: ubuntu-latest
+    environment: staging
     needs: publish-new-datasets
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
-      - name: Use output from dataset-publication-and-configuration
+      - name: Use output from publish-new-datasets
         run: |
           echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"
 
@@ -220,19 +223,125 @@ jobs:
           PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
         run: |
           echo $PUBLISHED_COLLECTION_FILES
+          collection_ids=""
           pip install -r ./scripts/requirements.txt
-          for file in "${PUBLISHED_COLLECTION_FILES[@]}"
-          do
-            python3 ./scripts/generate-mdx.py "$file"
+          for file in ${PUBLISHED_COLLECTION_FILES}; do
+            collection_id=$(python3 ./scripts/generate-mdx.py "$file")
+            collection_id=$(echo "$collection_id" | sed 's/^["\s]*//;s/["\s]*$//')
+            echo "Processed collection ID: $collection_id"
+            collection_ids="$collection_ids$collection_id,"
           done
+          # Remove trailing comma
+          collection_ids=${collection_ids%,}
+          echo "Final collection_ids: $collection_ids"
+          echo "collection_ids=${collection_ids}" >> $GITHUB_ENV
 
-  open-veda-config-pr:
-    runs-on: ubuntu-latest
-    needs: create-mdx-files
-    steps:
-      - name: Open veda-config PR
+      - name: Set up Variables
         run: |
-          echo "NO-OP. Placeholder for future job that will open a Pull Request in veda-config for a dashboard preview for the new/changed datasets."
+          echo "VEDA_CONFIG_REPO=${{ vars.VEDA_CONFIG_REPO_ORG }}/${{ vars.VEDA_CONFIG_REPO_NAME }}" >> $GITHUB_ENV
+
+      - name: Clone veda-config repository
+        run: |
+          git clone https://github.com/${{ env.VEDA_CONFIG_REPO }}.git
+          ls
+
+      - name: Copy untracked mdx files to veda-config
+        run: |
+          echo "Copying untracked .mdx files to veda-config repository"
+          ls ./ingestion-data/dataset-mdx/
+          mkdir -p datasets
+          find ingestion-data/dataset-mdx/ -name '*.mdx' -exec cp {} veda-config/datasets/ \;
+
+      - name: Create veda-config PR with changes
+        id: create-pr
+        env:
+          GITHUB_TOKEN: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}
+          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
+          PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
+        run: |
+          cd veda-config
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git remote set-url origin https://${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}@github.com/${{ env.VEDA_CONFIG_REPO }}
+
+          files_string=$(IFS=$'\n'; echo "${PUBLISHED_COLLECTION_FILES[*]}")
+          hash=$(echo -n "$files_string" | md5sum | cut -d ' ' -f 1)
+          NEW_BRANCH="add-dataset-$hash"
+          git fetch origin
+          if git ls-remote --exit-code --heads origin $NEW_BRANCH; then
+            git push origin --delete $NEW_BRANCH
+          fi
+          git checkout -b $NEW_BRANCH
+
+          git status
+          git add .
+          git commit -m "feat: add MDX files for dataset(s) [Automated workflow]"
+          git push origin $NEW_BRANCH
+
+          # Convert the comma-separated list into bullet points
+          collection_bullet_points=""
+          IFS=',' read -ra IDs <<< "$collection_ids"
+          # Extract the first collection ID
+          first_collection_id="${IDs[0]}"
+          for id in "${IDs[@]}"; do
+            collection_bullet_points+="- $id\n"
+          done
+
+          pr_title="Add dataset(s) - $first_collection_id [Automated PR by ${{ github.actor }}]"
+          body="### Add dataset(s) - $first_collection_id [Automated PR by ${{ github.actor }}]\n\n$collection_bullet_points"
+          echo "$body"
+
+          PR_URL=$(GITHUB_TOKEN=${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }} gh pr create -R ${{ env.VEDA_CONFIG_REPO }} -H $NEW_BRANCH -B develop --title "$pr_title" --body "$(echo -e "$body")")
+          echo "PR_URL=$PR_URL" >> $GITHUB_OUTPUT
+          echo "PR creation succeeded!"
+
+      # Updates the comment with a link to the above PR
+      - name: Update PR comment with PR creation result
+        if: success()
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
+        run: |
+          PR_URL=${{ steps.create-pr.outputs.PR_URL }}
+          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
+          UPDATED_BODY="$CURRENT_BODY
+          **A PR has been created with the dataset configuration: 🗺️ [PR link]($PR_URL)**"
+          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+
+      - name: Update PR comment on PR creation failure
+        if: failure() && steps.create-pr.outcome == 'failure'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
+        run: |
+          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
+          UPDATED_BODY="$CURRENT_BODY
+          **Failed ❌ to create a PR with the dataset configuration. 😔 **"
+          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+
+      # If the workflow fails at any point, the PR comment will be updated
+      - name: Update PR comment on overall workflow failure
+        if: failure() && steps.create-pr.outcome != 'failure'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
+        run: |
+          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          # Output WORKFLOW_URL to logs for verification
+          echo "Workflow URL: $WORKFLOW_URL"
+          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
+          UPDATED_BODY="$CURRENT_BODY
+          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
+          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+          echo "Updated Comment Body: $UPDATED_BODY"
 
   publish-to-prod-on-pr-merge:
     if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
```
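A note on the branch-naming scheme introduced above: the branch name is an `md5sum` of the newline-joined list of published files, so the same set of datasets always maps to the same branch, and a re-run deletes and recreates that branch rather than accumulating stale ones. A minimal sketch with a hypothetical file list:

```bash
# Hypothetical list of published collection files (in the workflow this comes
# from the publish-new-datasets job output).
files_string=$'ingestion-data/staging/dataset-config/a.json\ningestion-data/staging/dataset-config/b.json'

# Identical input always yields the identical hash, hence a stable branch name.
hash=$(echo -n "$files_string" | md5sum | cut -d ' ' -f 1)
echo "add-dataset-$hash"
```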
31 changes: 31 additions & 0 deletions ingestion-data/staging/dataset-config/test.json
```diff
@@ -0,0 +1,31 @@
+{
+    "collection": "hls-swir-falsecolor-composite-THIRD-TEST",
+    "title": "HLS SWIR FalseColor Composite",
+    "spatial_extent": {
+        "xmin": -156.75,
+        "ymin": 20.80,
+        "xmax": -156.55,
+        "ymax": 20.94
+    },
+    "temporal_extent": {
+        "startdate": "2023-08-08T00:00:00Z",
+        "enddate": "2023-08-08T23:59:59Z"
+    },
+    "data_type": "cog",
+    "license": "CC0-1.0",
+    "description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.",
+    "is_periodic": false,
+    "time_density": "day",
+    "sample_files": [
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif",
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif"
+    ],
+    "discovery_items": [
+        {
+            "discovery": "s3",
+            "prefix": "maui-fire/",
+            "bucket": "veda-data-store-staging",
+            "filename_regex": "(.*)SWIR_falsecolor(.*).tif$"
+        }
+    ]
+}
```
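For reference, the publish step in `pr.yml` reads exactly two things from a config like this: the whole document (as the request payload) and the `collection` field (for logging and the later veda-config PR title). A minimal sketch of that jq usage against this file:

```bash
file="ingestion-data/staging/dataset-config/test.json"
dataset_config=$(jq '.' "$file")              # full JSON payload POSTed to the publish endpoint
collection_id=$(jq -r '.collection' "$file")  # -> hls-swir-falsecolor-composite-THIRD-TEST
echo "Publishing $collection_id"
```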
3 changes: 3 additions & 0 deletions scripts/generate-mdx.py
```diff
@@ -125,3 +125,6 @@ def safe_open_w(path):
     )
     with safe_open_w(output_filepath) as ofile:
         ofile.write(new_content)
+
+collection_id = input_data["collection"]
+print(collection_id)
```
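The added `print(collection_id)` is what makes the create-mdx-files step work: the script's stdout becomes the value captured by the shell's command substitution. A minimal sketch of that hand-off, assuming the script prints only the ID:

```bash
file="ingestion-data/staging/dataset-config/test.json"
collection_id=$(python3 ./scripts/generate-mdx.py "$file")  # captures the printed ID
# mirror the workflow's cleanup of stray quote characters around the ID
collection_id=$(echo "$collection_id" | sed 's/^["\s]*//;s/["\s]*$//')
echo "Captured: $collection_id"
```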
