feat: add script to call sm2a promotion pipeline #193
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This GitHub Actions workflow automates the process of
# publishing dataset collections to a staging environment.
# It is triggered by a pull request to the main branch
# that modifies any files within the ingestion-data/staging/dataset-config/ directory.
# The workflow includes steps to
# - publish the datasets,
# - continuously update the status of the workflow in a PR comment
name: Publish collection to staging

# Triggers: pull requests targeting main that touch the staging dataset
# configs, plus pushes to main.
on:
  pull_request:
    branches:
      - main
    paths:
      # Run the workflow only if files inside this path are updated
      - ingestion-data/staging/dataset-config/*
  push:
    branches:
      - main

# The workflow token may comment on pull requests and read repository contents.
permissions:
  pull-requests: write
  contents: read
jobs:
  # Publishes every JSON dataset config added in the pull request to the
  # staging publish endpoint and tracks progress in a single
  # "### Workflow Status" PR comment. Exposes the successfully published
  # files and the comment id to the downstream jobs.
  publish-new-datasets:
    if: ${{ github.event_name == 'pull_request' && (github.event.action == 'synchronize' || github.event.action == 'opened') }}
    runs-on: ubuntu-latest
    environment: staging
    outputs:
      publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
      commentId: ${{ steps.init-comment.outputs.COMMENT_ID }}
    steps:
      - uses: actions/checkout@v4

      # Initializes the PR comment
      # Edits existing or creates new comment
      # Why? - Cleanliness!
      - name: Initialize PR comment with workflow start
        id: init-comment
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          body="### Workflow Status
          **Starting workflow...** [View action run]($WORKFLOW_URL)"
          # Get the PR number
          PR_NUMBER=${{ github.event.pull_request.number }}
          # Fetch existing comments
          COMMENTS=$(gh api repos/${{ github.repository }}/issues/${PR_NUMBER}/comments --jq '.[] | select(.body | contains("### Workflow Status")) | {id: .id, body: .body}')
          # Check if a comment already exists
          COMMENT_ID=$(echo "$COMMENTS" | jq -r '.id' | head -n 1)
          if [ -z "$COMMENT_ID" ]; then
            # No existing comment, create a new one
            COMMENT_ID=$(gh api repos/${{ github.repository }}/issues/${PR_NUMBER}/comments -f body="$body" --jq '.id')
          else
            # Comment exists, overwrite the existing comment
            gh api repos/${{ github.repository }}/issues/comments/$COMMENT_ID -X PATCH -f body="$body"
          fi
          echo "COMMENT_ID=$COMMENT_ID" >> "$GITHUB_OUTPUT"

      # Find only the newly added files
      # Only .json files
      # The files are outputted to GITHUB_OUTPUT, which can be used in subsequent steps
      - name: Get newly added files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: |
            **.json

      - name: List all newly added files
        env:
          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
        run: |
          for file in ${ADDED_FILES}; do
            echo "$file was added"
          done

      # Uses service client creds to get token
      # No username/password needed
      # Credentials are passed through env instead of being spliced into the
      # script text, and the resulting token is masked in the logs.
      - name: Get auth token
        id: get-token
        env:
          COGNITO_DOMAIN: ${{ vars.STAGING_COGNITO_DOMAIN }}
          CLIENT_ID: ${{ vars.STAGING_CLIENT_ID }}
          CLIENT_SECRET: ${{ secrets.STAGING_CLIENT_SECRET }}
        run: |
          response=$(curl -s -X POST \
            "${COGNITO_DOMAIN}/oauth2/token" \
            -H "Content-Type: application/x-www-form-urlencoded" \
            -d "grant_type=client_credentials" \
            -d "client_id=${CLIENT_ID}" \
            -d "client_secret=${CLIENT_SECRET}"
          )
          # "// empty" turns a missing/null token into an empty string instead
          # of the literal text "null", so the check below catches it.
          access_token=$(echo "$response" | jq -r '.access_token // empty')
          if [ -z "$access_token" ]; then
            echo "Failed to obtain an access token"
            exit 1
          fi
          echo "::add-mask::$access_token"
          echo "ACCESS_TOKEN=$access_token" >> "$GITHUB_OUTPUT"

      # Makes request to /dataset/publish endpoint
      # Outputs only files that were successfully published
      # Used by other steps
      # If none of the requests are successful, workflow fails
      # Updates the PR comment with status of collection publication
      - name: Publish all newly added collections to staging
        id: publish-collections
        env:
          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
          WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
        run: |
          if [ -z "$WORKFLOWS_URL" ]; then
            echo "WORKFLOWS_URL is not set"
            exit 1
          fi
          if [ -z "$AUTH_TOKEN" ]; then
            echo "AUTH_TOKEN is not set"
            exit 1
          fi
          publish_url="${WORKFLOWS_URL%/}/dataset/publish"
          # Track successful publications
          all_failed=true
          declare -a success_collections=()
          status_message='### Collection Publication Status
          '
          for file in ${ADDED_FILES}; do
            echo "$file"
            if [ -f "$file" ]; then
              dataset_config=$(jq '.' "$file")
              collection_id=$(jq -r '.collection' "$file")
              echo "Publishing $collection_id"
              # The response body goes to response.txt; only the HTTP status
              # code is captured in $response.
              response=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
                -H "Content-Type: application/json" \
                -H "Authorization: Bearer $AUTH_TOKEN" \
                -d "$dataset_config"
              )
              status_code=$(tail -n1 <<< "$response")
              # Update status message based on response code
              if [ "$status_code" -eq 200 ] || [ "$status_code" -eq 201 ]; then
                echo "$collection_id successfully published ✅"
                status_message+="- **$collection_id**: Successfully published ✅
          "
                success_collections+=("$file")
                all_failed=false
              else
                echo "$collection_id failed to publish ❌"
                status_message+="- **$collection_id**: Failed to publish. Error code $status_code. ❌
          "
              fi
            else
              echo "File $file does not exist"
              exit 1
            fi
          done
          # Output only successful collections to be used in subsequent steps
          echo "success_collections=${success_collections[*]}" >> "$GITHUB_OUTPUT"
          # Update PR comment before any failure exit so the per-collection
          # results are visible even when every request failed
          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
          UPDATED_BODY="$CURRENT_BODY
          $status_message"
          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
          # Exit workflow if all the requests fail
          if [ "$all_failed" = true ]; then
            echo "All collections failed to publish."
            exit 1
          fi

      # NOTE(review): no later step in this job runs Python, and the cache key
      # hashes 'requirements.txt' while the other jobs install from
      # ./scripts/requirements.txt - confirm whether these two steps belong here.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - uses: actions/cache@v4
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ env.pythonLocation }}-pip-${{ hashFiles('requirements.txt') }}

      # If the workflow fails at any point, the PR comment will be updated
      - name: Update PR comment on overall workflow failure
        if: failure()
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
        run: |
          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
          UPDATED_BODY="$CURRENT_BODY
          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

  # Generates an mdx file per published collection, copies the mdx files into
  # a clone of the veda-config repository, opens a PR there and links that PR
  # in the status comment.
  create-mdx-files-and-open-pr:
    runs-on: ubuntu-latest
    environment: staging
    needs: publish-new-datasets
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Use output from publish-new-datasets
        run: |
          echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

      # Creates a slim dataset mdx file for each collection based on the dataset config json
      - name: Create dataset mdx for given collections
        env:
          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
        run: |
          echo "$PUBLISHED_COLLECTION_FILES"
          collection_ids=""
          pip install -r ./scripts/requirements.txt
          for file in ${PUBLISHED_COLLECTION_FILES}; do
            collection_id=$(python3 ./scripts/generate_mdx.py "$file")
            # Trim surrounding quotes/whitespace. [[:space:]] is required here:
            # inside a bracket expression "\s" matches a backslash or the
            # letter "s", which would eat leading/trailing "s" from the id.
            collection_id=$(echo "$collection_id" | sed 's/^[[:space:]"]*//;s/[[:space:]"]*$//')
            echo "Processed collection ID: $collection_id"
            collection_ids="$collection_ids$collection_id,"
          done
          # Remove trailing comma
          collection_ids=${collection_ids%,}
          echo "Final collection_ids: $collection_ids"
          # Exported through GITHUB_ENV so the PR-creation step can read it
          echo "collection_ids=${collection_ids}" >> "$GITHUB_ENV"

      - name: Set up Variables
        run: |
          echo "VEDA_CONFIG_REPO=${{ vars.VEDA_CONFIG_REPO_ORG }}/${{ vars.VEDA_CONFIG_REPO_NAME }}" >> "$GITHUB_ENV"

      - name: Clone veda-config repository
        run: |
          git clone https://github.com/${{ env.VEDA_CONFIG_REPO }}.git
          ls

      - name: Copy untracked mdx files to veda-config
        run: |
          echo "Copying untracked .mdx files to veda-config repository"
          ls ./ingestion-data/dataset-mdx/
          # Create the directory the files are actually copied into
          mkdir -p veda-config/datasets
          find ingestion-data/dataset-mdx/ -name '*.mdx' -exec cp {} veda-config/datasets/ \;

      - name: Create veda-config PR with changes
        id: create-pr
        env:
          GITHUB_TOKEN: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}
          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
          # The steps context only covers the current job, so the file list
          # has to come from the upstream job's outputs.
          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
        run: |
          cd veda-config
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git remote set-url origin "https://${GITHUB_TOKEN}@github.com/${{ env.VEDA_CONFIG_REPO }}"
          # Branch name is derived from the published file list
          hash=$(echo -n "$PUBLISHED_COLLECTION_FILES" | md5sum | cut -d ' ' -f 1)
          NEW_BRANCH="add-dataset-$hash"
          git fetch origin
          # Replace a stale remote branch left over from an earlier run
          if git ls-remote --exit-code --heads origin "$NEW_BRANCH"; then
            git push origin --delete "$NEW_BRANCH"
          fi
          git checkout -b "$NEW_BRANCH"
          git status
          git add .
          git commit -m "feat: add MDX files for dataset(s) [Automated workflow]"
          git push origin "$NEW_BRANCH"
          # Convert the comma-separated list into bullet points
          collection_bullet_points=""
          IFS=',' read -ra IDs <<< "$collection_ids"
          # Extract the first collection ID
          first_collection_id="${IDs[0]}"
          for id in "${IDs[@]}"; do
            collection_bullet_points+="- $id\n"
          done
          pr_title="Add dataset(s) - $first_collection_id [Automated PR by ${{ github.actor }}]"
          body="### Add dataset(s) - $first_collection_id [Automated PR by ${{ github.actor }}]\n\n$collection_bullet_points"
          echo "$body"
          # gh reads the access token from GITHUB_TOKEN in this step's env
          PR_URL=$(gh pr create -R "${{ env.VEDA_CONFIG_REPO }}" -H "$NEW_BRANCH" -B develop --title "$pr_title" --body "$(echo -e "$body")")
          echo "PR_URL=$PR_URL" >> "$GITHUB_OUTPUT"
          echo "PR creation succeeded!"

      # Updates the comment with a link to the above PR
      - name: Update PR comment with PR creation result
        if: success()
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
          PR_URL: ${{ steps.create-pr.outputs.PR_URL }}
        run: |
          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
          UPDATED_BODY="$CURRENT_BODY
          **A PR has been created with the dataset configuration: 🗺️ [PR link]($PR_URL)**"
          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

      - name: Update PR comment on PR creation failure
        if: failure() && steps.create-pr.outcome == 'failure'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
        run: |
          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
          UPDATED_BODY="$CURRENT_BODY
          **Failed ❌ to create a PR with the dataset configuration. 😔 **"
          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

      # If the workflow fails at any point, the PR comment will be updated
      - name: Update PR comment on overall workflow failure
        if: failure() && steps.create-pr.outcome != 'failure'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }}
        run: |
          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          # Output WORKFLOW_URL to logs for verification
          echo "Workflow URL: $WORKFLOW_URL"
          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
          UPDATED_BODY="$CURRENT_BODY
          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
          echo "Updated Comment Body: $UPDATED_BODY"

  # Runs scripts/promote_to_production.py for every published dataset config.
  # NOTE(review): with the condition below commented out, this job runs on
  # every PR open/synchronize once the two jobs it needs succeed, not only on
  # merge. The commented condition also cannot simply be restored: the needed
  # publish-new-datasets job only runs for 'opened'/'synchronize' actions.
  publish-to-prod-on-pr-merge:
    # TEMPORARILY COMMENTED OUT TO TEST API REQUEST
    # if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
    runs-on: ubuntu-latest
    environment: staging
    needs: [publish-new-datasets, create-mdx-files-and-open-pr]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Use output from publish-new-datasets
        run: |
          echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

      - name: Publish to production on PR merge
        env:
          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
          SM2A_ADMIN_USERNAME: ${{ secrets.SM2A_ADMIN_USERNAME }}
          SM2A_ADMIN_PASSWORD: ${{ secrets.SM2A_ADMIN_PASSWORD }}
          SM2A_API_URL: ${{ vars.SM2A_API_URL }}
          PROMOTION_DAG: ${{ vars.PROMOTION_DAG_NAME }}
        run: |
          echo "$PUBLISHED_COLLECTION_FILES"
          pip install -r ./scripts/requirements.txt
          for file in ${PUBLISHED_COLLECTION_FILES}; do
            python3 ./scripts/promote_to_production.py "$file"
            echo "Processed file: $file"
          done