# Test Illumina Genotyping Array #592
# NOTE: GitHub flagged this file as containing bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
name: Test Illumina Genotyping Array

# Controls when the workflow will run
on:
  # run on push to feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches: [ "develop", "staging", "master" ]
    # Only run if files in these paths changed:
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - 'pipelines/broad/genotyping/illumina/**'
      - 'tasks/broad/IlluminaGenotypingArrayTasks.wdl'
      - 'tasks/broad/Qc.wdl'
      - 'verification/VerifyIlluminaGenotypingArray.wdl'
      - 'verification/test-wdls/TestIlluminaGenotypingArray.wdl'
      - 'tasks/broad/Utilities.wdl'
      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
      - '.github/workflows/test_illumina_genotyping_array.yml'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
    inputs:
      # Inputs are delivered to the job as strings ("true"/"false"), hence the quoted defaults.
      useCallCache:
        description: 'Use call cache (default: true)'
        required: false
        default: "true"
      updateTruth:
        description: 'Update truth files (default: false)'
        required: false
        default: "false"
      # Restricted to the two supported values: the job derives the test-inputs
      # directory and the truth path from this value, so a typo would otherwise
      # only surface later as a missing directory.
      testType:
        description: 'Specify the type of test (Plumbing or Scientific)'
        required: true
        type: choice
        options:
          - Plumbing
          - Scientific
      truthBranch:
        description: 'Specify the branch for truth files (default: master)'
        required: false
        default: "master"
# Workflow-level environment variables, shared by every job and step below.
env:
  PROJECT_NAME: WARP
  # Github repo name
  REPOSITORY_NAME: ${{ github.event.repository.name }}
  # Populated from the PDT_TESTER_SA_B64 repository secret
  # (presumably a base64-encoded service-account JSON key - confirm with the secret owner).
  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
jobs:
  # Single job: prepares test inputs, submits them to the Terra workspace,
  # polls the submissions and writes a Markdown summary of the workflow statuses.
  run_pipeline:
    runs-on: ubuntu-latest
    # Add "id-token" with the intended permissions.
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      # Add a step to wait to account for github -> dockstore -> terra delays.
      # WAIT_SECONDS is deliberately tiny for now; raise it (e.g. to 300) when a real
      # delay is wanted. The log message is derived from it so it can never be wrong.
      - name: Wait Before Starting
        env:
          WAIT_SECONDS: '1'
        run: |
          echo "Waiting for ${WAIT_SECONDS} second(s) before starting..."
          sleep "$WAIT_SECONDS"

      # actions/checkout MUST come before auth
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.ref }}

      # - id: 'auth'
      #   name: 'Authenticate to Google Cloud'
      #   uses: 'google-github-actions/auth@v2'
      #   with:
      #     token_format: 'access_token'
      #     # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting
      #     # This is provided by the DevOps team - do not change!
      #     workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider'
      #     # This is our tester service account
      #     service_account: '[email protected]'
      #     access_token_lifetime: '3600' # seconds, default is 3600
      #     access_token_scopes: 'profile, email, openid'

      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pwd
          cd scripts/firecloud_api/
          pip install -r requirements.txt

      # Export the triggering commit SHA as COMMIT_HASH for the later steps.
      - name: Set Commit Hash
        id: set_commit_hash
        run: echo "COMMIT_HASH=${{ github.sha }}" >> "$GITHUB_ENV"

      # Set the branch name.
      # github.head_ref contains the name of the branch in the context of a pull request;
      # if it is empty, the workflow was not triggered by a pull request (e.g. manual run).
      # In that case fall back to GITHUB_REF_NAME, the short ref name provided by the
      # runner. Unlike ${GITHUB_REF##*/}, it keeps branch names that contain "/" intact
      # (e.g. "feature/foo" stays "feature/foo" instead of becoming "foo").
      - name: Set Branch Name
        id: set_branch
        env:
          # Passed through the environment instead of being expanded into the script:
          # the PR branch name is attacker-controllable text and must never become shell source.
          HEAD_REF: ${{ github.head_ref }}
        run: |
          if [ -z "$HEAD_REF" ]; then
            echo "Branch name is missing, using ${GITHUB_REF_NAME}"
            echo "branch_name=${GITHUB_REF_NAME}" >> "$GITHUB_ENV"
          else
            echo "Branch name from PR: ${HEAD_REF}"
            echo "branch_name=${HEAD_REF}" >> "$GITHUB_ENV"
          fi

      - name: Set Test Type for PRs
        if: ${{ github.event_name == 'pull_request' }}
        id: set_test_type
        env:
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Default to "Scientific" if targeting master
          if [ "$BASE_REF" == "master" ]; then
            echo "testType=Scientific" >> "$GITHUB_ENV"
          else
            echo "testType=Plumbing" >> "$GITHUB_ENV"
          fi

      - name: Use Provided Test Type
        if: ${{ github.event_name == 'workflow_dispatch' }}
        id: use_provided_test_type
        env:
          # User-supplied input: read from the environment, never expanded into the script.
          REQUESTED_TEST_TYPE: ${{ github.event.inputs.testType }}
        run: |
          # Use the testType provided by the user
          echo "testType=${REQUESTED_TEST_TYPE}" >> "$GITHUB_ENV"

      # Builds one test-input file per JSON in the test_inputs directory, uploads it,
      # submits a job for it, then polls every submission and records the results.
      # Reads COMMIT_HASH, branch_name and testType, which earlier steps wrote to GITHUB_ENV.
      - name: Update test inputs and Upload to Terra
        env:
          PIPELINE_NAME: TestIlluminaGenotypingArray
          NAMESPACE: warp-pipelines
          WORKSPACE: WARP Tests
          # Dispatch inputs (empty on pull_request events, hence the fallbacks).
          # Read from the environment so user-supplied text is never expanded into the script.
          UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
          USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
          TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
        run: |
          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
          MAX_RETRIES=2
          RETRY_DELAY=300 # 300 seconds = 5 minutes

          # Markdown table header written above each submission's "workflow | status" rows
          SUMMARY_TABLE_HEADER="Workflow ID | Status"$'\n'"--- | ---"
          ALL_OUTPUTS=""

          # Initialize arrays to track submission and workflow statuses
          declare -a SUBMISSION_IDS
          declare -A WORKFLOW_STATUSES

          # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
          if [ "$UPDATE_TRUTH" = "true" ]; then
            UPDATE_TRUTH_BOOL=true
          else
            UPDATE_TRUTH_BOOL=false
          fi

          if [ "$USE_CALL_CACHE" == "true" ]; then
            USE_CALL_CACHE_BOOL=true
          else
            USE_CALL_CACHE_BOOL=false
          fi

          PIPELINE_DIR="pipelines/broad/genotyping/illumina"
          # testType was exported to the environment by one of the "test type" steps
          TEST_TYPE="$testType"
          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
          echo "Running tests with test type: $TEST_TYPE"

          TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
          echo "Truth path: $TRUTH_PATH"
          RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"

          # Collect the input files up front. With nullglob an empty or missing directory
          # yields an empty array instead of the literal pattern "*.json", so we can fail
          # fast with a clear message rather than submitting a non-existent file.
          shopt -s nullglob
          INPUT_FILES=("$INPUTS_DIR"/*.json)
          shopt -u nullglob
          if [ ${#INPUT_FILES[@]} -eq 0 ]; then
            echo "No test input files (*.json) found in $INPUTS_DIR"
            exit 1
          fi

          # Create the submission_data.json file which will be the same for all inputs
          SUBMISSION_DATA_FILE="submission_data.json"

          # Use a heredoc to generate the JSON file content dynamically
          cat <<EOF > "$SUBMISSION_DATA_FILE"
          {
            "methodConfigurationNamespace": "$NAMESPACE",
            "methodConfigurationName": "$PIPELINE_NAME",
            "useCallCache": $USE_CALL_CACHE_BOOL,
            "deleteIntermediateOutputFiles": false,
            "useReferenceDisks": true,
            "memoryRetryMultiplier": 1.2,
            "workflowFailureMode": "NoNewCalls",
            "userComment": "Automated submission",
            "ignoreEmptyOutputs": false
          }
          EOF
          echo "Created submission data file: $SUBMISSION_DATA_FILE"

          # 1. Submit all jobs first and store their submission IDs
          for input_file in "${INPUT_FILES[@]}"; do
            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
                                                                                --results_path "$RESULTS_PATH" \
                                                                                --inputs_json "$input_file" \
                                                                                --update_truth "$UPDATE_TRUTH_BOOL" \
                                                                                --commit_hash "$COMMIT_HASH" )
            echo "Uploading the test input file: $test_input_file"
            python3 scripts/firecloud_api/firecloud_api2.py \
              upload_test_inputs \
              --workspace-namespace "$NAMESPACE" \
              --workspace-name "$WORKSPACE" \
              --pipeline_name "$PIPELINE_NAME" \
              --test_input_file "$test_input_file" \
              --branch_name "$branch_name" \
              --sa-json-b64 "$SA_JSON_B64" \
              --user "[email protected]"

            attempt=1
            while [ $attempt -le $MAX_RETRIES ]; do
              SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api2.py submit_job \
                --workspace-namespace "$NAMESPACE" \
                --workspace-name "$WORKSPACE" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "[email protected]" \
                --submission_data_file "$SUBMISSION_DATA_FILE")
              echo "Submission ID: $SUBMISSION_ID"

              # An empty result or one containing "404" is treated as a failed submission
              if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
                echo "Error in submission, retrying in $RETRY_DELAY seconds..."
                ((attempt++))
                if [ $attempt -gt $MAX_RETRIES ]; then
                  echo "Max retries reached. Exiting..."
                  exit 1
                fi
                sleep $RETRY_DELAY
                continue
              fi

              echo "Submission successful. Submission ID: $SUBMISSION_ID"
              SUBMISSION_IDS+=("$SUBMISSION_ID")
              break
            done
          done

          echo "All jobs have been submitted. Starting to poll for statuses..."

          # 2. After all submissions are done, start polling for statuses of all jobs
          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
            attempt=1
            while [ $attempt -le $MAX_RETRIES ]; do
              echo "Polling for Submission ID: $SUBMISSION_ID"
              RESPONSE=$(python3 scripts/firecloud_api/firecloud_api2.py poll_job_status \
                --submission_id "$SUBMISSION_ID" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "[email protected]" \
                --workspace-namespace "$NAMESPACE" \
                --workspace-name "$WORKSPACE")

              if [ -z "$RESPONSE" ]; then
                echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
                ((attempt++))
                if [ $attempt -gt $MAX_RETRIES ]; then
                  echo "Max retries reached. Exiting..."
                  exit 1
                fi
                sleep $RETRY_DELAY
                continue
              fi

              # RESPONSE is parsed as a JSON object of workflow ID -> status;
              # render it as one "id | status" row per workflow.
              WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
              WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"

              # retrieve workflow outputs
              echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
              for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
                WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api2.py get_workflow_outputs \
                  --user "[email protected]" \
                  --sa-json-b64 "$SA_JSON_B64" \
                  --submission_id "$SUBMISSION_ID" \
                  --workspace-namespace "$NAMESPACE" \
                  --workspace-name "$WORKSPACE" \
                  --workflow_id "$WORKFLOW_ID" \
                  --pipeline_name "$PIPELINE_NAME")
                ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
              done
              break
            done
          done

          # 3. Generate the final summary once, after every submission has been polled.
          #    (Writing it inside the polling loop would repeat the heading and all earlier
          #    submissions on every iteration.) Submissions are listed in submission order,
          #    each as a hyperlink followed by a table of its workflows and statuses.
          echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
            # Generate the Terra URL for the submission (spaces in the workspace name are URL-encoded)
            SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
            {
              # Add the Submission ID as a hyperlink
              echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)"
              # Blank line so the table below starts a new Markdown block
              echo ""
              # Add the workflows and statuses for this submission
              echo "$SUMMARY_TABLE_HEADER"
              echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}"
              # Add a blank line for separation
              echo ""
            } >> "$GITHUB_STEP_SUMMARY"
          done

      # - name: Download Commit Hash from GCP
      #   run: |
      #     gsutil cp gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/commit_hash.txt ./commit_hash.txt

      # - name: Check Commit Hash
      #   id: check_commit_hash
      #   run: |
      #     # Read the commit hash from the downloaded file
      #     COMMIT_HASH_FROM_WDL=$(cat commit_hash.txt)
      #
      #     # Compare the two commit hashes
      #     if [ "$COMMIT_HASH_FROM_WDL" != "${{ env.COMMIT_HASH }}" ]; then
      #       echo "Error: The commit hash from the WDL output does not match the expected commit hash."
      #       exit 1
      #     else
      #       echo "Commit hash match successful: $COMMIT_HASH_FROM_WDL"
      #     fi

      - name: Print Summary on Success
        if: success()
        run: |
          echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> "$GITHUB_STEP_SUMMARY"

      - name: Print Summary on Failure
        if: failure()
        run: |
          echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> "$GITHUB_STEP_SUMMARY"