Skip to content

Test Illumina Genotyping Array #565

Test Illumina Genotyping Array

Test Illumina Genotyping Array #565

name: Test Illumina Genotyping Array

# Controls when the workflow will run:
#   * pull requests into develop/staging/master that touch this pipeline's files
#   * manual runs from the Actions tab (workflow_dispatch)
on:
  # Temporary trigger: run on push to feature branch "kp_GHA_Terra_auth_PD-2682"
  # - REMOVE WHEN DONE TESTING
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches:
      - develop
      - staging
      - master
    # Only run if files in these paths changed:
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - 'pipelines/broad/genotyping/illumina/**'
      - 'tasks/broad/IlluminaGenotypingArrayTasks.wdl'
      - 'tasks/broad/Qc.wdl'
      - 'verification/VerifyIlluminaGenotypingArray.wdl'
      - 'verification/test-wdls/TestIlluminaGenotypingArray.wdl'
      - 'tasks/broad/Utilities.wdl'
      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
      - '.github/workflows/test_illumina_genotyping_array.yml'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
    inputs:
      useCallCache:
        description: 'Use call cache (default: true)'
        required: false
        default: "true"
      updateTruth:
        description: 'Update truth files (default: false)'
        required: false
        default: "false"
      # The value selects the test_inputs/<testType> directory, so it is limited
      # to the two supported spellings instead of accepting free text.
      testType:
        description: 'Specify the type of test (Plumbing or Scientific)'
        required: true
        type: choice
        options:
          - Plumbing
          - Scientific
      truthBranch:
        description: 'Specify the branch for truth files (default: master)'
        required: false
        default: "master"
# Workflow-level environment, shared by every step below.
env:
  PROJECT_NAME: 'WARP'
  # Github repo name
  REPOSITORY_NAME: ${{ github.event.repository.name }}
  # Service-account credentials secret; the steps pass it to the
  # firecloud_api scripts through --sa-json-b64 (presumably base64-encoded JSON).
  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_CREDENTIALS }}
# Single job: prepares test inputs, submits the test workflow through the
# firecloud_api scripts, monitors the submissions and writes a step summary.
jobs:
run_pipeline:
runs-on: ubuntu-latest
# Token permissions granted to this job: read access to repository contents, plus
# "id-token: write" so the job may request an OIDC token.
# NOTE(review): the only visible consumer of the OIDC token is the commented-out
# google-github-actions/auth step below - confirm whether "id-token" is still needed.
permissions:
contents: 'read'
id-token: 'write'
# Steps run in order; several of them hand values to later steps by appending to $GITHUB_ENV.
steps:
# Add a step to wait to account for github -> dockstore -> terra delays
- name: Wait Before Starting
  env:
    # Delay in seconds; raise this when a real delay is wanted (e.g. '300' for 5 minutes).
    WAIT_SECONDS: '1'
  run: |
    # The log message is derived from the same value that is slept on,
    # so the two can no longer disagree.
    echo "Waiting for ${WAIT_SECONDS} seconds before starting..."
    sleep "${WAIT_SECONDS}"
# Checkout has to run ahead of the auth step.
- uses: actions/checkout@v3
  with:
    # Check out the ref that triggered this run
    ref: '${{ github.ref }}'
# id: 'auth'
# name: 'Authenticate to Google Cloud'
# uses: 'google-github-actions/auth@v2'
# with:
# token_format: 'access_token'
# # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting
# # This is provided by the DevOps team - do not change!
# workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider'
# # This is our tester service account
# service_account: '[email protected]'
# access_token_lifetime: '3600' # seconds, default is 3600
# access_token_scopes: 'profile, email, openid'
# Provide the Python interpreter used by the scripts/firecloud_api helpers.
- id: setup-python
  name: Set up python
  uses: actions/setup-python@v4
  with:
    # Quoted so the version stays a string
    python-version: "3.11"
- name: Install dependencies
  run: |
    # Show where the step starts, then install the helper scripts' requirements
    # from inside their own directory.
    pwd
    cd scripts/firecloud_api/
    pip install --requirement requirements.txt
- id: set_commit_hash
  name: Set Commit Hash
  run: |
    # Publish the triggering commit SHA as COMMIT_HASH for the steps that follow
    echo "COMMIT_HASH=${{ github.sha }}" >> "$GITHUB_ENV"
# Set the branch name.
# github.head_ref contains the name of the source branch in the context of a pull request.
# If github.head_ref is empty, the workflow was not triggered by a pull request
# (e.g. it was started manually); in that case GITHUB_REF_NAME is used. It holds the
# short ref name and, unlike ${GITHUB_REF##*/}, keeps any slashes in the branch
# name intact ("feature/foo" stays "feature/foo" instead of becoming "foo").
- name: Set Branch Name
  id: set_branch
  env:
    # Handed over through the environment instead of being interpolated into the
    # script text, so a crafted branch name cannot inject shell commands.
    HEAD_REF: ${{ github.head_ref }}
  run: |
    if [ -z "$HEAD_REF" ]; then
      echo "Branch name is missing, using ${GITHUB_REF_NAME}"
      echo "branch_name=${GITHUB_REF_NAME}" >> "$GITHUB_ENV"
    else
      echo "Branch name from PR: ${HEAD_REF}"
      echo "branch_name=${HEAD_REF}" >> "$GITHUB_ENV"
    fi
- id: set_test_type
  name: Set Test Type for PRs
  if: ${{ github.event_name == 'pull_request' }}
  run: |
    # Pull requests into master run the "Scientific" test;
    # every other target branch runs the "Plumbing" test.
    case "${{ github.base_ref }}" in
      master) selected_type="Scientific" ;;
      *)      selected_type="Plumbing" ;;
    esac
    echo "testType=${selected_type}" >> $GITHUB_ENV
- name: Use Provided Test Type
  if: ${{ github.event_name == 'workflow_dispatch' }}
  id: use_provided_test_type
  env:
    # Handed over through the environment instead of being interpolated into the
    # script text, so the user-supplied value cannot alter the script.
    REQUESTED_TEST_TYPE: ${{ github.event.inputs.testType }}
  run: |
    # Use the testType provided by the user, but only if it is one of the two
    # supported values; the value later selects the test_inputs/<testType> directory.
    case "$REQUESTED_TEST_TYPE" in
      Plumbing|Scientific)
        echo "testType=${REQUESTED_TEST_TYPE}" >> "$GITHUB_ENV"
        ;;
      *)
        echo "Error: testType must be 'Plumbing' or 'Scientific', got '${REQUESTED_TEST_TYPE}'"
        exit 1
        ;;
    esac
# For every test input JSON of the selected test type: rewrite the inputs with
# UpdateTestInputs.py, upload them with firecloud_api2.py, and submit a job
# (retrying on error). The collected submission IDs are exported for later steps.
- name: Update test inputs and Upload to Terra
  # The id lets later steps read this step's outputs as steps.submit_jobs.outputs.*
  id: submit_jobs
  env:
    # Dispatch inputs (with their defaults for pull_request runs) are handed over
    # through the environment instead of being interpolated into the script text.
    UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
    USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
    TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
    TEST_TYPE: ${{ env.testType }}
  run: |
    CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
    MAX_RETRIES=2
    RETRY_DELAY=300 # 300 seconds = 5 minutes

    # Submission IDs collected across all input files
    declare -a SUBMISSION_IDS=()

    # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format
    # ("true" -> true, anything else -> false)
    if [ "$UPDATE_TRUTH" = "true" ]; then
      UPDATE_TRUTH_BOOL=true
    else
      UPDATE_TRUTH_BOOL=false
    fi
    if [ "$USE_CALL_CACHE" = "true" ]; then
      USE_CALL_CACHE_BOOL=true
    else
      USE_CALL_CACHE_BOOL=false
    fi

    PIPELINE_NAME="TestIlluminaGenotypingArray"
    PIPELINE_DIR="pipelines/broad/genotyping/illumina"
    INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
    echo "Running tests with test type: $TEST_TYPE"
    TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
    echo "Truth path: $TRUTH_PATH"
    RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"

    # Create the submission_data.json file which will be the same for all inputs
    SUBMISSION_DATA_FILE="submission_data.json"
    # Use a heredoc to generate the JSON file content dynamically
    cat <<EOF > "$SUBMISSION_DATA_FILE"
    {
      "methodConfigurationNamespace": "warp-pipelines",
      "methodConfigurationName": "$PIPELINE_NAME",
      "useCallCache": $USE_CALL_CACHE_BOOL,
      "deleteIntermediateOutputFiles": false,
      "useReferenceDisks": true,
      "memoryRetryMultiplier": 1.2,
      "workflowFailureMode": "NoNewCalls",
      "userComment": "Automated submission",
      "ignoreEmptyOutputs": false
    }
    EOF
    echo "Created submission data file: $SUBMISSION_DATA_FILE"

    # Collect the input files up front. With nullglob an unmatched pattern expands
    # to nothing, so an empty or missing directory is reported clearly instead of
    # looping once over the literal "*.json" pattern.
    shopt -s nullglob
    input_files=("$INPUTS_DIR"/*.json)
    shopt -u nullglob
    if [ "${#input_files[@]}" -eq 0 ]; then
      echo "Error: no test input JSON files found in $INPUTS_DIR"
      exit 1
    fi

    for input_file in "${input_files[@]}"; do
      echo "Processing input file: $input_file"
      test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
        --results_path "$RESULTS_PATH" \
        --inputs_json "$input_file" \
        --update_truth "$UPDATE_TRUTH_BOOL" \
        --commit_hash "$COMMIT_HASH" )
      echo "Uploading the test input file: $test_input_file"
      echo "Branch name: $branch_name"
      python3 scripts/firecloud_api/firecloud_api2.py \
        upload_test_inputs \
        --workspace-namespace warp-pipelines \
        --workspace-name "WARP Tests" \
        --pipeline_name "$PIPELINE_NAME" \
        --test_input_file "$test_input_file" \
        --branch_name "$branch_name" \
        --sa-json-b64 "$SA_JSON_B64" \
        --user "[email protected]"

      attempt=1
      while [ "$attempt" -le "$MAX_RETRIES" ]; do
        echo "Attempt $attempt: Submitting job for input file: $input_file"
        # The step shell runs with "-e": a bare assignment from a failing command
        # would abort the step before the retry logic below is reached. A non-zero
        # exit is therefore mapped to an empty ID, which the check below retries.
        if ! SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api2.py submit_job \
          --workspace-namespace "warp-pipelines" \
          --workspace-name "WARP Tests" \
          --sa-json-b64 "$SA_JSON_B64" \
          --user "[email protected]" \
          --submission_data_file "$SUBMISSION_DATA_FILE"); then
          SUBMISSION_ID=""
        fi
        echo "Submission ID: $SUBMISSION_ID"
        # NOTE(review): the substring test also matches a valid ID that happens to
        # contain "404" - confirm what submit_job prints on a 404 and tighten if possible.
        if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
          echo "Error in submission, retrying in $RETRY_DELAY seconds..."
          attempt=$((attempt + 1))
          if [ "$attempt" -gt "$MAX_RETRIES" ]; then
            echo "Max retries reached. Exiting..."
            exit 1
          fi
          sleep "$RETRY_DELAY"
          continue
        fi
        echo "Submission successful. Submission ID: $SUBMISSION_ID"
        SUBMISSION_IDS+=("$SUBMISSION_ID")
        break
      done
    done

    # Export the space-separated submission IDs: through $GITHUB_ENV (read by later
    # steps as $submission_ids) and as a step output (steps.submit_jobs.outputs.submission_ids).
    echo "submission_ids=${SUBMISSION_IDS[*]}" >> "$GITHUB_ENV"
    echo "submission_ids=${SUBMISSION_IDS[*]}" >> "$GITHUB_OUTPUT"
# Polls every submission made earlier in the job, collects per-workflow statuses
# and outputs, and writes a linked status list to the step summary.
- name: Monitor Workflow Status
  env:
    # Pass IDs from a previous step. This is empty unless a step with id
    # "submit_jobs" publishes a "submission_ids" output; the script therefore
    # prefers the $submission_ids value exported through $GITHUB_ENV.
    SUBMISSION_IDS: ${{ steps.submit_jobs.outputs.submission_ids }}
    PIPELINE_NAME: TestIlluminaGenotypingArray
    NAMESPACE: warp-pipelines
    WORKSPACE: WARP Tests
  run: |
    echo "Monitoring the status of submitted workflows..."

    # Shell variables do not survive from one step to the next, so the containers
    # are declared here. Without "declare -A" bash would treat WORKFLOW_STATUSES as
    # an indexed array and evaluate the submission ID subscript arithmetically.
    declare -A WORKFLOW_STATUSES=()
    ALL_OUTPUTS=""

    # Convert the space-separated string into an array
    ids_string="${submission_ids:-${SUBMISSION_IDS:-}}"
    IFS=' ' read -r -a submission_ids_array <<< "$ids_string"

    for SUBMISSION_ID in "${submission_ids_array[@]}"; do
      echo "Polling submission status for Submission ID: $SUBMISSION_ID"
      RESPONSE=$(python3 scripts/firecloud_api/firecloud_api2.py poll_job_status \
        --submission_id "$SUBMISSION_ID" \
        --sa-json-b64 "$SA_JSON_B64" \
        --user "[email protected]" \
        --workspace-namespace "warp-pipelines" \
        --workspace-name "WARP Tests")
      if [ -z "$RESPONSE" ]; then
        echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
        continue
      fi

      # Parse and store workflow statuses: one "<workflow id> | <status>" line per
      # entry of the JSON object in RESPONSE
      WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
      echo "Statuses for submission $SUBMISSION_ID:"
      echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"
      # Append to aggregate statuses
      WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION

      # Retrieve workflow outputs
      # NOTE(review): unlike the other firecloud_api2.py calls, get_outputs is invoked
      # without --sa-json-b64/--user/--workspace-* - confirm the script accepts that.
      echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
      for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
        WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api2.py get_outputs \
          --submission_id "$SUBMISSION_ID" \
          --workflow_id "$WORKFLOW_ID" \
          --pipeline_name "$PIPELINE_NAME")
        ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
      done
    done

    # Generate summary for Submission IDs
    echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
    for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
      # Generate the Terra URL for the submission (spaces in the workspace name become %20)
      SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
      # Add the Submission ID as a hyperlink
      echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> "$GITHUB_STEP_SUMMARY"
      # Add the workflows and statuses for this submission
      echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> "$GITHUB_STEP_SUMMARY"
      # Add a blank line for separation
      echo "" >> "$GITHUB_STEP_SUMMARY"
    done
#- name: Update and Upload method configuration
# id: pipeline_run
# run: |
# # Set common environment variables
# TOKEN="${{ steps.auth.outputs.access_token }}"
# NAMESPACE="warp-pipelines"
# WORKSPACE="WARP Tests"
# USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
# UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
# #TEST_TYPE="${{ github.event.inputs.testType || 'Plumbing' }}"
# TEST_TYPE="${{ env.testType }}"
# TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
# CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
#
# echo "truth branch: $TRUTH_BRANCH"
#
# ########################################
# # SET PIPELINE SPECIFIC VARIABLES HERE #
# ########################################
# PIPELINE_NAME="TestIlluminaGenotypingArray"
# PIPELINE_DIR="pipelines/broad/genotyping/illumina"
# # TODO: Need to set the truth and result paths appropriately
# # TODO: Need to dynamically set the truth branch, for now it is hardcoded to master branch
# # We may want to keep the truth and results buckets separate for TTL reasons
# TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
# RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"
#
#
# # Function to call the Firecloud API using the firecloud_api2.py script
# firecloud_action() {
# python3 scripts/firecloud_api/firecloud_api2.py --action "$1" "${@:2}"
# }
#
#
# # Convert USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
# if [ "$USE_CALL_CACHE" == "true" ]; then
# USE_CALL_CACHE_BOOL=true
# else
# USE_CALL_CACHE_BOOL=false
# fi
#
#
# # Convert UPDATE_TRUTH to a boolean-friendly format ("true" -> true, "false" -> false)
# if [ "$UPDATE_TRUTH" = "true" ]; then
# UPDATE_TRUTH_BOOL=true
# else
# UPDATE_TRUTH_BOOL=false
# fi
#
# # Create the submission_data.json file which will be the same for all inputs
# SUBMISSION_DATA_FILE="submission_data.json"
#
# # Use a heredoc to generate the JSON file content dynamically
# cat <<EOF > "$SUBMISSION_DATA_FILE"
# {
# "methodConfigurationNamespace": "warp-pipelines",
# "methodConfigurationName": "$PIPELINE_NAME",
# "useCallCache": $USE_CALL_CACHE_BOOL,
# "deleteIntermediateOutputFiles": false,
# "useReferenceDisks": true,
# "memoryRetryMultiplier": 1.2,
# "workflowFailureMode": "NoNewCalls",
# "userComment": "Automated submission",
# "ignoreEmptyOutputs": false
# }
# EOF
# echo "Created submission data file: $SUBMISSION_DATA_FILE"
#
# # Initialize variables to aggregate statuses and outputs
# ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
# ALL_OUTPUTS=""
#
# # Initialize arrays to track submission and workflow statuses
# declare -a SUBMISSION_IDS
# declare -A WORKFLOW_STATUSES
#
# # Loop through each file in the appropriate test inputs directory
# INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
#
# echo "Running tests with test type: $TEST_TYPE"
#
# MAX_RETRIES=2
# RETRY_DELAY=300 # 300 seconds = 5 minutes
#
# for input_file in "$INPUTS_DIR"/*.json; do
# echo "Processing input file: $input_file"
# test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
# --results_path "$RESULTS_PATH" \
# --inputs_json "$input_file" \
# --update_truth "$UPDATE_TRUTH_BOOL" \
# --commit_hash "$COMMIT_HASH" )
# echo "Uploading the test input file: $test_input_file"
# echo "Branch name: $branch_name"
#
# python3 scripts/firecloud_api/firecloud_api2.py upload_test_inputs \
# --workspace-namespace warp-pipelines \
# --workspace-name "WARP Tests" \
# --pipeline_name "$PIPELINE_NAME" \
# --test_input_file "$test_input_file" \
# --branch_name "$branch_name" \
# --sa-json-b64 "$SA_JSON_B64" \
# --user "[email protected]"
# done
# attempt=1
# while [ $attempt -le $MAX_RETRIES ]; do
# echo "Attempt $attempt: Submitting job for input file: $input_file"
# #echo "Submitting job for input file: $input_file"
# cat "$SUBMISSION_DATA_FILE"
# SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE")
#
# if [[ "$SUBMISSION_ID" == *"404"* ]]; then
# echo "Error: Dockstore method not found. Retrying in $RETRY_DELAY seconds..."
# sleep $RETRY_DELAY
# ((attempt++))
# elif [ -z "$SUBMISSION_ID" ]; then
# echo "Submission failed for input file: $input_file. No submission ID received."
# break
# else
# echo "Submission successful. Submission ID: $SUBMISSION_ID"
# SUBMISSION_IDS+=("$SUBMISSION_ID")
# break
# fi
#
# if [ $attempt -gt $MAX_RETRIES ]; then
# echo "Max retries reached. Exiting..."
# fi
# done
# done
#
# #echo "Submission ID: $SUBMISSION_ID"
# #SUBMISSION_IDS+=("$SUBMISSION_ID")
#
#
# echo "Monitoring the status of submitted workflows..."
# for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
# echo "Polling submission status for Submission ID: $SUBMISSION_ID"
# RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID")
#
# if [ -z "$RESPONSE" ]; then
# echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
# continue
# fi
#
# # Parse and store workflow statuses
# WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
# echo "Statuses for submission $SUBMISSION_ID:"
# echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"
#
# # Append to aggregate statuses
# WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION
#
# # retrieve workflow outputs
# echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
# for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
# WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME")
# ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
# done
# done
#
# # Generate final summary tables with hyperlinks for Submission IDs
# echo "## Combined Workflow Statuses" >> $GITHUB_STEP_SUMMARY
# for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
# # Generate the Terra URL for the submission
# SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
#
# # Add the Submission ID as a hyperlink
# echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> $GITHUB_STEP_SUMMARY
#
# # Add the workflows and statuses for this submission
# echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> $GITHUB_STEP_SUMMARY
#
# # Add a blank line for separation
# echo "" >> $GITHUB_STEP_SUMMARY
# done
- name: Download Commit Hash from GCP
  run: |
    # Copy commit_hash.txt from the GCS bucket into the working directory,
    # where a later step reads it.
    gsutil cp \
      gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/commit_hash.txt \
      ./commit_hash.txt
- id: check_commit_hash
  name: Check Commit Hash
  run: |
    # Hash stored in the file downloaded by the previous step
    COMMIT_HASH_FROM_WDL=$(cat commit_hash.txt)
    # Pass only when it equals the COMMIT_HASH recorded for this run; otherwise fail the step
    if [ "$COMMIT_HASH_FROM_WDL" = "${{ env.COMMIT_HASH }}" ]; then
      echo "Commit hash match successful: $COMMIT_HASH_FROM_WDL"
    else
      echo "Error: The commit hash from the WDL output does not match the expected commit hash."
      exit 1
    fi
# Runs only when every earlier step succeeded; adds a heading to the step summary.
- if: success()
  name: Print Summary on Success
  run: |
    echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> "$GITHUB_STEP_SUMMARY"
# Runs only when an earlier step failed; adds a failure heading to the step summary.
- if: failure()
  name: Print Summary on Failure
  run: |
    echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> "$GITHUB_STEP_SUMMARY"