Np jw test illumina genotyping arrays #765
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Test Illumina Genotyping Array

# Events that start this workflow.
on:
  # Temporary trigger for pushes to feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches:
      - develop
      - staging
      - master
    # Pipeline-specific path filter: the workflow only runs for pull requests
    # that change at least one of the files listed here.
    paths:
      - "pipelines/broad/genotyping/illumina/**"
      - "tasks/broad/IlluminaGenotypingArrayTasks.wdl"
      - "tasks/broad/Qc.wdl"
      - "verification/VerifyIlluminaGenotypingArray.wdl"
      - "verification/test-wdls/TestIlluminaGenotypingArray.wdl"
      - "tasks/broad/Utilities.wdl"
      - "tasks/broad/TerraCopyFilesFromCloudToCloud.wdl"
      - ".github/workflows/test_illumina_genotyping_array.yml"
  # Manual runs from the Actions tab, with optional overrides.
  workflow_dispatch:
    inputs:
      useCallCache:
        description: "Use call cache (default: true)"
        required: false
        default: 'true'
      updateTruth:
        description: "Update truth files (default: false)"
        required: false
        default: 'false'
      testType:
        description: "Specify the type of test (Plumbing or Scientific)"
        required: true
      truthBranch:
        description: "Specify the branch for truth files (default: master)"
        required: false
        default: 'master'
# Workflow-level environment shared by every step of every job.
env:
  # Pipeline under test and its Dockstore name
  PROJECT_NAME: "WARP"
  PIPELINE_NAME: "TestIlluminaGenotypingArray"
  DOCKSTORE_PIPELINE_NAME: "IlluminaGenotypingArray"
  # Terra workspace the test submissions are sent to
  TESTING_WORKSPACE: "WARP Tests"
  WORKSPACE_NAMESPACE: "warp-pipelines"
  # Name of the repository that triggered the run
  REPOSITORY_NAME: ${{ github.event.repository.name }}
  # Base64-encoded service-account key, read from repository secrets
  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
  # NOTE(review): the value below looks like an email-obfuscation placeholder
  # rather than a real address - confirm and restore the intended account email.
  USER: [email protected]
jobs:
  run_pipeline:
    runs-on: ubuntu-latest
    # Job token permissions: read-only repository contents, plus "id-token"
    # write access.
    permissions:
      contents: read
      id-token: write
    steps:
      # The repository must be checked out before any auth step
      # (actions/checkout MUST come first).
      - uses: actions/checkout@v3
        with:
          ref: "${{ github.ref }}"
# Provide the Python 3.11 interpreter used by the helper scripts in later steps.
- name: Set up python
  uses: actions/setup-python@v4
  id: setup-python
  with:
    python-version: "3.11"
# Install the requirements listed next to the firecloud_api scripts.
- name: Install dependencies
  working-directory: scripts/firecloud_api/
  run: pip install -r requirements.txt
# Resolve the branch under test and export it as BRANCH_NAME for later steps.
- name: Set Branch Name
  id: set_branch
  env:
    # github.head_ref is chosen by the pull-request author, so it is passed to
    # the script as an environment variable instead of being expanded into the
    # script text, where shell metacharacters in a branch name would execute.
    HEAD_REF: ${{ github.head_ref }}
  run: |
    if [ -z "$HEAD_REF" ]; then
      # Not a pull request: fall back to the ref that triggered the run.
      # GITHUB_REF_NAME keeps the full branch name (e.g. "feature/foo"), whereas
      # stripping everything up to the last "/" would shorten it to "foo".
      echo "Branch name is missing, using $GITHUB_REF_NAME"
      echo "BRANCH_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
    else
      echo "Branch name from PR: $HEAD_REF"
      echo "BRANCH_NAME=$HEAD_REF" >> "$GITHUB_ENV"
    fi
# Export GITHUB_COMMIT_HASH: the triggering SHA for manual runs, the PR head SHA
# for pull requests. Any other event type fails the step.
- name: Determine Github Commit Hash
  id: determine_github_commit_hash
  run: |
    case "${{ github.event_name }}" in
      workflow_dispatch)
        echo "Using github.sha for manually triggered workflow."
        echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV
        ;;
      pull_request)
        echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
        echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
        ;;
      *)
        echo "Unsupported event type: ${{ github.event_name }}"
        exit 1
        ;;
    esac
# Ask Dockstore which commit it currently holds for this pipeline/branch and
# export it as DOCKSTORE_COMMIT_HASH for later steps.
- name: Fetch Dockstore Workflow Commit Hash
  run: |
    # Wait 5.5 minutes (330 seconds) for Dockstore to update.
    # The previous "sleep 1" did not match this documented wait.
    sleep 330
    # Arguments are quoted so an empty token or an unusual branch name cannot
    # shift the positional arguments seen by the script.
    DOCKSTORE_COMMIT_HASH_FROM_FETCH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \
      "$DOCKSTORE_TOKEN" \
      "$DOCKSTORE_PIPELINE_NAME" \
      "$BRANCH_NAME")
    # Export the commit hash as an environment variable
    echo "DOCKSTORE_COMMIT_HASH=$DOCKSTORE_COMMIT_HASH_FROM_FETCH" >> "$GITHUB_ENV"
    echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH_FROM_FETCH"
  env:
    ## TODO NEED TO ADD DOCKSTORE_TOKEN FOR SERVICE ACCOUNT ##
    DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
    DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
    BRANCH_NAME: ${{ env.BRANCH_NAME }}
#- name: Compare Dockstore and Commit Hashes | |
# id: compare_hashes | |
# run: | | |
# echo "Comparing hashes..." | |
# echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" | |
# echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" | |
# | |
# if [ "$DOCKSTORE_COMMIT_HASH" != "$GITHUB_COMMIT_HASH" ]; then | |
# echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash!" | |
# echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" | |
# exit 1 | |
# else | |
# echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." | |
# fi | |
# env: | |
# DOCKSTORE_COMMIT_HASH: ${{ env.DOCKSTORE_COMMIT_HASH }} | |
# GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} | |
#- name: Set Test Type for PRs | |
# if: ${{ github.event_name == 'pull_request' }} | |
# id: set_test_type | |
# run: | | |
# # Default to "Scientific" if targeting master | |
# if [ "${{ github.base_ref }}" == "master" ]; then | |
# echo "testType=Scientific" >> $GITHUB_ENV | |
# else | |
# echo "testType=Plumbing" >> $GITHUB_ENV | |
# fi | |
# | |
#- name: Use Provided Test Type | |
# if: ${{ github.event_name == 'workflow_dispatch' }} | |
# id: use_provided_test_type | |
# run: | | |
# # Use the testType provided by the user | |
# echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV | |
# Decide which test suite to run and export it as "testType" for later steps.
- name: Set Test Type
  id: set_test_type
  env:
    # Context values are handed over as environment variables so that the
    # free-text testType input is never expanded into the script text itself.
    EVENT_NAME: ${{ github.event_name }}
    BASE_REF: ${{ github.base_ref }}
    REQUESTED_TEST_TYPE: ${{ github.event.inputs.testType }}
  run: |
    if [ "$EVENT_NAME" == "pull_request" ]; then
      # For PRs, set based on target branch
      if [ "$BASE_REF" == "master" ]; then
        TEST_TYPE="Scientific"
      else
        TEST_TYPE="Plumbing"
      fi
    else
      # For workflow_dispatch, use provided test type
      TEST_TYPE="$REQUESTED_TEST_TYPE"
    fi

    # Fail here with a clear message rather than later, when the value is used
    # to build the test-input directory and truth path.
    case "$TEST_TYPE" in
      Plumbing|Scientific)
        ;;
      *)
        echo "Error: unsupported test type '$TEST_TYPE' (expected Plumbing or Scientific)."
        exit 1
        ;;
    esac

    echo "testType=$TEST_TYPE" >> "$GITHUB_ENV"
    echo "testType=$TEST_TYPE"
# Create a method configuration for the branch under test and export the name
# printed by the helper script as METHOD_CONFIG_NAME.
- name: Create new method configuration
  env:
    PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
    TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
    WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
    USER: ${{ env.USER }}
  run: |
    echo "Creating new method configuration for branch: $BRANCH_NAME"
    # Collect the CLI arguments first, then make a single call to the script.
    create_args=(
      create_new_method_config
      --workspace-namespace "$WORKSPACE_NAMESPACE"
      --workspace-name "$TESTING_WORKSPACE"
      --pipeline_name "$PIPELINE_NAME"
      --branch_name "$BRANCH_NAME"
      --sa-json-b64 "$SA_JSON_B64"
      --user "$USER"
    )
    METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py "${create_args[@]}")
    echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV
# For every test-input JSON of the selected test type: rewrite the inputs,
# upload them to the Terra workspace and submit a job. Once everything is
# submitted, poll each submission, fetch its workflow outputs, and write one
# combined status section to the job summary.
- name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
  run: |
    CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
    MAX_RETRIES=2
    RETRY_DELAY=300 # 300 seconds = 5 minutes

    # Initialize variables to aggregate statuses and outputs
    ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
    ALL_OUTPUTS=""

    # Initialize arrays to track submission and workflow statuses
    declare -a SUBMISSION_IDS
    declare -A WORKFLOW_STATUSES

    # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
    if [ "$UPDATE_TRUTH" = "true" ]; then
      UPDATE_TRUTH_BOOL=true
    else
      UPDATE_TRUTH_BOOL=false
    fi

    if [ "$USE_CALL_CACHE" == "true" ]; then
      USE_CALL_CACHE_BOOL=true
    else
      USE_CALL_CACHE_BOOL=false
    fi

    PIPELINE_DIR="pipelines/broad/genotyping/illumina"
    INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
    echo "Running tests with test type: $TEST_TYPE"

    TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
    echo "Truth path: $TRUTH_PATH"
    RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"

    # Create the submission_data.json file which will be the same for all inputs
    SUBMISSION_DATA_FILE="submission_data.json"

    # Use a heredoc to generate the JSON file content dynamically
    cat <<EOF > "$SUBMISSION_DATA_FILE"
    {
      "methodConfigurationNamespace": "$WORKSPACE_NAMESPACE",
      "methodConfigurationName": "$METHOD_CONFIG_NAME",
      "useCallCache": $USE_CALL_CACHE_BOOL,
      "deleteIntermediateOutputFiles": false,
      "useReferenceDisks": true,
      "memoryRetryMultiplier": 1.2,
      "workflowFailureMode": "NoNewCalls",
      "userComment": "Automated submission",
      "ignoreEmptyOutputs": false
    }
    EOF
    echo "Created submission data file: $SUBMISSION_DATA_FILE"

    # 1. Submit all jobs first and store their submission IDs
    for input_file in "$INPUTS_DIR"/*.json; do
      # When the glob matches nothing, bash leaves the pattern itself in
      # input_file; stop with a clear message instead of passing it on.
      if [ ! -e "$input_file" ]; then
        echo "Error: no test input files found in $INPUTS_DIR"
        exit 1
      fi

      test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
        --results_path "$RESULTS_PATH" \
        --inputs_json "$input_file" \
        --update_truth "$UPDATE_TRUTH_BOOL" \
        --branch_name "$BRANCH_NAME" )
      echo "Uploading the test input file: $test_input_file"
      python3 scripts/firecloud_api/firecloud_api.py \
        upload_test_inputs \
        --workspace-namespace "$WORKSPACE_NAMESPACE" \
        --workspace-name "$TESTING_WORKSPACE" \
        --pipeline_name "$PIPELINE_NAME" \
        --test_input_file "$test_input_file" \
        --branch_name "$BRANCH_NAME" \
        --sa-json-b64 "$SA_JSON_B64" \
        --user "$USER"

      attempt=1
      while [ $attempt -le $MAX_RETRIES ]; do
        SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
          --workspace-namespace "$WORKSPACE_NAMESPACE" \
          --workspace-name "$TESTING_WORKSPACE" \
          --sa-json-b64 "$SA_JSON_B64" \
          --user "$USER" \
          --submission_data_file "$SUBMISSION_DATA_FILE")

        echo "Submission ID: $SUBMISSION_ID"

        if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
          echo "Error in submission, retrying in $RETRY_DELAY seconds..."
          ((attempt++))
          if [ $attempt -gt $MAX_RETRIES ]; then
            echo "Max retries reached. Exiting..."
            exit 1
          fi
          sleep $RETRY_DELAY
          continue
        fi

        echo "Submission successful. Submission ID: $SUBMISSION_ID"
        SUBMISSION_IDS+=("$SUBMISSION_ID")
        break
      done
    done

    echo "All jobs have been submitted. Starting to poll for statuses..."

    # 2. After all submissions are done, start polling for statuses of all jobs
    for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
      attempt=1
      while [ $attempt -le $MAX_RETRIES ]; do
        echo "Polling for Submission ID: $SUBMISSION_ID"
        RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
          --submission_id "$SUBMISSION_ID" \
          --sa-json-b64 "$SA_JSON_B64" \
          --user "$USER" \
          --workspace-namespace "$WORKSPACE_NAMESPACE" \
          --workspace-name "$TESTING_WORKSPACE")

        if [ -z "$RESPONSE" ]; then
          echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
          ((attempt++))
          if [ $attempt -gt $MAX_RETRIES ]; then
            echo "Max retries reached. Exiting..."
            exit 1
          fi
          sleep $RETRY_DELAY
          continue
        fi

        WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
        WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"

        # retrieve workflow outputs
        echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
        for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
          WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
            --user "$USER" \
            --sa-json-b64 "$SA_JSON_B64" \
            --submission_id "$SUBMISSION_ID" \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --workflow_id "$WORKFLOW_ID" \
            --pipeline_name "$PIPELINE_NAME")
          ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
        done
        break
      done
    done

    # 3. Generate the final summary once, after every submission has been polled.
    #    It runs outside the polling loop and uses its own loop variable, so the
    #    header is written a single time and each submission is listed once.
    echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
    for SUMMARY_SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
      # Generate the Terra URL for the submission; spaces in the workspace name
      # are percent-encoded so the link stays intact.
      SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/${TESTING_WORKSPACE// /%20}/job_history/$SUMMARY_SUBMISSION_ID"

      # Add the Submission ID as a hyperlink
      echo "[Submission ID: $SUMMARY_SUBMISSION_ID]($SUBMISSION_URL)" >> "$GITHUB_STEP_SUMMARY"

      # Add the workflows and statuses for this submission
      echo "${WORKFLOW_STATUSES[$SUMMARY_SUBMISSION_ID]}" >> "$GITHUB_STEP_SUMMARY"

      # Add a blank line for separation
      echo "" >> "$GITHUB_STEP_SUMMARY"
    done
  env:
    PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
    TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
    METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
    WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
    USER: ${{ env.USER }}
    # Manual-dispatch inputs (with the same fallbacks as before) and the chosen
    # test type reach the script as environment variables rather than being
    # expanded into the script text.
    UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
    USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
    TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
    TEST_TYPE: ${{ env.testType }}
# Cleanup: remove the method configuration named by METHOD_CONFIG_NAME.
- name: Delete Method Configuration
  # always() makes this step run whether earlier steps succeeded or failed.
  if: always()
  env:
    PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
    BRANCH_NAME: ${{ env.BRANCH_NAME }}
    SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
    METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
    WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
    TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
    USER: ${{ env.USER }}
  run: |
    echo "Deleting method configuration for branch: $BRANCH_NAME"
    delete_args=(
      delete_method_config
      --workspace-namespace "$WORKSPACE_NAMESPACE"
      --workspace-name "$TESTING_WORKSPACE"
      --pipeline_name "$PIPELINE_NAME"
      --branch_name "$BRANCH_NAME"
      --sa-json-b64 "$SA_JSON_B64"
      --user "$USER"
      --method_config_name "$METHOD_CONFIG_NAME"
    )
    DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py "${delete_args[@]}")
    echo "Delete response: $DELETE_RESPONSE"
    # The step passes only when the script printed exactly "True".
    if [ "$DELETE_RESPONSE" != "True" ]; then
      echo "Error: Method configuration deletion failed."
      exit 1
    fi
    echo "Method configuration deleted successfully."
#### TODO is this even ok to do??? ### | |
#- name: Decode the base64-encoded Google Cloud service account key | |
# run: | | |
# # Decode the base64 secret to the JSON file | |
# echo ${{ secrets.PDT_TESTER_SA_B64 }} | base64 --decode > $HOME/gcloud-key.json | |
# # Set GOOGLE_APPLICATION_CREDENTIALS environment variable to point to the JSON key file | |
# export GOOGLE_APPLICATION_CREDENTIALS=$HOME/gcloud-key.json | |
#- name: Authenticate with Google Cloud | |
# run: | | |
# gcloud auth activate-service-account --key-file=$HOME/gcloud-key.json | |
#- name: Download the Terra Commit Hash and Compare to Github Commit Hash | |
# run: | | |
# gsutil cp gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/commit_hash.txt . | |
# export TERRA_COMMIT_HASH=$(cat commit_hash.txt) | |
# echo "Terra Commit Hash: $TERRA_COMMIT_HASH" | |
# echo "GitHub Commit Hash: ${{ github.sha }}" | |
# | |
# #compare the Terra commit hash to the github commit hash | |
# if [ "$TERRA_COMMIT_HASH" != "${{ github.sha }}" ]; then | |
# echo "Error: The Terra Commit Hash does not match the GitHub Commit Hash!" | |
# echo "Mismatch found: Terra Commit Hash $TERRA_COMMIT_HASH != Github Commit Hash ${{ github.sha }}" | |
# exit 1 | |
# else | |
# echo "Success: The Terra Commit Hash matches the GitHub Commit Hash." | |
# fi | |
# Append a success heading to the job summary when no earlier step has failed.
- name: Print Summary on Success
  if: success()
  run: 'echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY'
# Append a failure heading to the job summary when an earlier step has failed.
- name: Print Summary on Failure
  if: failure()
  run: 'echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY'