# Page header captured together with this file (not part of the workflow):
# Np jw test illumina genotyping arrays #2

name: Test JointGenotyping

# Controls when the workflow will run
on:
  pull_request:
    branches: ["develop", "staging", "master"]
    # Only run if files in these paths changed:
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - 'pipelines/broad/dna_seq/germline/joint_genotyping/**'
      - 'tasks/broad/JointGenotypingTasks.wdl'
      - 'verification/VerifyJointGenotyping.wdl'
      - 'verification/VerifyTasks.wdl'
      - 'verification/VerifyMetrics.wdl'
      - 'verification/VerifyGermlineSingleSample.wdl'
      - 'verification/VerifyNA12878.wdl'
      - 'verification/test-wdls/TestJointGenotyping.wdl'
      - 'tasks/broad/Utilities.wdl'
      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
      - '.github/workflows/test_joint_genotyping.yml'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
    inputs:
      # Inputs arrive as strings; the job compares them against "true".
      useCallCache:
        description: 'Use call cache (default: true)'
        required: false
        default: "true"
      updateTruth:
        description: 'Update truth files (default: false)'
        required: false
        default: "false"
      # The value is used verbatim to build the test-input directory and the
      # truth path, so it is restricted to the two supported test types
      # instead of accepting free text.
      testType:
        description: 'Specify the type of test (Plumbing or Scientific)'
        required: true
        type: choice
        options:
          - Plumbing
          - Scientific
      truthBranch:
        description: 'Specify the branch for truth files (default: master)'
        required: false
        default: "master"
# Workflow-level environment shared by every step of the job.
env:
  # pipeline configuration
  PROJECT_NAME: 'WARP'
  PIPELINE_NAME: 'TestJointGenotyping'
  DOCKSTORE_PIPELINE_NAME: 'JointGenotyping'
  PIPELINE_DIR: 'pipelines/broad/dna_seq/germline/joint_genotyping'

  # workspace configuration
  TESTING_WORKSPACE: 'WARP Tests'
  WORKSPACE_NAMESPACE: 'warp-pipelines'

  # github repo configuration
  REPOSITORY_NAME: ${{ github.event.repository.name }}

  # service account configuration
  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
  # NOTE(review): the value below looks like an e-mail obfuscation placeholder
  # rather than a real account, and as written YAML reads it as a one-item
  # flow sequence instead of a string. Restore the intended account address
  # (quoted) before relying on this workflow -- TODO confirm the real value.
  USER: [email protected]
jobs:
  # Single job; all steps below run sequentially on one hosted runner.
  run_pipeline:
    runs-on: ubuntu-latest
    # Token scopes for this job: read access to repository contents and
    # write access to "id-token".
    permissions:
      id-token: write
      contents: read
    steps:
# Check out the ref that triggered the run.
# actions/checkout MUST come before auth action
- uses: actions/checkout@v3
  with:
    ref: ${{ github.ref }}

# Provide the Python interpreter used by the helper scripts under scripts/.
- name: Set up python
  uses: actions/setup-python@v4
  id: setup-python
  with:
    python-version: "3.11"

# Install the requirements listed next to the firecloud_api scripts; the
# command runs from inside that directory.
- name: Install dependencies
  working-directory: scripts/firecloud_api/
  run: |
    pip install -r requirements.txt
# Exports BRANCH_NAME for later steps: the PR head branch when one exists,
# otherwise the short name of the ref that triggered the run.
- name: Set Branch Name
  id: set_branch
  env:
    # The head ref is chosen by the PR author. It is handed to the script as
    # an environment variable instead of being expanded into the script text,
    # so it can never be interpreted as shell code.
    HEAD_REF: ${{ github.head_ref }}
  run: |
    if [ -z "$HEAD_REF" ]; then
      # GITHUB_REF_NAME keeps the full short ref, so a branch such as
      # "feature/foo" is not truncated to "foo".
      echo "Branch name is missing, using $GITHUB_REF_NAME"
      echo "BRANCH_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
    else
      echo "Branch name from PR: $HEAD_REF"
      echo "BRANCH_NAME=$HEAD_REF" >> "$GITHUB_ENV"
    fi
# Exports GITHUB_COMMIT_HASH: github.sha for manual runs, the PR head commit
# for pull requests. Any other trigger fails the step.
- name: Determine Github Commit Hash
  id: determine_github_commit_hash
  run: |
    case "${{ github.event_name }}" in
      workflow_dispatch)
        echo "Using github.sha for manually triggered workflow."
        echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV
        ;;
      pull_request)
        echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
        echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
        ;;
      *)
        echo "Unsupported event type: ${{ github.event_name }}"
        exit 1
        ;;
    esac
# Waits for Dockstore to sync, then asks fetch_dockstore_commit.py for the
# commit hash of this pipeline on this branch and exports it as
# DOCKSTORE_COMMIT_HASH.
- name: Fetch Dockstore Workflow Commit Hash
  env:
    ## TODO NEED TO ADD DOCKSTORE_TOKEN FOR SERVICE ACCOUNT ##
    DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
    DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
    BRANCH_NAME: ${{ env.BRANCH_NAME }}
  run: |
    # Wait 5.5 minutes for Dockstore to update
    sleep 330
    # The arguments are positional, so each one is quoted: an empty value
    # (for example a missing token) must stay in its own slot instead of
    # letting the following arguments shift left.
    DOCKSTORE_COMMIT_HASH_FROM_FETCH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \
      "$DOCKSTORE_TOKEN" \
      "$DOCKSTORE_PIPELINE_NAME" \
      "$BRANCH_NAME")
    # Export the commit hash as an environment variable
    echo "DOCKSTORE_COMMIT_HASH=$DOCKSTORE_COMMIT_HASH_FROM_FETCH" >> "$GITHUB_ENV"
    echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH_FROM_FETCH"
# Gate: stop the job unless Dockstore reports the same commit that this run
# is testing.
- name: Compare Dockstore and Commit Hashes
  id: compare_hashes
  env:
    GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }}
    DOCKSTORE_COMMIT_HASH: ${{ env.DOCKSTORE_COMMIT_HASH }}
  run: |
    echo "Comparing hashes..."
    echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH"
    echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH"
    if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then
      echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash."
    else
      echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash!"
      echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH"
      exit 1
    fi
# Exports testType for later steps. Pull requests targeting master run the
# Scientific tests and all other pull requests run Plumbing; manual runs use
# the testType dispatch input.
- name: Set Test Type
  id: set_test_type
  env:
    # The dispatch input is free-form user text. It is passed through the
    # environment so it is never expanded into the script source.
    REQUESTED_TEST_TYPE: ${{ github.event.inputs.testType }}
  run: |
    if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
      # For PRs, set based on target branch
      if [ "$GITHUB_BASE_REF" == "master" ]; then
        SELECTED_TEST_TYPE="Scientific"
      else
        SELECTED_TEST_TYPE="Plumbing"
      fi
    else
      # For workflow_dispatch, use provided test type
      SELECTED_TEST_TYPE="$REQUESTED_TEST_TYPE"
    fi
    echo "testType=$SELECTED_TEST_TYPE" >> "$GITHUB_ENV"
    echo "testType=$SELECTED_TEST_TYPE"
# Calls firecloud_api.py create_new_method_config for this branch and exports
# whatever the script prints as METHOD_CONFIG_NAME.
- name: Create new method configuration
  env:
    USER: ${{ env.USER }}
    WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
    TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
    PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
  run: |
    echo "Creating new method configuration for branch: $BRANCH_NAME"
    # Argument list for the helper script, one option per line.
    create_args=(
      create_new_method_config
      --workspace-namespace $WORKSPACE_NAMESPACE
      --workspace-name "$TESTING_WORKSPACE"
      --pipeline_name "$PIPELINE_NAME"
      --branch_name "$BRANCH_NAME"
      --sa-json-b64 "$SA_JSON_B64"
      --user "$USER"
    )
    new_config_name=$(python3 scripts/firecloud_api/firecloud_api.py "${create_args[@]}")
    echo "METHOD_CONFIG_NAME=$new_config_name" >> $GITHUB_ENV
# For every test input JSON of the selected test type: run UpdateTestInputs.py
# on it, upload the result to the Terra workspace and submit a job. Once all
# jobs are submitted, poll each submission, fetch its workflow outputs and
# write one combined status summary. The step fails when any workflow ends
# Failed/Aborted/Aborting or when submission/polling retries are exhausted.
- name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
  env:
    PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
    TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
    METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
    WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
    USER: ${{ env.USER }}
    DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
    PIPELINE_DIR: ${{ env.PIPELINE_DIR }}
    # User-supplied dispatch inputs are passed as environment variables rather
    # than expanded into the script text, so they cannot inject shell code.
    UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
    USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
    TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
    TEST_TYPE: ${{ env.testType }}
  run: |
    CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
    MAX_RETRIES=2
    RETRY_DELAY=300 # 300 seconds = 5 minutes

    # Workflow outputs are accumulated here; nothing in this step reads them back.
    ALL_OUTPUTS=""
    # Submission IDs in submission order, and the status lines per submission
    declare -a SUBMISSION_IDS
    declare -A WORKFLOW_STATUSES
    OVERALL_SUCCESS=true

    # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, anything else -> false)
    if [ "$UPDATE_TRUTH" = "true" ]; then
      UPDATE_TRUTH_BOOL=true
    else
      UPDATE_TRUTH_BOOL=false
    fi
    if [ "$USE_CALL_CACHE" = "true" ]; then
      USE_CALL_CACHE_BOOL=true
    else
      USE_CALL_CACHE_BOOL=false
    fi

    INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
    echo "Running tests with test type: $TEST_TYPE"
    TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
    echo "Truth path: $TRUTH_PATH"
    RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME"

    # Collect the input files up front. Without nullglob an empty or missing
    # directory would hand the literal pattern "*.json" to the scripts below.
    shopt -s nullglob
    INPUT_FILES=("$INPUTS_DIR"/*.json)
    shopt -u nullglob
    if [ "${#INPUT_FILES[@]}" -eq 0 ]; then
      echo "Error: no test input JSON files found in $INPUTS_DIR"
      exit 1
    fi

    # Create the submission_data.json file which will be the same for all inputs
    SUBMISSION_DATA_FILE="submission_data.json"
    # Use a heredoc to generate the JSON file content dynamically
    cat <<EOF > "$SUBMISSION_DATA_FILE"
    {
      "methodConfigurationNamespace": "$WORKSPACE_NAMESPACE",
      "methodConfigurationName": "$METHOD_CONFIG_NAME",
      "useCallCache": $USE_CALL_CACHE_BOOL,
      "deleteIntermediateOutputFiles": false,
      "useReferenceDisks": true,
      "memoryRetryMultiplier": 1.2,
      "workflowFailureMode": "NoNewCalls",
      "userComment": "Automated submission",
      "ignoreEmptyOutputs": false
    }
    EOF
    echo "Created submission data file: $SUBMISSION_DATA_FILE"

    # 1. Submit all jobs first and store their submission IDs
    for input_file in "${INPUT_FILES[@]}"; do
      test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
        --results_path "$RESULTS_PATH" \
        --inputs_json "$input_file" \
        --update_truth "$UPDATE_TRUTH_BOOL" \
        --branch_name "$BRANCH_NAME")
      echo "Uploading the test input file: $test_input_file"
      python3 scripts/firecloud_api/firecloud_api.py \
        upload_test_inputs \
        --workspace-namespace "$WORKSPACE_NAMESPACE" \
        --workspace-name "$TESTING_WORKSPACE" \
        --pipeline_name "$PIPELINE_NAME" \
        --test_input_file "$test_input_file" \
        --branch_name "$BRANCH_NAME" \
        --sa-json-b64 "$SA_JSON_B64" \
        --user "$USER"

      attempt=1
      while [ "$attempt" -le "$MAX_RETRIES" ]; do
        SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
          --workspace-namespace "$WORKSPACE_NAMESPACE" \
          --workspace-name "$TESTING_WORKSPACE" \
          --sa-json-b64 "$SA_JSON_B64" \
          --user "$USER" \
          --submission_data_file "$SUBMISSION_DATA_FILE")
        echo "Submission ID: $SUBMISSION_ID"
        # An empty reply or one containing "404" is treated as a failed submission
        if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
          echo "Error in submission, retrying in $RETRY_DELAY seconds..."
          attempt=$((attempt + 1))
          if [ "$attempt" -gt "$MAX_RETRIES" ]; then
            echo "Max retries reached. Exiting..."
            exit 1
          fi
          sleep "$RETRY_DELAY"
          continue
        fi
        echo "Submission successful. Submission ID: $SUBMISSION_ID"
        SUBMISSION_IDS+=("$SUBMISSION_ID")
        break
      done
    done
    echo "All jobs have been submitted. Starting to poll for statuses..."

    # 2. After all submissions are done, start polling for statuses of all jobs
    for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
      attempt=1
      while [ "$attempt" -le "$MAX_RETRIES" ]; do
        echo "Polling for Submission ID: $SUBMISSION_ID"
        RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
          --submission_id "$SUBMISSION_ID" \
          --sa-json-b64 "$SA_JSON_B64" \
          --user "$USER" \
          --workspace-namespace "$WORKSPACE_NAMESPACE" \
          --workspace-name "$TESTING_WORKSPACE")
        if [ -z "$RESPONSE" ]; then
          # An empty reply is only fatal once the retries are used up (exit 1
          # below). It must not mark the whole run as failed when a later
          # attempt succeeds.
          echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
          attempt=$((attempt + 1))
          if [ "$attempt" -gt "$MAX_RETRIES" ]; then
            echo "Max retries reached. Exiting..."
            exit 1
          fi
          sleep "$RETRY_DELAY"
          continue
        fi
        # RESPONSE is read as a JSON object of workflow ID -> status
        WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
        WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"
        # Check if any workflow failed or errored
        FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key')
        if [ -n "$FAILED_WORKFLOWS" ]; then
          echo "Failed workflows detected:"
          echo "$FAILED_WORKFLOWS"
          OVERALL_SUCCESS=false
        fi
        # retrieve workflow outputs
        echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
        for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
          WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
            --user "$USER" \
            --sa-json-b64 "$SA_JSON_B64" \
            --submission_id "$SUBMISSION_ID" \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --workflow_id "$WORKFLOW_ID" \
            --pipeline_name "$PIPELINE_NAME")
          ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
        done
        break
      done
    done

    # 3. Generate the final summary once, after every submission has been
    # polled, so the heading and each submission appear exactly one time.
    # Iterating SUBMISSION_IDS keeps the entries in submission order.
    WORKSPACE_URL_NAME="${TESTING_WORKSPACE// /%20}"
    echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
    for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
      # Generate the Terra URL for the submission
      SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/$WORKSPACE_URL_NAME/job_history/$SUBMISSION_ID"
      # Add the Submission ID as a hyperlink
      echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> "$GITHUB_STEP_SUMMARY"
      # Add the workflows and statuses for this submission
      echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> "$GITHUB_STEP_SUMMARY"
      # Add a blank line for separation
      echo "" >> "$GITHUB_STEP_SUMMARY"
    done

    # Exit with error if any workflows failed
    if [ "$OVERALL_SUCCESS" = false ]; then
      echo "One or more workflows failed in Terra. Check the workflow status summary for details."
      exit 1
    fi
# Cleanup: removes the method configuration created earlier in this job.
# The helper script is expected to print "True" on success; any other output
# fails the step.
- name: Delete Method Configuration
  # Ensures it runs regardless of success or failure
  if: always()
  env:
    PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
    BRANCH_NAME: ${{ env.BRANCH_NAME }}
    SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
    METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
    WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
    TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
    USER: ${{ env.USER }}
  run: |
    # Because of always(), this step also runs when the job failed before a
    # configuration name was exported. There is nothing to delete then, and
    # attempting it would only add a second, misleading failure.
    if [ -z "$METHOD_CONFIG_NAME" ]; then
      echo "No method configuration name was recorded; nothing to delete."
      exit 0
    fi
    echo "Deleting method configuration for branch: $BRANCH_NAME"
    DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \
      --workspace-namespace "$WORKSPACE_NAMESPACE" \
      --workspace-name "$TESTING_WORKSPACE" \
      --pipeline_name "$PIPELINE_NAME" \
      --branch_name "$BRANCH_NAME" \
      --sa-json-b64 "$SA_JSON_B64" \
      --user "$USER" \
      --method_config_name "$METHOD_CONFIG_NAME")
    echo "Delete response: $DELETE_RESPONSE"
    if [ "$DELETE_RESPONSE" == "True" ]; then
      echo "Method configuration deleted successfully."
    else
      echo "Error: Method configuration deletion failed."
      exit 1
    fi
# Appends the success banner to the job summary when every earlier step passed.
- name: Print Summary on Success
  if: success()
  run: |
    printf '%s\n' "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> "$GITHUB_STEP_SUMMARY"
# Appends the failure banner to the job summary when any earlier step failed.
- name: Print Summary on Failure
  if: failure()
  run: |
    printf '%s\n' "# :x: Pipeline Execution Summary (on Failure) :x: " >> "$GITHUB_STEP_SUMMARY"