# Test Illumina Genotyping Array (PR #547)
---
name: Test Illumina Genotyping Array

# Controls when the workflow will run
on:
  # run on push to feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches: ["develop", "staging", "master"]
    # Only run if files in these paths changed:
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - 'pipelines/broad/genotyping/illumina/**'
      - 'tasks/broad/IlluminaGenotypingArrayTasks.wdl'
      - 'tasks/broad/Qc.wdl'
      - 'verification/VerifyIlluminaGenotypingArray.wdl'
      - 'verification/test-wdls/TestIlluminaGenotypingArray.wdl'
      - 'tasks/broad/Utilities.wdl'
      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
      - '.github/workflows/test_illumina_genotyping_array.yml'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
    inputs:
      useCallCache:
        description: 'Use call cache (default: true)'
        required: false
        default: "true"
      updateTruth:
        description: 'Update truth files (default: false)'
        required: false
        default: "false"
      testType:
        description: 'Specify the type of test (Plumbing or Scientific)'
        required: true
      truthBranch:
        description: 'Specify the branch for truth files (default: master)'
        required: false
        default: "master"
# Workflow-level environment variables, available to every step.
env:
  PROJECT_NAME: WARP
  # Github repo name
  REPOSITORY_NAME: ${{ github.event.repository.name }}
  # Base64-encoded service-account JSON used to authenticate to Terra/FireCloud
  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_CREDENTIALS }}
jobs:
  run_pipeline:
    runs-on: ubuntu-latest
    # Add "id-token" with the intended permissions.
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      # Add a step to wait to account for github -> dockstore -> terra delays
      - name: Wait Before Starting
        run: |
          echo "Waiting briefly before starting..."
          sleep 1 # time in seconds, update this when we really want a delay
# actions/checkout MUST come before auth
- uses: actions/checkout@v3
with:
ref: ${{ github.ref }}
# id: 'auth'
# name: 'Authenticate to Google Cloud'
# uses: 'google-github-actions/auth@v2'
# with:
# token_format: 'access_token'
# # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting
# # This is provided by the DevOps team - do not change!
# workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider'
# # This is our tester service account
# service_account: '[email protected]'
# access_token_lifetime: '3600' # seconds, default is 3600
# access_token_scopes: 'profile, email, openid'
- name: Set up python
id: setup-python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
pwd
cd scripts/firecloud_api/
pip install -r requirements.txt
- name: Set Commit Hash
id: set_commit_hash
run: echo "COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV
# Set the branch name.
# github.head_ref contains the name of the branch in the context of a pull request
# if github.head_ref is empty, it implies the workflow was triggered manually
# ${GITHUB_REF##*/} extracts the branch name from GITHUB_REF.
# The ##*/ is a parameter expansion that removes the refs/heads/ prefix, leaving just the branch name.
- name: Set Branch Name
id: set_branch
run: |
if [ -z "${{ github.head_ref }}" ]; then
echo "Branch name is missing, using ${GITHUB_REF##*/}"
echo "branch_name=${GITHUB_REF##*/}" >> $GITHUB_ENV
else
echo "Branch name from PR: ${{ github.head_ref }}"
echo "branch_name=${{ github.head_ref }}" >> $GITHUB_ENV
fi
- name: Set Test Type for PRs
if: ${{ github.event_name == 'pull_request' }}
id: set_test_type
run: |
# Default to "Scientific" if targeting master
if [ "${{ github.base_ref }}" == "master" ]; then
echo "testType=Scientific" >> $GITHUB_ENV
else
echo "testType=Plumbing" >> $GITHUB_ENV
fi
- name: Use Provided Test Type
if: ${{ github.event_name == 'workflow_dispatch' }}
id: use_provided_test_type
run: |
# Use the testType provided by the user
echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV
- name: Update test inputs and Upload to Terra
run: |
UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
MAX_RETRIES=2
RETRY_DELAY=300 # 300 seconds = 5 minutes
# Initialize variables to aggregate statuses and outputs
ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
ALL_OUTPUTS=""
# Initialize arrays to track submission and workflow statuses
declare -a SUBMISSION_IDS
declare -A WORKFLOW_STATUSES
# Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
if [ "$UPDATE_TRUTH" = "true" ]; then
UPDATE_TRUTH_BOOL=true
else
UPDATE_TRUTH_BOOL=false
fi
if [ "$USE_CALL_CACHE" == "true" ]; then
USE_CALL_CACHE_BOOL=true
else
USE_CALL_CACHE_BOOL=false
fi
PIPELINE_NAME="TestIlluminaGenotypingArray"
PIPELINE_DIR="pipelines/broad/genotyping/illumina"
TEST_TYPE="${{ env.testType }}"
INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
echo "Running tests with test type: $TEST_TYPE"
TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
echo "Truth path: $TRUTH_PATH"
RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"
# Create the submission_data.json file which will be the same for all inputs
SUBMISSION_DATA_FILE="submission_data.json"
# Use a heredoc to generate the JSON file content dynamically
cat <<EOF > "$SUBMISSION_DATA_FILE"
{
"methodConfigurationNamespace": "warp-pipelines",
"methodConfigurationName": "$PIPELINE_NAME",
"useCallCache": $USE_CALL_CACHE_BOOL,
"deleteIntermediateOutputFiles": false,
"useReferenceDisks": true,
"memoryRetryMultiplier": 1.2,
"workflowFailureMode": "NoNewCalls",
"userComment": "Automated submission",
"ignoreEmptyOutputs": false
}
EOF
echo "Created submission data file: $SUBMISSION_DATA_FILE"
for input_file in "$INPUTS_DIR"/*.json; do
echo "Processing input file: $input_file"
test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
--results_path "$RESULTS_PATH" \
--inputs_json "$input_file" \
--update_truth "$UPDATE_TRUTH_BOOL" \
--commit_hash "$COMMIT_HASH" )
echo "Uploading the test input file: $test_input_file"
echo "Branch name: $branch_name"
python3 scripts/firecloud_api/firecloud_api2.py \
upload_test_inputs \
--workspace-namespace warp-pipelines \
--workspace-name "WARP Tests" \
--pipeline_name "$PIPELINE_NAME" \
--test_input_file "$test_input_file" \
--branch_name "$branch_name" \
--sa-json-b64 "$SA_JSON_B64" \
--user "[email protected]"
attempt=1
while [ $attempt -le $MAX_RETRIES ]; do
echo "Attempt $attempt: Submitting job for input file: $input_file"
#SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api2.py submit_job \
# --workspace-namespace "warp-pipelines" \
# --workspace-name "WARP Tests" \
# --sa-json-b64 "$SA_JSON_B64" \
# --user "[email protected]" \
# --submission_data_file "$SUBMISSION_DATA_FILE")
#
SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api2.py submit_job \
--workspace-namespace "warp-pipelines" \
--workspace-name "WARP Tests" \
--sa-json-b64 "$SA_JSON_B64" \
--user "[email protected]" \
--submission_data_file "$SUBMISSION_DATA_FILE")
echo "Submission ID: $SUBMISSION_ID"
if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
echo "Error in submission, retrying in $RETRY_DELAY seconds..."
((attempt++))
if [ $attempt -gt $MAX_RETRIES ]; then
echo "Max retries reached. Exiting..."
exit 1
fi
sleep $RETRY_DELAY
continue
fi
echo "Submission successful. Submission ID: $SUBMISSION_ID"
SUBMISSION_IDS+=("$SUBMISSION_ID")
break
done
done
- name: Monitor Workflow Status
run: |
echo "Monitoring the status of submitted workflows..."
echo "Submission IDs from the Submit Jobs step: ${{ steps.submit_jobs.outputs.submission_ids }}"
IFS=',' read -r -a SUBMISSION_IDS <<< "$SUBMISSION_IDS"
for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
echo "Polling submission status for Submission ID: $SUBMISSION_ID"
SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api2.py poll_job_status \
--submission_id "$SUBMISSION_ID" \
--sa-json-b64 "$SA_JSON_B64" \
--user "[email protected]" \
--workspace-namespace "warp-pipelines" \
--workspace-name "WARP Tests")
if [ -z "$SUBMISSION_ID" ]; then
echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
continue
fi
# Parse and store workflow statuses
WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$SUBMISSION_ID" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
echo "Statuses for submission $SUBMISSION_ID:"
echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"
# Append to aggregate statuses
WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION
# Retrieve workflow outputs
echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
for WORKFLOW_ID in $(echo "$SUBMISSION_ID" | jq -r 'keys[]'); do
WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api2.py get_outputs \
--submission_id "$SUBMISSION_ID" \
--workflow_id "$WORKFLOW_ID" \
--pipeline_name "$PIPELINE_NAME")
ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
done
done
# Generate summary for Submission IDs
echo "## Combined Workflow Statuses" >> $GITHUB_STEP_SUMMARY
for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
# Generate the Terra URL for the submission
SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
# Add the Submission ID as a hyperlink
echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> $GITHUB_STEP_SUMMARY
# Add the workflows and statuses for this submission
echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> $GITHUB_STEP_SUMMARY
# Add a blank line for separation
echo "" >> $GITHUB_STEP_SUMMARY
done
env:
SUBMISSION_IDS: ${{ steps.submit_jobs.outputs.submission_ids }} # Pass IDs from a previous step
PIPELINE_NAME: TestIlluminaGenotypingArray
NAMESPACE: warp-pipelines
WORKSPACE: WARP Tests
#- name: Update and Upload method configuration
# id: pipeline_run
# run: |
# # Set common environment variables
# TOKEN="${{ steps.auth.outputs.access_token }}"
# NAMESPACE="warp-pipelines"
# WORKSPACE="WARP Tests"
# USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
# UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
# #TEST_TYPE="${{ github.event.inputs.testType || 'Plumbing' }}"
# TEST_TYPE="${{ env.testType }}"
# TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
# CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
#
# echo "truth branch: $TRUTH_BRANCH"
#
# ########################################
# # SET PIPELINE SPECIFIC VARIABLES HERE #
# ########################################
# PIPELINE_NAME="TestIlluminaGenotypingArray"
# PIPELINE_DIR="pipelines/broad/genotyping/illumina"
# # TODO: Need to set the truth and result paths appropriately
# # TODO: Need to dynamically set the truth branch, for now it is hardcoded to master branch
# # We may want to keep the truth and resuts buckets separate for TTL reasons
# TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
# RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"
#
#
# # Function to call the Firecloud API using the firecloud_api2.py script
# firecloud_action() {
# python3 scripts/firecloud_api/firecloud_api2.py --action "$1" "${@:2}"
# }
#
#
# # Convert USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
# if [ "$USE_CALL_CACHE" == "true" ]; then
# USE_CALL_CACHE_BOOL=true
# else
# USE_CALL_CACHE_BOOL=false
# fi
#
#
# # Convert UPDATE_TRUTH to a boolean-friendly format ("true" -> true, "false" -> false)
# if [ "$UPDATE_TRUTH" = "true" ]; then
# UPDATE_TRUTH_BOOL=true
# else
# UPDATE_TRUTH_BOOL=false
# fi
#
# # Create the submission_data.json file which will be the same for all inputs
# SUBMISSION_DATA_FILE="submission_data.json"
#
# # Use a heredoc to generate the JSON file content dynamically
# cat <<EOF > "$SUBMISSION_DATA_FILE"
# {
# "methodConfigurationNamespace": "warp-pipelines",
# "methodConfigurationName": "$PIPELINE_NAME",
# "useCallCache": $USE_CALL_CACHE_BOOL,
# "deleteIntermediateOutputFiles": false,
# "useReferenceDisks": true,
# "memoryRetryMultiplier": 1.2,
# "workflowFailureMode": "NoNewCalls",
# "userComment": "Automated submission",
# "ignoreEmptyOutputs": false
# }
# EOF
# echo "Created submission data file: $SUBMISSION_DATA_FILE"
#
# # Initialize variables to aggregate statuses and outputs
# ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
# ALL_OUTPUTS=""
#
# # Initialize arrays to track submission and workflow statuses
# declare -a SUBMISSION_IDS
# declare -A WORKFLOW_STATUSES
#
# # Loop through each file in the appropriate test inputs directory
# INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
#
# echo "Running tests with test type: $TEST_TYPE"
#
# MAX_RETRIES=2
# RETRY_DELAY=300 # 300 seconds = 5 minutes
#
# for input_file in "$INPUTS_DIR"/*.json; do
# echo "Processing input file: $input_file"
# test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
# --results_path "$RESULTS_PATH" \
# --inputs_json "$input_file" \
# --update_truth "$UPDATE_TRUTH_BOOL" \
# --commit_hash "$COMMIT_HASH" )
# echo "Uploading the test input file: $test_input_file"
# echo "Branch name: $branch_name"
#
# python3 scripts/firecloud_api/firecloud_api2.py upload_test_inputs \
# --workspace-namespace warp-pipelines \
# --workspace-name "WARP Tests" \
# --pipeline_name "$PIPELINE_NAME" \
# --test_input_file "$test_input_file" \
# --branch_name "$branch_name" \
# --sa-json-b64 "$SA_JSON_B64" \
# --user "[email protected]"
# done
# attempt=1
# while [ $attempt -le $MAX_RETRIES ]; do
# echo "Attempt $attempt: Submitting job for input file: $input_file"
# #echo "Submitting job for input file: $input_file"
# cat "$SUBMISSION_DATA_FILE"
# SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE")
#
# if [[ "$SUBMISSION_ID" == *"404"* ]]; then
# echo "Error: Dockstore method not found. Retrying in $RETRY_DELAY seconds..."
# sleep $RETRY_DELAY
# ((attempt++))
# elif [ -z "$SUBMISSION_ID" ]; then
# echo "Submission failed for input file: $input_file. No submission ID received."
# break
# else
# echo "Submission successful. Submission ID: $SUBMISSION_ID"
# SUBMISSION_IDS+=("$SUBMISSION_ID")
# break
# fi
#
# if [ $attempt -gt $MAX_RETRIES ]; then
# echo "Max retries reached. Exiting..."
# fi
# done
# done
#
# #echo "Submission ID: $SUBMISSION_ID"
# #SUBMISSION_IDS+=("$SUBMISSION_ID")
#
#
# echo "Monitoring the status of submitted workflows..."
# for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
# echo "Polling submission status for Submission ID: $SUBMISSION_ID"
# RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID")
#
# if [ -z "$RESPONSE" ]; then
# echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
# continue
# fi
#
# # Parse and store workflow statuses
# WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
# echo "Statuses for submission $SUBMISSION_ID:"
# echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"
#
# # Append to aggregate statuses
# WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION
#
# # retrieve workflow outputs
# echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
# for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
# WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME")
# ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
# done
# done
#
# # Generate final summary tables with hyperlinks for Submission IDs
# echo "## Combined Workflow Statuses" >> $GITHUB_STEP_SUMMARY
# for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
# # Generate the Terra URL for the submission
# SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
#
# # Add the Submission ID as a hyperlink
# echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> $GITHUB_STEP_SUMMARY
#
# # Add the workflows and statuses for this submission
# echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> $GITHUB_STEP_SUMMARY
#
# # Add a blank line for separation
# echo "" >> $GITHUB_STEP_SUMMARY
# done
- name: Download Commit Hash from GCP
run: |
gsutil cp gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/commit_hash.txt ./commit_hash.txt
- name: Check Commit Hash
id: check_commit_hash
run: |
# Read the commit hash from the downloaded file
COMMIT_HASH_FROM_WDL=$(cat commit_hash.txt)
# Compare the two commit hashes
if [ "$COMMIT_HASH_FROM_WDL" != "${{ env.COMMIT_HASH }}" ]; then
echo "Error: The commit hash from the WDL output does not match the expected commit hash."
exit 1
else
echo "Commit hash match successful: $COMMIT_HASH_FROM_WDL"
fi
- name: Print Summary on Success
if: success()
run: |
echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY
- name: Print Summary on Failure
if: failure()
run: |
echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY