From a9a35089f71a3e3c2c2ce05458aede53c04d2fd4 Mon Sep 17 00:00:00 2001
From: npetrill
Date: Mon, 2 Dec 2024 15:09:50 -0500
Subject: [PATCH] break things down

---
 .../test_illumina_genotyping_array.yml        | 227 ++++--------------
 1 file changed, 41 insertions(+), 186 deletions(-)

diff --git a/.github/workflows/test_illumina_genotyping_array.yml b/.github/workflows/test_illumina_genotyping_array.yml
index 43d23805cb..525e373941 100644
--- a/.github/workflows/test_illumina_genotyping_array.yml
+++ b/.github/workflows/test_illumina_genotyping_array.yml
@@ -106,200 +106,55 @@ jobs:
           # Use the testType provided by the user
           echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV
 
-      - name: Submit job, poll status, and get outputs
-        id: pipeline_run
+      - name: Create Submission Data File
         run: |
-          # Set common environment variables
-          TOKEN="${{ steps.auth.outputs.access_token }}"
-          NAMESPACE="warp-pipelines"
-          WORKSPACE="WARP Tests"
-          USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
-          UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
-          #TEST_TYPE="${{ github.event.inputs.testType || 'Plumbing' }}"
-          TEST_TYPE="${{ env.testType }}"
-          TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
-          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
-
-          echo "truth branch: $TRUTH_BRANCH"
-
-          ########################################
-          # SET PIPELINE SPECIFIC VARIABLES HERE #
-          ########################################
-          PIPELINE_NAME="TestIlluminaGenotypingArray"
-          PIPELINE_DIR="pipelines/broad/genotyping/illumina"
-          # TODO: Need to set the truth and result paths appropriately
-          # TODO: Need to dynamically set the truth branch, for now it is hardcoded to master branch
-          # We may want to keep the truth and results buckets separate for TTL reasons
-          TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
-          RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"
+          # Create the submission data file used when submitting jobs
+          echo "Creating submission data file..."
+          # All data-preparation steps for the submission go here
-
-          # Function to call the Firecloud API using the firecloud_api.py script
-          firecloud_action() {
-            python3 scripts/firecloud_api/firecloud_api.py --token "$TOKEN" --namespace "$NAMESPACE" --workspace "$WORKSPACE" --action "$1" "${@:2}"
-          }
-
-          # Convert USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
-          if [ "$USE_CALL_CACHE" == "true" ]; then
-            USE_CALL_CACHE_BOOL=true
-          else
-            USE_CALL_CACHE_BOOL=false
-          fi
+
+      - name: Submit Job
+        id: submit_job
+        run: |
+          echo "Submitting job..."
+          # Submit the job using the submission data file created above and store the submission ID
+          SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py --token "${{ steps.auth.outputs.access_token }}" --action submit --submission_data_file "submission_data.json")
+          echo "Submission ID: $SUBMISSION_ID"
+          echo "submission_id=$SUBMISSION_ID" >> $GITHUB_ENV
+
+      - name: Poll Status
+        id: poll_status
+        run: |
+          echo "Polling status for submission ID: ${{ env.submission_id }}"
+          RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py --token "${{ steps.auth.outputs.access_token }}" --action poll_status --submission_id "${{ env.submission_id }}")
 
-          # Convert UPDATE_TRUTH to a boolean-friendly format ("true" -> true, "false" -> false)
-          if [ "$UPDATE_TRUTH" = "true" ]; then
-            UPDATE_TRUTH_BOOL=true
-          else
-            UPDATE_TRUTH_BOOL=false
+          # Check if polling returned any data or an error
+          if [ -z "$RESPONSE" ]; then
+            echo "Failed to retrieve Workflow IDs for submission: ${{ env.submission_id }}"
+            exit 1
           fi
-
-          # Create the submission_data.json file which will be the same for all inputs
-          SUBMISSION_DATA_FILE="submission_data.json"
-
-          # Use a heredoc to generate the JSON file content dynamically
-          cat <<EOF > "$SUBMISSION_DATA_FILE"
-          {
-            "methodConfigurationNamespace": "warp-pipelines",
-            "methodConfigurationName": "$PIPELINE_NAME",
-            "useCallCache": $USE_CALL_CACHE_BOOL,
-            "deleteIntermediateOutputFiles": false,
-            "useReferenceDisks": true,
-            "memoryRetryMultiplier": 1.2,
-            "workflowFailureMode": "NoNewCalls",
-            "userComment": "Automated submission",
-            "ignoreEmptyOutputs": false
-          }
-          EOF
-          echo "Created submission data file: $SUBMISSION_DATA_FILE"
-
-          # Initialize variables to aggregate statuses and outputs
-          ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
-          ALL_OUTPUTS=""
-
-          # Initialize arrays to track submission and workflow statuses
-          declare -a SUBMISSION_IDS
-          declare -A WORKFLOW_STATUSES
+
+          # Store workflow statuses
+          echo "$RESPONSE" > workflow_statuses.json
 
-          # Loop through each file in the appropriate test inputs directory
-          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
-
-          echo "Running tests with test type: $TEST_TYPE"
-
-          MAX_RETRIES=2
-          RETRY_DELAY=300 # 300 seconds = 5 minutes
-
-          for input_file in "$INPUTS_DIR"/*.json; do
-            echo "Processing input file: $input_file"
-            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
-              --results_path "$RESULTS_PATH" \
-              --inputs_json "$input_file" \
-              --update_truth "$UPDATE_TRUTH_BOOL")
-            echo "Uploading the test input file: $test_input_file"
-            echo "Branch name: $branch_name"
-
-            firecloud_action upload_test_inputs --pipeline_name $PIPELINE_NAME --test_input_file "$test_input_file" --branch_name $branch_name
-            attempt=1
-            while [ $attempt -le $MAX_RETRIES ]; do
-              echo "Attempt $attempt: Submitting job for input file: $input_file"
-              #echo "Submitting job for input file: $input_file"
-              cat "$SUBMISSION_DATA_FILE"
-              SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE")
-
-              if [[ "$SUBMISSION_ID" == *"404"* ]]; then
-                echo "Error: Dockstore method not found. Retrying in $RETRY_DELAY seconds..."
-                sleep $RETRY_DELAY
-                ((attempt++))
-              elif [ -z "$SUBMISSION_ID" ]; then
-                echo "Submission failed for input file: $input_file. No submission ID received."
-                break
-              else
-                echo "Submission successful. Submission ID: $SUBMISSION_ID"
-                SUBMISSION_IDS+=("$SUBMISSION_ID")
-                break
-              fi
-
-              if [ $attempt -gt $MAX_RETRIES ]; then
-                echo "Max retries reached. Exiting..."
-              fi
-            done
-          done
 
+      - name: Get Outputs
+        id: get_outputs
+        run: |
+          echo "Retrieving outputs for submission ID: ${{ env.submission_id }}"
+          WORKFLOW_IDS=$(jq -r 'keys[]' workflow_statuses.json)
 
-          #echo "Submission ID: $SUBMISSION_ID"
-          #SUBMISSION_IDS+=("$SUBMISSION_ID")
-
-          # Function to refresh token
-          refresh_token() {
-            echo "Refreshing Google Cloud authentication token..."
-            # Re-authenticate and get a new token
-            TOKEN=$(gcloud auth application-default print-access-token)
-            echo "New token retrieved: $TOKEN"
-          }
-
-
-          echo "Monitoring the status of submitted workflows..."
-          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
-            echo "Polling submission status for Submission ID: $SUBMISSION_ID"
-
-            # Check if the token is expired or close to expiration and refresh it if necessary
-            CURRENT_TIME_EPOCH=$(date +%s)
-            TOKEN_EXPIRATION_TIME=$(gcloud auth application-default print-access-token --format='value(expiry)')
-            echo "Raw token expiration time: $TOKEN_EXPIRATION_TIME"
-
-            # Extract the valid datetime portion (first part before the semicolon)
-            TOKEN_EXPIRATION_DATETIME=$(echo "$TOKEN_EXPIRATION_TIME" | awk -F';' '{print $1}' | awk -F'=' '{print $2}')
-            echo "Parsed token expiration datetime: $TOKEN_EXPIRATION_DATETIME"
-
-            # Convert the parsed datetime to epoch time
-            EXPIRATION_TIME_EPOCH=$(date -d "$TOKEN_EXPIRATION_DATETIME" +%s)
-            TOKEN_LIFETIME_THRESHOLD=300 # Set the threshold to 5 minutes before expiration
-
-            # Check and refresh token if necessary
-            if (( EXPIRATION_TIME_EPOCH - CURRENT_TIME_EPOCH <= TOKEN_LIFETIME_THRESHOLD )); then
-              echo "Token is nearing expiration or expired. Refreshing token..."
-              refresh_token
-            else
-              echo "Token is valid. No refresh needed."
-            fi
+          for WORKFLOW_ID in $WORKFLOW_IDS; do
+            OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py --token "${{ steps.auth.outputs.access_token }}" --action get_outputs --submission_id "${{ env.submission_id }}" --workflow_id "$WORKFLOW_ID")
+            echo "Workflow Output for $WORKFLOW_ID: $OUTPUT"
+            echo "$OUTPUT" >> final_outputs.json
+          done
 
-            # Poll the status using the fresh token
-            RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID")
-
-            if [ -z "$RESPONSE" ]; then
-              echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
-              continue
-            fi
-
-            # Parse and store workflow statuses
-            WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
-            echo "Statuses for submission $SUBMISSION_ID:"
-            echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"
-
-            # Append to aggregate statuses
-            WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION
-
-            # retrieve workflow outputs
-            echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
-            for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
-              WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME")
-              ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
-            done
-          done
-
-          # Generate final summary tables with hyperlinks for Submission IDs
-          echo "## Combined Workflow Statuses" >> $GITHUB_STEP_SUMMARY
-          for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
-            # Generate the Terra URL for the submission
-            SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
-
-            # Add the Submission ID as a hyperlink
-            echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> $GITHUB_STEP_SUMMARY
-
-            # Add the workflows and statuses for this submission
-            echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> $GITHUB_STEP_SUMMARY
-
-            # Add a blank line for separation
-            echo "" >> $GITHUB_STEP_SUMMARY
-          done
+
+      - name: Summarize and Print Results
+        id: summarize_results
+        run: |
+          echo "Summarizing the final results..."
+          # Process and print the results (outputs, statuses, etc.)
+          cat final_outputs.json
+          echo "Pipeline run complete!"
 
       - name: Print Summary on Success
         if: success()
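
The new "Create Submission Data File" step above is still a stub. A minimal sketch of the body it could use, carried over from the heredoc this patch removes; the hard-coded "useCallCache": true is an assumption, since the old script derived that value from the useCallCache workflow input:

      - name: Create Submission Data File
        run: |
          # Write the Terra submission request; values carried over verbatim
          # from the removed inline heredoc. "useCallCache" is hard-coded here
          # only for illustration.
          cat <<EOF > submission_data.json
          {
            "methodConfigurationNamespace": "warp-pipelines",
            "methodConfigurationName": "TestIlluminaGenotypingArray",
            "useCallCache": true,
            "deleteIntermediateOutputFiles": false,
            "useReferenceDisks": true,
            "memoryRetryMultiplier": 1.2,
            "workflowFailureMode": "NoNewCalls",
            "userComment": "Automated submission",
            "ignoreEmptyOutputs": false
          }
          EOF
          echo "Created submission data file: submission_data.json"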
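
The new "Poll Status" step queries the submission once, while the removed code polled each submission and refreshed its token along the way. A sketch of a poll-until-terminal loop for that step, assuming poll_status returns a JSON object mapping workflow IDs to Cromwell-style statuses (as the old jq parsing implies); the 60-second interval, the attempt cap, and the terminal-status list are illustrative assumptions:

          # Poll until no workflow remains in a non-terminal state (sketch).
          MAX_POLLS=60
          for ((i = 1; i <= MAX_POLLS; i++)); do
            RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py \
              --token "${{ steps.auth.outputs.access_token }}" \
              --action poll_status --submission_id "${{ env.submission_id }}")
            # Count workflows whose status is not yet terminal
            RUNNING=$(echo "$RESPONSE" | jq '[.[] | select(. != "Succeeded" and . != "Failed" and . != "Aborted")] | length')
            if [ "$RUNNING" -eq 0 ]; then
              echo "$RESPONSE" > workflow_statuses.json
              break
            fi
            echo "Poll $i: $RUNNING workflow(s) still running; retrying in 60s..."
            sleep 60
          done

Note that a long-running loop would reintroduce the token-expiry problem the removed refresh_token logic handled, since the step reuses the token minted at the start of the job.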
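
Because the "Get Outputs" step appends each response with >>, final_outputs.json ends up as a stream of concatenated JSON documents rather than a single valid JSON value, which is fine for cat but not for JSON consumers. If a later step needs well-formed JSON, jq's slurp mode can merge the stream into one array, assuming each appended record is itself valid JSON:

          # Merge the concatenated per-workflow records into one JSON array (sketch).
          jq -s '.' final_outputs.json > combined_outputs.json
          cat combined_outputs.json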