# .github/workflows/test_illumina_genotyping_array.yml

name: Test Illumina Genotyping Array

# Events that trigger this workflow
on:
  # Temporary push trigger for the feature branch "kp_GHA_Terra_auth_PD-2682".
  # REMOVE WHEN DONE TESTING.
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches:
      - develop
      - staging
      - master
    # Only run if files in these paths changed:
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - "pipelines/broad/genotyping/illumina/**"
      - "tasks/broad/IlluminaGenotypingArrayTasks.wdl"
      - "tasks/broad/Qc.wdl"
      - "verification/VerifyIlluminaGenotypingArray.wdl"
      - "verification/test-wdls/TestIlluminaGenotypingArray.wdl"
      - "tasks/broad/Utilities.wdl"
      - "tasks/broad/TerraCopyFilesFromCloudToCloud.wdl"
      - ".github/workflows/test_illumina_genotyping_array.yml"

  # Manual trigger from the Actions tab. Every input except testType has a default.
  workflow_dispatch:
    inputs:
      useCallCache:
        description: "Use call cache (default: true)"
        required: false
        default: 'true'
      updateTruth:
        description: "Update truth files (default: false)"
        required: false
        default: 'false'
      testType:
        description: "Specify the type of test (Plumbing or Scientific)"
        required: true
      truthBranch:
        description: "Specify the branch for truth files (default: master)"
        required: false
        default: 'master'


# Workflow-level environment, visible to every step
env:
  PROJECT_NAME: 'WARP'
  # Name of the GitHub repository this workflow runs in
  REPOSITORY_NAME: '${{ github.event.repository.name }}'
  # Repository secret; steps hand it to firecloud_api2.py via --sa-json-b64
  SA_JSON_B64: '${{ secrets.PDT_TESTER_SA_CREDENTIALS }}'


jobs:
  run_pipeline:
    runs-on: 'ubuntu-latest'
    # Token permissions for this job: read access to repository contents and
    # write access to "id-token".
    permissions:
      contents: read
      id-token: write

    steps:
      # Add a step to wait to account for github -> dockstore -> terra delays
      - name: Wait Before Starting
        run: |
          # Delay in seconds. Currently a 1-second placeholder; raise it
          # (e.g. to 300 for 5 minutes) when a real delay is wanted.
          WAIT_SECONDS=1
          # Report the delay that is actually applied rather than a fixed text
          echo "Waiting for ${WAIT_SECONDS} second(s) before starting..."
          sleep "$WAIT_SECONDS"
      # actions/checkout MUST come before auth
      # Checks out the ref that triggered this run.
      - uses: 'actions/checkout@v3'
        with:
          ref: '${{ github.ref }}'

      # id: 'auth'
      # name: 'Authenticate to Google Cloud'
      # uses: 'google-github-actions/auth@v2'
      # with:
      #   token_format: 'access_token'
      #   # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting
      #   # This is provided by the DevOps team - do not change!
      #   workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider'
      #   # This is our tester service account
      #   service_account: 'pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com'
      #   access_token_lifetime: '3600' # seconds, default is 3600
      #   access_token_scopes: 'profile, email, openid'

      # Python is needed by the scripts under scripts/firecloud_api/
      - name: Set up python
        id: setup-python
        uses: 'actions/setup-python@v4'
        with:
          # Quoted so the version stays a string
          python-version: "3.11"
      - name: Install dependencies
        run: |
          # Print the working directory, then install the requirements of the
          # firecloud_api scripts from inside their directory
          pwd
          cd scripts/firecloud_api/
          pip install --requirement requirements.txt

      - name: Set Commit Hash
        id: set_commit_hash
        # Export the triggering commit SHA as COMMIT_HASH for later steps
        run: |
          echo "COMMIT_HASH=${GITHUB_SHA}" >> "$GITHUB_ENV"

        # Set the branch name.
      # github.head_ref contains the name of the branch in the context of a pull request
      # if github.head_ref is empty, it implies the workflow was triggered manually
      # ${GITHUB_REF##*/} extracts the branch name from GITHUB_REF.
      # The ##*/ is a parameter expansion that removes the refs/heads/ prefix, leaving just the branch name.
      - name: Set Branch Name
        id: set_branch
        run: |
          if [ -z "${{ github.head_ref }}" ]; then
            echo "Branch name is missing, using ${GITHUB_REF##*/}"
            echo "branch_name=${GITHUB_REF##*/}" >> $GITHUB_ENV
          else
            echo "Branch name from PR: ${{ github.head_ref }}"
            echo "branch_name=${{ github.head_ref }}" >> $GITHUB_ENV
          fi

      - name: Set Test Type for PRs
        if: github.event_name == 'pull_request'
        id: set_test_type
        run: |
          # PRs into master get the "Scientific" test type; every other
          # target branch gets "Plumbing"
          case "$GITHUB_BASE_REF" in
            master) selected_type="Scientific" ;;
            *)      selected_type="Plumbing" ;;
          esac
          echo "testType=${selected_type}" >> "$GITHUB_ENV"

      - name: Use Provided Test Type
        if: ${{ github.event_name == 'workflow_dispatch' }}
        id: use_provided_test_type
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the free-text input cannot inject shell commands.
          REQUESTED_TEST_TYPE: ${{ github.event.inputs.testType }}
        run: |
          # Use the testType provided by the user. Only the two values named in
          # the input description are accepted; anything else would point the
          # later steps at a test_inputs directory for an unknown test type.
          case "$REQUESTED_TEST_TYPE" in
            Plumbing|Scientific)
              echo "testType=${REQUESTED_TEST_TYPE}" >> "$GITHUB_ENV"
              ;;
            *)
              echo "Error: testType must be 'Plumbing' or 'Scientific', got '${REQUESTED_TEST_TYPE}'."
              exit 1
              ;;
          esac

      - name: Update test inputs and Upload to Terra
        # This id is what the "Monitor Workflow Status" step refers to when it
        # reads steps.submit_jobs.outputs.submission_ids.
        id: submit_jobs
        env:
          # workflow_dispatch inputs (empty on pull_request events, hence the
          # fallbacks). Passed via the environment instead of being
          # interpolated into the script text.
          UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
          USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
          TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
        run: |
          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
          MAX_RETRIES=2
          RETRY_DELAY=300  # 300 seconds = 5 minutes
          # Shape of a UUID. Output of this shape is accepted as a submission ID
          # even if it happens to contain the digits "404".
          # NOTE(review): assumes submission IDs are UUIDs; any other output
          # falls through to the substring checks below.
          UUID_PATTERN='^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$'
          # IDs of the submissions that were accepted
          SUBMISSION_IDS=()

          # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, anything else -> false)
          if [ "$UPDATE_TRUTH" = "true" ]; then
            UPDATE_TRUTH_BOOL=true
          else
            UPDATE_TRUTH_BOOL=false
          fi

          if [ "$USE_CALL_CACHE" = "true" ]; then
            USE_CALL_CACHE_BOOL=true
          else
            USE_CALL_CACHE_BOOL=false
          fi

          PIPELINE_NAME="TestIlluminaGenotypingArray"
          PIPELINE_DIR="pipelines/broad/genotyping/illumina"
          # testType was exported through GITHUB_ENV by one of the earlier test-type steps
          TEST_TYPE="$testType"
          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
          echo "Running tests with test type: $TEST_TYPE"

          TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
          echo "Truth path: $TRUTH_PATH"
          RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"

          # Create the submission_data.json file which will be the same for all inputs
          SUBMISSION_DATA_FILE="submission_data.json"

          # Use a heredoc to generate the JSON file content dynamically
          cat <<EOF > "$SUBMISSION_DATA_FILE"
          {
            "methodConfigurationNamespace": "warp-pipelines",
            "methodConfigurationName": "$PIPELINE_NAME",
            "useCallCache": $USE_CALL_CACHE_BOOL,
            "deleteIntermediateOutputFiles": false,
            "useReferenceDisks": true,
            "memoryRetryMultiplier": 1.2,
            "workflowFailureMode": "NoNewCalls",
            "userComment": "Automated submission",
            "ignoreEmptyOutputs": false
          }
          EOF

          echo "Created submission data file: $SUBMISSION_DATA_FILE"

          for input_file in "$INPUTS_DIR"/*.json; do
            # An unmatched glob is left as the literal pattern; fail with a clear message
            if [ ! -e "$input_file" ]; then
              echo "Error: no input JSON files found in $INPUTS_DIR"
              exit 1
            fi

            echo "Processing input file: $input_file"
            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
                --results_path "$RESULTS_PATH" \
                --inputs_json "$input_file" \
                --update_truth "$UPDATE_TRUTH_BOOL" \
                --commit_hash "$COMMIT_HASH" )
            echo "Uploading the test input file: $test_input_file"
            echo "Branch name: $branch_name"

            python3 scripts/firecloud_api/firecloud_api2.py \
                upload_test_inputs \
                --workspace-namespace warp-pipelines \
                --workspace-name "WARP Tests" \
                --pipeline_name "$PIPELINE_NAME" \
                --test_input_file "$test_input_file" \
                --branch_name "$branch_name" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com"

            attempt=1

            while [ "$attempt" -le "$MAX_RETRIES" ]; do
              echo "Attempt $attempt: Submitting job for input file: $input_file"
              SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api2.py submit_job \
                        --workspace-namespace "warp-pipelines" \
                        --workspace-name "WARP Tests" \
                        --sa-json-b64 "$SA_JSON_B64" \
                        --user "pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com" \
                        --submission_data_file "$SUBMISSION_DATA_FILE")

              echo "Submission ID: $SUBMISSION_ID"

              if [[ ! "$SUBMISSION_ID" =~ $UUID_PATTERN && "$SUBMISSION_ID" == *"404"* ]]; then
                # Do not sleep after the final attempt; nothing will be retried
                if [ "$attempt" -ge "$MAX_RETRIES" ]; then
                  echo "Error: Dockstore method not found. Max retries reached for input file: $input_file"
                  break
                fi
                echo "Error: Dockstore method not found. Retrying in $RETRY_DELAY seconds..."
                sleep "$RETRY_DELAY"
                attempt=$((attempt + 1))
              elif [ -z "$SUBMISSION_ID" ]; then
                echo "Submission failed for input file: $input_file. No submission ID received."
                break
              else
                echo "Submission successful. Submission ID: $SUBMISSION_ID"
                SUBMISSION_IDS+=("$SUBMISSION_ID")
                break
              fi
            done
          done

          echo "Generated Submission IDs: ${SUBMISSION_IDS[*]}"
          # Without any accepted submission there is nothing to monitor, so
          # fail here instead of letting the job continue
          if [ "${#SUBMISSION_IDS[@]}" -eq 0 ]; then
            echo "Error: no submissions were accepted; nothing to monitor."
            exit 1
          fi
          # Publish the comma-separated IDs as the step output "submission_ids"
          echo "submission_ids=$(IFS=,; echo "${SUBMISSION_IDS[*]}")" >> "$GITHUB_OUTPUT"

      - name: Monitor Workflow Status
        run: |
          # Every step runs in its own shell, so variables and declarations from
          # earlier steps are not inherited; declare what this script uses here.
          # WORKFLOW_STATUSES must be associative: its keys are submission IDs,
          # which an indexed array would try to evaluate as arithmetic.
          declare -A WORKFLOW_STATUSES
          ALL_OUTPUTS=""

          echo "Monitoring the status of submitted workflows..."
          echo "Submission IDs from the Submit Jobs step: $SUBMISSION_IDS"
          # Split the comma-separated list (see env below) into an array
          IFS=',' read -r -a SUBMISSION_ID_LIST <<< "$SUBMISSION_IDS"
          for SUBMISSION_ID in "${SUBMISSION_ID_LIST[@]}"; do
            echo "Polling submission status for Submission ID: $SUBMISSION_ID"
            RESPONSE=$(python3 scripts/firecloud_api/firecloud_api2.py poll_job_status \
                --submission_id "$SUBMISSION_ID" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com" \
                --workspace-namespace "warp-pipelines" \
                --workspace-name "WARP Tests")

            if [ -z "$RESPONSE" ]; then
              echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
              continue
            fi

            # Parse and store workflow statuses: the response is read as a JSON
            # object and turned into one "key | value" line per entry
            WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
            echo "Statuses for submission $SUBMISSION_ID:"
            echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"

            # Append to aggregate statuses
            WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"

            # Retrieve workflow outputs
            # NOTE(review): unlike the other firecloud_api2.py calls in this
            # workflow, this one passes no --sa-json-b64 / --user /
            # --workspace-* arguments - confirm get_outputs does not need them.
            echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
            for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
              WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api2.py get_outputs \
                --submission_id "$SUBMISSION_ID" \
                --workflow_id "$WORKFLOW_ID" \
                --pipeline_name "$PIPELINE_NAME")
              ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
            done
          done

          # Generate summary for Submission IDs
          echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
          for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
            # Generate the Terra URL for the submission (spaces in the workspace name become %20)
            SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"

            # Add the Submission ID as a hyperlink
            echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> "$GITHUB_STEP_SUMMARY"

            # Add the workflows and statuses for this submission
            echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> "$GITHUB_STEP_SUMMARY"

            # Add a blank line for separation
            echo "" >> "$GITHUB_STEP_SUMMARY"
          done
        env:
          SUBMISSION_IDS: ${{ steps.submit_jobs.outputs.submission_ids }} # Pass IDs from a previous step
          PIPELINE_NAME: TestIlluminaGenotypingArray
          NAMESPACE: warp-pipelines
          WORKSPACE: WARP Tests


      #- name: Update and Upload method configuration
      #  id: pipeline_run
      #  run: |
      #    # Set common environment variables
      #    TOKEN="${{ steps.auth.outputs.access_token }}"
      #    NAMESPACE="warp-pipelines"
      #    WORKSPACE="WARP Tests"
      #    USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
      #    UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
      #    #TEST_TYPE="${{ github.event.inputs.testType || 'Plumbing' }}"
      #    TEST_TYPE="${{ env.testType }}"
      #    TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
      #    CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
      #
      #    echo "truth branch: $TRUTH_BRANCH"
      #
      #    ########################################
      #    # SET PIPELINE SPECIFIC VARIABLES HERE #
      #    ########################################
      #    PIPELINE_NAME="TestIlluminaGenotypingArray"
      #    PIPELINE_DIR="pipelines/broad/genotyping/illumina"
      #    # TODO: Need to set the truth and result paths appropriately
      #    # TODO: Need to dynamically set the truth branch, for now it is hardcoded to master branch
      #    # We may want to keep the truth and results buckets separate for TTL reasons
      #    TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
      #    RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"
      #
      #
      #    # Function to call the Firecloud API using the firecloud_api2.py script
      #    firecloud_action() {
      #      python3 scripts/firecloud_api/firecloud_api2.py --action "$1" "${@:2}"
      #    }
      #
#
      #    # Convert USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
      #      if [ "$USE_CALL_CACHE" == "true" ]; then
      #          USE_CALL_CACHE_BOOL=true
      #      else
      #          USE_CALL_CACHE_BOOL=false
      #      fi
      #
      #
      #    # Convert UPDATE_TRUTH to a boolean-friendly format ("true" -> true, "false" -> false)
      #    if [ "$UPDATE_TRUTH" = "true" ]; then
      #        UPDATE_TRUTH_BOOL=true
      #    else
      #        UPDATE_TRUTH_BOOL=false
      #    fi
      #
      #    # Create the submission_data.json file which will be the same for all inputs
      #    SUBMISSION_DATA_FILE="submission_data.json"
      #
      #    # Use a heredoc to generate the JSON file content dynamically
      #    cat <<EOF > "$SUBMISSION_DATA_FILE"
      #    {
      #      "methodConfigurationNamespace": "warp-pipelines",
      #      "methodConfigurationName": "$PIPELINE_NAME",
      #      "useCallCache": $USE_CALL_CACHE_BOOL,
      #      "deleteIntermediateOutputFiles": false,
      #      "useReferenceDisks": true,
      #      "memoryRetryMultiplier": 1.2,
      #      "workflowFailureMode": "NoNewCalls",
      #      "userComment": "Automated submission",
      #      "ignoreEmptyOutputs": false
      #    }
      #    EOF
      #    echo "Created submission data file: $SUBMISSION_DATA_FILE"
#
      #    # Initialize variables to aggregate statuses and outputs
      #    ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
      #    ALL_OUTPUTS=""
      #
      #    # Initialize arrays to track submission and workflow statuses
      #    declare -a SUBMISSION_IDS
      #    declare -A WORKFLOW_STATUSES
#
      #    # Loop through each file in the appropriate test inputs directory
      #    INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
      #
      #    echo "Running tests with test type: $TEST_TYPE"
      #
      #    MAX_RETRIES=2
      #    RETRY_DELAY=300  # 300 seconds = 5 minutes
 #
      #    for input_file in "$INPUTS_DIR"/*.json; do
      #      echo "Processing input file: $input_file"
      #      test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
      #                                                                          --results_path "$RESULTS_PATH" \
      #                                                                          --inputs_json "$input_file" \
      #                                                                          --update_truth "$UPDATE_TRUTH_BOOL" \
      #                                                                          --commit_hash "$COMMIT_HASH" )
      #      echo "Uploading the test input file: $test_input_file"
      #      echo "Branch name: $branch_name"
      #
      #      python3 scripts/firecloud_api/firecloud_api2.py upload_test_inputs \
      #        --workspace-namespace warp-pipelines \
      #        --workspace-name "WARP Tests" \
      #        --pipeline_name "$PIPELINE_NAME" \
      #        --test_input_file "$test_input_file" \
      #        --branch_name "$branch_name" \
      #        --sa-json-b64 "$SA_JSON_B64" \
      #        --user "pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com"
      #    done

      #      attempt=1
      #      while [ $attempt -le $MAX_RETRIES ]; do
      #        echo "Attempt $attempt: Submitting job for input file: $input_file"
      #        #echo "Submitting job for input file: $input_file"
      #        cat "$SUBMISSION_DATA_FILE"
      #        SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE")
      #
      #        if [[ "$SUBMISSION_ID" == *"404"* ]]; then
      #          echo "Error: Dockstore method not found. Retrying in $RETRY_DELAY seconds..."
      #          sleep $RETRY_DELAY
      #          ((attempt++))
      #        elif [ -z "$SUBMISSION_ID" ]; then
      #          echo "Submission failed for input file: $input_file. No submission ID received."
      #          break
      #        else
      #          echo "Submission successful. Submission ID: $SUBMISSION_ID"
      #          SUBMISSION_IDS+=("$SUBMISSION_ID")
      #          break
      #        fi
      #
      #        if [ $attempt -gt $MAX_RETRIES ]; then
      #          echo "Max retries reached. Exiting..."
      #        fi
      #      done
      #    done
#
      #      #echo "Submission ID: $SUBMISSION_ID"
      #      #SUBMISSION_IDS+=("$SUBMISSION_ID")
      #
      #
      #    echo "Monitoring the status of submitted workflows..."
      #    for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
      #      echo "Polling submission status for Submission ID: $SUBMISSION_ID"
      #      RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID")
#
      #      if [ -z "$RESPONSE" ]; then
      #        echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
      #        continue
      #      fi
      #
      #      # Parse and store workflow statuses
      #      WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
      #      echo "Statuses for submission $SUBMISSION_ID:"
      #      echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"
      #
      #      # Append to aggregate statuses
      #      WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION
      #
      #      # retrieve workflow outputs
      #      echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
      #      for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
      #        WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME")
      #        ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
      #        done
      #      done
      #
      #      # Generate final summary tables with hyperlinks for Submission IDs
      #      echo "## Combined Workflow Statuses" >> $GITHUB_STEP_SUMMARY
      #      for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
      #        # Generate the Terra URL for the submission
      #        SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
      #
      #        # Add the Submission ID as a hyperlink
      #        echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> $GITHUB_STEP_SUMMARY
      #
      #        # Add the workflows and statuses for this submission
      #        echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> $GITHUB_STEP_SUMMARY
      #
      #        # Add a blank line for separation
      #        echo "" >> $GITHUB_STEP_SUMMARY
      #      done

      # Copies commit_hash.txt from the bucket into the working directory.
      # NOTE(review): no step visible in this workflow authenticates gsutil
      # (the google-github-actions/auth step is commented out) - confirm that
      # credentials are available to this command.
      - name: Download Commit Hash from GCP
        run: gsutil cp gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/commit_hash.txt ./commit_hash.txt

      - name: Check Commit Hash
        id: check_commit_hash
        run: |
          # Hash stored in the file downloaded by the previous step
          COMMIT_HASH_FROM_WDL="$(< commit_hash.txt)"

          # COMMIT_HASH was exported through GITHUB_ENV by "Set Commit Hash";
          # fail the step when the two hashes differ
          if [ "$COMMIT_HASH_FROM_WDL" = "$COMMIT_HASH" ]; then
            echo "Commit hash match successful: $COMMIT_HASH_FROM_WDL"
          else
            echo "Error: The commit hash from the WDL output does not match the expected commit hash."
            exit 1
          fi

      # Runs only when every previous step succeeded
      - name: Print Summary on Success
        if: ${{ success() }}
        run: |
          printf '%s\n' '# :white_check_mark: Pipeline Execution Summary :white_check_mark:' >> "$GITHUB_STEP_SUMMARY"

      # Runs only when a previous step failed
      - name: Print Summary on Failure
        if: ${{ failure() }}
        run: |
          printf '%s\n' '# :x: Pipeline Execution Summary (on Failure) :x: ' >> "$GITHUB_STEP_SUMMARY"