# test_iasworld_data.yaml
name: test-iasworld-data

# This workflow is not configured to run on PRs, because PRs have their own CI
# process defined by the `build-and-test-dbt` workflow. Instead, the Data
# Department's Spark data extraction process dispatches it via API once per
# day after finishing the daily ingest of iasWorld data. For more detail, see:
#
# https://github.com/ccao-data/service-spark-iasworld
on:
  workflow_dispatch:
    inputs:
      # `select` and `selector` are both optional; when both are supplied,
      # `selector` wins (see the "Validate and parse input variables" step)
      select:
        description: >
          Optional space-separated list of tests to run (defaults to all
          iasWorld data tests)
        required: false
        type: string
      selector:
        description: >
          Optional dbt selector representing tests to run (takes precedence
          over the above list of tests if both are present)
        required: false
        type: string
      # Controls both the artifact flag passed to the test script and whether
      # the S3 upload step runs
      upload_test_results:
        description: Upload test results to S3
        required: false
        default: false
        type: boolean
# Environment variables shared by every job and step in this workflow.
# Values are quoted because environment variables are always strings.
env:
  # Disable Python output buffering so log lines appear as they are emitted
  PYTHONUNBUFFERED: "1"
  # Let `uv pip install` target the system Python instead of a virtualenv
  UV_SYSTEM_PYTHON: "1"
jobs:
  # Runs the iasWorld data tests and, optionally, uploads the results to S3
  test-iasworld-data:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Inputs are passed through `env` rather than interpolated into the
      # script so that their contents are never evaluated as shell code
      - name: Validate and parse input variables
        id: parse-inputs
        run: |
          # Default to no select option, which will fall back to the default
          # behavior of the underlying Python script
          SELECT_OPTION=""
          if [[ -n "$SELECTOR" ]]; then
            SELECT_OPTION="--selector $SELECTOR"
          elif [[ -n "$SELECT" ]]; then
            SELECT_OPTION="--select $SELECT"
          fi
          echo "Setting select option to '$SELECT_OPTION'"
          echo "select-option=$SELECT_OPTION" >> "$GITHUB_OUTPUT"
        shell: bash
        env:
          SELECT: ${{ inputs.select }}
          SELECTOR: ${{ inputs.selector }}

      # NOTE(review): later steps read PROJECT_DIR, STATE_DIR, CACHE_KEY and
      # TARGET from the environment; presumably this action exports them --
      # confirm against .github/actions/setup_dbt
      - name: Setup dbt
        uses: ./.github/actions/setup_dbt
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}

      - name: Restore dbt state cache
        id: cache
        uses: ./.github/actions/restore_dbt_cache
        with:
          path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
          key: ${{ env.CACHE_KEY }}

      - name: Install Python dependencies
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        run: uv pip install ".[dbt_tests]"

      - name: Run tests
        run: |
          # Only defer to saved state when the cache step actually restored it
          DEFER_OPTION=""
          if [[ "$CACHE_HIT" == 'true' ]]; then
            DEFER_OPTION="--defer --state $STATE_DIR"
          fi
          # The option variables below are intentionally left unquoted so
          # that each one word-splits into separate arguments (or into
          # nothing at all when it is empty)
          # shellcheck disable=SC2086
          python scripts/run_iasworld_data_tests.py \
            --target "$TARGET" \
            --output-dir ./qc_test_results/ \
            $SELECT_OPTION \
            $SKIP_ARTIFACTS_OPTION \
            $DEFER_OPTION
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          USER: ${{ github.triggering_actor }}
          GIT_SHA: ${{ github.sha }}
          GIT_REF: ${{ github.ref_name }}
          # NOTE(review): `workflow_dispatch` event payloads do not appear to
          # include a `commits` array, so this likely resolves to an empty
          # string on every run of this workflow -- confirm whether the test
          # script needs a fallback value
          GIT_AUTHOR: ${{ github.event.commits[0].author.name }}
          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
          SELECT_OPTION: ${{ steps.parse-inputs.outputs.select-option }}
          SKIP_ARTIFACTS_OPTION: ${{ inputs.upload_test_results && '--no-skip-artifacts' || '--skip-artifacts' }}

      - name: Save test results to S3
        if: inputs.upload_test_results
        run: |
          s3_prefix="s3://ccao-data-warehouse-us-east-1/qc"
          local_prefix="qc_test_results/metadata"
          # Sync only the metadata directories that exist locally
          for dir in "test_run" "test_run_result" "test_run_failing_row"; do
            dirpath="${local_prefix}/${dir}"
            if [ -e "$dirpath" ]; then
              echo "Copying ${dirpath} metadata to S3"
              aws s3 sync "$dirpath" "${s3_prefix}/${dir}"
            fi
          done
          # Start the Glue crawler once the upload has finished
          crawler_name="ccao-data-warehouse-qc-crawler"
          aws glue start-crawler --name "$crawler_name"
          echo "Triggered Glue crawler $crawler_name"
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      # Records a UTC timestamp in the job environment so that the failure
      # notification below can reference it as `env.TIMESTAMP`
      - name: Get current time
        if: failure()
        run: echo "TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S")" >> "$GITHUB_ENV"
        shell: bash

      # Only sent when the failed run was dispatched by the `sqoop-bot` app
      # (presumably the automated daily dispatch described at the top of this
      # file). Otherwise, whoever dispatched the workflow is notified via
      # GitHub (instead of SNS)
      - name: Send failure notification
        if: github.event_name == 'workflow_dispatch' && github.triggering_actor == 'sqoop-bot[bot]' && failure()
        uses: ./.github/actions/publish_sns_topic
        with:
          sns_topic_arn: ${{ secrets.AWS_SNS_NOTIFICATION_TOPIC_ARN }}
          subject: "iasWorld tests errored for workflow run: ${{ github.run_id }}"
          body: |
            iasWorld tests raised an error for workflow ${{ github.run_id }}, run on ${{ env.TIMESTAMP }} UTC
            Link to failing workflow:
            https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}