-
Notifications
You must be signed in to change notification settings - Fork 9
157 lines (138 loc) · 6.02 KB
/
flink.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
name: Run Flink Test
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
types: [ opened, reopened, synchronize, labeled ]
schedule:
- cron: '0 4 * * *' # run once a day at 4 AM
jobs:
build:
# run on:
# - all pushes to main
# - schedule defined above
# - a PR was just labeled 'test-flink' or 'test-all'
# - a PR with 'test-flink' or 'test-all' label was opened, reopened, or synchronized
if: |
github.event_name == 'push' ||
github.event_name == 'schedule' ||
github.event.label.name == 'test-all' ||
github.event.label.name == 'test-flink' ||
contains( github.event.pull_request.labels.*.name, 'test-all') ||
contains( github.event.pull_request.labels.*.name, 'test-flink')
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [ "3.9", "3.10", "3.11" ]
recipes-version: [
"pangeo-forge-recipes==0.10.0",
# save some cycles and infer it might
# work on all versions between lowest and highest
# "pangeo-forge-recipes==0.10.3",
"pangeo-forge-recipes==0.10.4",
]
beam-version: [
"apache-beam==2.47.0",
# save some cycles and infer it might
# work on all versions between lowest and highest
# "apache-beam==2.48.0",
# "apache-beam==2.49.0",
# "apache-beam==2.50.0",
"apache-beam==2.52.0",
]
# keep here to be explicit for future users what version
# of Flink we are supporting.
#
# Even though the operator says it's running Flink 1.17
# there is no portable runner .jar available yet :scream: :thanksflink:
# https://repo.maven.apache.org/maven2/org/apache/beam/beam-runners-flink*
# but things seem stable for 1.16 :fingerscrossed:
flink-version: [ "1.16", ]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
# Starts a k8s cluster with NetworkPolicy enforcement and installs both
# kubectl and helm
#
# ref: https://github.com/jupyterhub/action-k3s-helm/
- uses: jupyterhub/action-k3s-helm@v3
with:
metrics-enabled: false
traefik-enabled: false
docker-enabled: true
- name: Setup CertManager
run: |
# Setup cert-manager, required by Flink Operator
CERT_MANAGER_VERSION=1.9.1
kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v${CERT_MANAGER_VERSION}/cert-manager.yaml
- name: Wait for CertManager to be ready
uses: jupyterhub/action-k8s-await-workloads@v1
with:
timeout: 150
max-restarts: 1
namespace: cert-manager
- name: Setup FlinkOperator
run: |
# as noted here: https://github.com/pangeo-forge/pangeo-forge-cloud-federation/pull/6#issuecomment-1821285529
# and more specifically here: https://cwiki.apache.org/confluence/display/FLINK/Release+Schedule+and+Planning
# "Support for two (current and previous) releases with bug fixes"
FLINK_OPERATOR_VERSION=1.6.1
helm repo add flink-operator-repo https://downloads.apache.org/flink/flink-kubernetes-operator-${FLINK_OPERATOR_VERSION}
helm install flink-kubernetes-operator flink-operator-repo/flink-kubernetes-operator --wait
kubectl get pod -A
kubectl get crd -A
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v0'
- name: 'Setup minio cli mc'
run: |
wget --quiet https://dl.min.io/client/mc/release/linux-amd64/mc
chmod +x mc
mv mc /usr/local/bin/mc
mc --version
- name: Setup "${{ matrix.beam-version }}" to satisfy pf-runner installs
run: |
pip install ${{ matrix.beam-version }}
- name: Install dependencies & our package
run: |
python -m pip install --upgrade pip
python -m pip install -e .[test,flink]
python -m pip install -U ${{ matrix.recipes-version }}
- name: Set up min.io as a k3s service
run: |
MYACCESSKEY=$(openssl rand -hex 16)
MYSECRETKEY=$(openssl rand -hex 16)
kubectl create secret generic minio-secrets --from-literal=MINIO_ACCESS_KEY=$MYACCESSKEY --from-literal=MINIO_SECRET_KEY=$MYSECRETKEY
kubectl apply -f tests/integration/flink/minio-manifest.yaml
- name: Install socat so kubectl port-forward will work
run: |
# Not sure if this is why kubectl proxy isn't working, but let's try
sudo apt update --yes && sudo apt install --yes socat
- name: Test with pytest
id: testrunner
continue-on-error: true
run: |
beamversion=$(echo ${{ matrix.beam-version }} | cut -d"=" -f3)
pytest -vvv -s --cov=pangeo_forge_runner tests/integration/flink/test_flink_integration.py \
--flinkversion=${{ matrix.flink-version }} \
--pythonversion=${{ matrix.python-version }} \
--beamversion=$beamversion
- name: Dump logs from pods after failure so we don't have ssh
if: steps.testrunner.outcome == 'failure'
run: |
echo "The previous step failed or timed out. Dumping logs..."
# much easier to do in bash than in Python via subprocess
echo "##################### OPERATOR ######################"
kubectl get pod | grep operator | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} | tail -n 1000
echo "##################### JOB MANAGER ######################"
kubectl get pod | grep -v manager | grep recipe- | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} | tail -n 1000
echo "##################### TASK MANAGER ######################"
kubectl get pod | grep manager | head -n1 | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} | tail -n 1000
# force GH action to show failed result
exit 128
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v2