# Run Flink Test #1073
# NOTE: This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow name as shown in the GitHub Actions UI.
name: Run Flink Test

# Events that start this workflow.
on:
  # Every push to the main branch.
  push:
    branches: ["main"]
  # Pull requests targeting main. `labeled` is included so that adding a
  # label to an existing PR re-evaluates the workflow.
  pull_request:
    branches: ["main"]
    types: [opened, reopened, synchronize, labeled]
  schedule:
    # run once a day at 4 AM (GitHub evaluates cron schedules in UTC)
    - cron: '0 4 * * *'
jobs:
  # Runs the Flink integration test on a throwaway k3s cluster for every
  # combination in the matrix below.
  build:
    # run on:
    #  - all pushes to main
    #  - the workflow's `schedule` trigger
    #  - a PR was just labeled 'test-flink' or 'test-all'
    #  - a PR with 'test-flink' or 'test-all' label was opened, reopened, or synchronized
    if: |
      github.event_name == 'push' ||
      github.event_name == 'schedule' ||
      github.event.label.name == 'test-all' ||
      github.event.label.name == 'test-flink' ||
      contains( github.event.pull_request.labels.*.name, 'test-all') ||
      contains( github.event.pull_request.labels.*.name, 'test-flink')
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix combinations finish even if one fails.
      fail-fast: false
      matrix:
        python-version: ["3.9", "3.10", "3.11"]
        recipes-version:
          - "pangeo-forge-recipes==0.10.4"
        beam-version:
          - "apache-beam==2.47.0"
          # save some cycles and infer it might
          # work on all versions between lowest and highest
          # - "apache-beam==2.48.0"
          # - "apache-beam==2.49.0"
          # - "apache-beam==2.50.0"
          - "apache-beam==2.52.0"
        # keep here to be explicit for future users what version
        # of Flink we are supporting.
        #
        # Even though the operator says it's running Flink 1.17
        # there is no portable runner .jar available for it yet:
        # https://repo.maven.apache.org/maven2/org/apache/beam/beam-runners-flink*
        # but things seem stable for 1.16.
        flink-version: ["1.16"]
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      # Starts a k8s cluster with NetworkPolicy enforcement and installs both
      # kubectl and helm
      #
      # ref: https://github.com/jupyterhub/action-k3s-helm/
      - uses: jupyterhub/action-k3s-helm@v3
        with:
          metrics-enabled: false
          traefik-enabled: false
          docker-enabled: true

      - name: Setup CertManager
        run: |
          # Setup cert-manager, required by Flink Operator
          CERT_MANAGER_VERSION=1.9.1
          kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v${CERT_MANAGER_VERSION}/cert-manager.yaml

      - name: Wait for CertManager to be ready
        uses: jupyterhub/action-k8s-await-workloads@v1
        with:
          timeout: 150
          max-restarts: 1
          namespace: cert-manager

      - name: Setup FlinkOperator
        run: |
          # as noted here: https://github.com/pangeo-forge/pangeo-forge-cloud-federation/pull/6#issuecomment-1821285529
          # and more specifically here: https://cwiki.apache.org/confluence/display/FLINK/Release+Schedule+and+Planning
          # "Support for two (current and previous) releases with bug fixes"
          FLINK_OPERATOR_VERSION=1.6.1
          helm repo add flink-operator-repo https://downloads.apache.org/flink/flink-kubernetes-operator-${FLINK_OPERATOR_VERSION}
          helm install flink-kubernetes-operator flink-operator-repo/flink-kubernetes-operator --wait
          # List pods and CRDs so the operator install is visible in the job log.
          kubectl get pod -A
          kubectl get crd -A

      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v0'

      - name: 'Setup minio cli mc'
        run: |
          wget --quiet https://dl.min.io/client/mc/release/linux-amd64/mc
          chmod +x mc
          mv mc /usr/local/bin/mc
          mc --version

      # Install the matrix's Beam version first, before the package itself
      # is installed in the next step.
      - name: Setup "${{ matrix.beam-version }}" to satisfy pf-runner installs
        run: |
          pip install ${{ matrix.beam-version }}

      - name: Install dependencies & our package
        run: |
          python -m pip install --upgrade pip
          # Quoted so the shell never treats `.[test]` as a glob pattern.
          python -m pip install -e ".[test]"

      - name: Set up min.io as a k3s service
        run: |
          # Random per-run credentials, stored in the cluster as a secret.
          MYACCESSKEY=$(openssl rand -hex 16)
          MYSECRETKEY=$(openssl rand -hex 16)
          kubectl create secret generic minio-secrets --from-literal=MINIO_ACCESS_KEY=$MYACCESSKEY --from-literal=MINIO_SECRET_KEY=$MYSECRETKEY
          kubectl apply -f tests/integration/flink/minio-manifest.yaml

      - name: Install socat so kubectl port-forward will work
        run: |
          # Not sure if this is why kubectl proxy isn't working, but let's try
          sudo apt update --yes && sudo apt install --yes socat

      # continue-on-error keeps the job going after a test failure so the
      # next step can dump pod logs; that step then fails the job explicitly.
      - name: Test with pytest
        id: testrunner
        continue-on-error: true
        run: |
          pytest -vvv -s --cov=pangeo_forge_runner tests/integration/flink/test_flink_integration.py \
            --flink-version=${{ matrix.flink-version }} \
            --python-version=${{ matrix.python-version }} \
            --beam-version=${{ matrix.beam-version }} \
            --recipes-version=${{ matrix.recipes-version }}

      - name: Dump logs from pods after failure so we don't have ssh
        if: steps.testrunner.outcome == 'failure'
        run: |
          echo "The previous step failed or timed out. Dumping logs..."
          # much easier to do in bash than in Python via subprocess
          echo "##################### OPERATOR ######################"
          kubectl get pod | grep operator | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} | tail -n 1000
          echo "##################### JOB MANAGER ######################"
          kubectl get pod | grep -v manager | grep recipe- | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} | tail -n 1000
          echo "##################### TASK MANAGER ######################"
          kubectl get pod | grep manager | head -n1 | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} | tail -n 1000
          # force GH action to show failed result
          exit 128

      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v2