# Create EKS cluster, deploy CKF and run bundle test #123
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Provisions an EKS cluster, deploys the Charmed Kubeflow (CKF) bundle on it
# and runs the bundle tests, once per requested bundle version.
name: Create EKS cluster, deploy CKF and run bundle test
on:
  workflow_dispatch:  # This event allows manual triggering from the Github UI
    inputs:
      bundle_version:
        description: 'Comma-separated list of bundle versions e.g. 1.8, 1.9, latest. Make sure that the corresponding K8s version is supported by the cloud.'
        default: '1.8, 1.9, latest'
        required: true
      k8s_version:
        # Optional override; when left empty the jobs fall back to the
        # K8S_VERSION value already present in their environment.
        description: 'Kubernetes version to be used for the EKS cluster'
        required: false
      uats_branch:
        description: 'Branch to run the UATs from e.g. main or track/1.9. By default, this is defined by the dependencies.yaml file.'
        required: false
  schedule:
    # Weekly run: every Tuesday at 00:23 (GitHub evaluates cron in UTC).
    - cron: "23 0 * * 2"
jobs:
  # Turns the requested bundle versions into a JSON list that the deploy job
  # consumes as its build matrix.
  preprocess-input:
    runs-on: ubuntu-24.04
    outputs:
      processed_bundle_versions: ${{ steps.process_bundle_versions.outputs.bundle_versions }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install CLI tools
        run: |
          sudo snap install yq
      - name: Process bundle versions
        id: process_bundle_versions
        env:
          # Pass the free-text input to the shell as data instead of splicing
          # it into the script text, so it cannot be interpreted as commands.
          BUNDLE_VERSION_INPUT: ${{ inputs.bundle_version }}
        run: |
          if [[ "${{ github.event_name }}" == "schedule" ]]; then
            # Scheduled runs test every version listed in dependencies.yaml.
            # Use `tr` to remove new lines as a workaround to:
            # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#multiline-strings
            bundle_versions=$(yq '. | keys' .github/dependencies.yaml -o=json | tr -d '\n')
            echo "bundle_versions=${bundle_versions}"
            echo "bundle_versions=${bundle_versions}" >> "$GITHUB_OUTPUT"
          else
            # NOTE(review): parse_versions.py is presumably what writes the
            # `bundle_versions` step output on this path - confirm.
            python scripts/gh-actions/parse_versions.py "${BUNDLE_VERSION_INPUT}"
          fi

  # One matrix leg per bundle version: create an EKS cluster, bootstrap juju,
  # deploy and test the bundle, run the UATs, then always tear the cluster down.
  deploy-ckf-to-eks:
    needs: preprocess-input
    runs-on: ubuntu-24.04
    strategy:
      matrix:
        bundle_version: ${{ fromJSON(needs.preprocess-input.outputs.processed_bundle_versions) }}
      fail-fast: false
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Run YAML to Github Output Action
        id: yaml-output
        uses: christian-ci/action-yaml-github-output@v2
        with:
          file_path: ".github/dependencies.yaml"
          main_key: ${{ matrix.bundle_version }}
      - name: Update ENV variables from inputs if available
        env:
          # Manual inputs win; otherwise keep the value already in the env
          # context. NOTE(review): K8S_VERSION / UATS_BRANCH are not defined in
          # this workflow - presumably exported by the previous step. Confirm.
          RESOLVED_K8S_VERSION: ${{ inputs.k8s_version || env.K8S_VERSION }}
          RESOLVED_UATS_BRANCH: ${{ inputs.uats_branch || env.UATS_BRANCH }}
        run: |
          echo "K8S_VERSION=${RESOLVED_K8S_VERSION}" >> "$GITHUB_ENV"
          echo "UATS_BRANCH=${RESOLVED_UATS_BRANCH}" >> "$GITHUB_ENV"
      - name: Install CLI tools
        run: |
          pip install tox
          sudo snap install juju --channel=${{ env.JUJU_VERSION }}/stable
          sudo snap install charmcraft --classic
          juju version
      - name: Configure AWS Credentials
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.BUNDLE_KUBEFLOW_EKS_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.BUNDLE_KUBEFLOW_EKS_AWS_SECRET_ACCESS_KEY }}
        run: |
          aws configure set aws_access_key_id "$AWS_ACCESS_KEY_ID"
          aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY"
          aws configure set default.region eu-central-1
      - name: Install kubectl
        run: |
          # K8S_VERSION may come from a manual input, so read it from the
          # environment rather than interpolating it into the script.
          sudo snap install kubectl --classic --channel="${K8S_VERSION}/stable"
          # -p: do not fail when the directory already exists.
          mkdir -p ~/.kube
          kubectl version --client
      - name: Install eksctl
        run: |
          sudo apt-get update
          sudo apt-get install -y unzip
          curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
          sudo mv /tmp/eksctl /usr/local/bin
          eksctl version
      - name: Create cluster
        run: |
          # Cluster name is "kubeflow-test-" plus the bundle version with the
          # dots removed (e.g. 1.9 -> kubeflow-test-19). The two delete steps
          # at the end of this job rebuild the same name.
          VERSION="${{ matrix.bundle_version }}"
          VERSION_WITHOUT_DOT="${VERSION//.}"
          yq e ".metadata.name |= \"kubeflow-test-$VERSION_WITHOUT_DOT\"" -i .github/cluster.yaml
          yq e ".metadata.version |= \"${K8S_VERSION}\"" -i .github/cluster.yaml
          eksctl create cluster -f .github/cluster.yaml
          kubectl get nodes
      - name: Setup juju
        run: |
          # Call juju bin directly as a workaround to https://bugs.launchpad.net/juju/+bug/2007575
          /snap/juju/current/bin/juju add-k8s eks --client
          juju bootstrap eks kubeflow-controller
          juju add-model kubeflow
      - name: Test bundle deployment
        run: |
          tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s
      - name: Run Kubeflow UATs
        run: |
          git clone https://github.com/canonical/charmed-kubeflow-uats.git ~/charmed-kubeflow-uats
          cd ~/charmed-kubeflow-uats
          # UATS_BRANCH may come from a manual input; read it from the env.
          git checkout "${UATS_BRANCH}"
          tox -e kubeflow-remote

      # On failure or cancellation, capture debugging resources.
      - name: Save debug artifacts
        uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main
        if: failure() || cancelled()
      - name: Get juju status
        run: juju status
        if: failure() || cancelled()
      - name: Get juju debug logs
        run: juju debug-log --replay --no-tail
        if: failure() || cancelled()
      - name: Get all kubernetes resources
        run: kubectl get all -A
        if: failure() || cancelled()
      # xargs -r: when no pod is in the given state, skip `kubectl logs`
      # instead of invoking it once without a pod name (which errors out).
      - name: Get logs from pods with status = Pending
        run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -r -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
        if: failure() || cancelled()
      - name: Get logs from pods with status = Failed
        run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -r -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
        if: failure() || cancelled()
      - name: Get logs from pods with status = CrashLoopBackOff
        run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -r -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
        if: failure() || cancelled()

      # Teardown runs regardless of the outcome of the steps above.
      - name: Delete EKS cluster
        if: always()
        run: |
          VERSION="${{ matrix.bundle_version }}"
          VERSION_WITHOUT_DOT="${VERSION//.}"
          eksctl delete cluster --region eu-central-1 --name=kubeflow-test-$VERSION_WITHOUT_DOT
      # Clean up leftover CloudFormation stack when cluster creation fails unexpectedly
      - name: Delete CloudFormation stack (if present)
        if: always()
        run: |
          VERSION="${{ matrix.bundle_version }}"
          VERSION_WITHOUT_DOT="${VERSION//.}"
          aws cloudformation delete-stack --region eu-central-1 --stack-name eksctl-kubeflow-test-$VERSION_WITHOUT_DOT-cluster

  # Calls the reusable volume-cleanup workflow once every matrix leg of the
  # deploy job has finished, whatever its result.
  delete-unattached-volumes:
    if: always()
    uses: ./.github/workflows/delete-aws-volumes.yaml
    secrets: inherit
    with:
      region: eu-central-1
    needs: [deploy-ckf-to-eks]