From 00d0827ab4cf001b09e769d38e39ad7706f10092 Mon Sep 17 00:00:00 2001 From: Richard Hagen Date: Tue, 17 Dec 2024 08:18:15 +0100 Subject: [PATCH] Release Pod Failure Policies, Healthchecks, Managed Redis Ports Policy, updated build process (#1254) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Add Healthchecks to RadixConfig * sync RadixDeployment to KubeDeployment with Healthchecks * sync RadixApplication to RadixDeployment with Healthchecks * fix tests * fix linting * Add RA validation * Start testing * replace k8s types with radix, add pointers where optional * bump chart * fix correct error component name * fix correct error component name * Allow redis ports in policy (#1237) * Allow redis ports in policy * Bump chart version * Add support for defining pod failure policies for jobs in radixconfig (#1236) * simplify structures * init commit * bump charts * Build on docker, push same image to all container registries * test workflow on pull (acr tasks should fail) * fix buildx * revert test * push latest tag to ACR * don't export pipeline long tag * configure container image name * simpler validateProbe, remove unneeded tests * fix typo * cleanup * wait 2 minutes * configure buildx platforms * Revert to Registry Cache, push latest operator image * remove unused variable * specify buildcache tag * fix quote sign * fix quote sign * Create an index manifest for the target repo (#1245) * Create an index manifest for the target repo * show progress * Fix typo in buildscript (#1247) * Create an index manifest for the target repo * fix typo * echo out dry-run and push versioned pipeline runner (#1248) * Build and push image to all registries, use cache (#1249) * echo out dry-run and push versioned pipeline runner * Build and push image to all registries, use cache * fix merge conflict (#1250) * Log in to GHCR to enable cache (#1251) * Bump golang.org/x/crypto from 0.26.0 to 0.31.0 (#1246) * fix build process (#1253) --------- Co-authored-by: Nils Gustav Stråbø <65334626+nilsgstrabo@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build-push.yml | 208 ++- .vscode/launch.json | 3 +- charts/radix-operator/Chart.yaml | 4 +- .../templates/radixapplication.yaml | 1038 ++++++++++++ .../radix-operator/templates/radixbatch.yaml | 70 +- .../templates/radixdeployment.yaml | 503 ++++++ code | 186 +++ go.mod | 10 +- go.sum | 20 +- json-schema/radixapplication.json | 1166 ++++++++++++- pkg/apis/batch/kubejob.go | 8 +- pkg/apis/batch/status.go | 85 +- pkg/apis/batch/syncer.go | 21 +- pkg/apis/batch/syncer_test.go | 1469 ++++++++--------- pkg/apis/batch/utils.go | 10 + pkg/apis/deployment/jobschedulercomponent.go | 4 + pkg/apis/deployment/kubedeployment.go | 16 +- pkg/apis/deployment/kubedeployment_test.go | 56 +- pkg/apis/deployment/networkpolicy.go | 54 +- pkg/apis/deployment/radixcomponent.go | 31 +- pkg/apis/deployment/radixcomponent_test.go | 81 + pkg/apis/deployment/radixjobcomponent.go | 30 +- pkg/apis/deployment/radixjobcomponent_test.go | 110 ++ pkg/apis/radix/v1/radixapptypes.go | 96 ++ pkg/apis/radix/v1/radixbatchtypes.go | 6 +- pkg/apis/radix/v1/radixdeploytypes.go | 23 + pkg/apis/radix/v1/radixhealthchecktypes.go | 209 +++ pkg/apis/radix/v1/radixjobtypes.go | 32 +- pkg/apis/radix/v1/zz_generated.deepcopy.go | 259 +++ pkg/apis/radixvalidators/errors.go | 4 + .../radixvalidators/testdata/radixconfig.yaml | 50 +- pkg/apis/radixvalidators/validate_ra.go | 110 ++
pkg/apis/radixvalidators/validate_ra_test.go | 53 + .../utils/applicationcomponent_builder.go | 12 + .../utils/applicationjobcomponent_builder.go | 8 + .../utils/componentenvironment_builder.go | 12 + pkg/apis/utils/deploymentcomponent_builder.go | 12 + .../utils/deploymentjobcomponent_builder.go | 8 + pkg/apis/utils/failurepolicy.go | 35 + pkg/apis/utils/failurepolicy_test.go | 71 + .../utils/jobcomponentenvironment_builder.go | 8 + .../batch/internal/syncerfactory.go | 8 +- .../batch/internal/syncerfactory_mock.go | 13 +- 43 files changed, 5086 insertions(+), 1126 deletions(-) create mode 100644 code create mode 100644 pkg/apis/radix/v1/radixhealthchecktypes.go create mode 100644 pkg/apis/utils/failurepolicy.go create mode 100644 pkg/apis/utils/failurepolicy_test.go diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index 435d75ba2..8cfe39865 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -8,56 +8,145 @@ on: permissions: id-token: write contents: read + packages: write jobs: - build-deploy: + build-operator: runs-on: ubuntu-20.04 + name: Build Operator + outputs: + tag: ${{ steps.metadata.outputs.tag }} + fullname_latest: ${{ steps.metadata.outputs.fullname_latest }} + steps: + - uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build image tags + id: metadata + run: | + sha=${GITHUB_SHA::8} + ts=$(date +%s) + tag=${GITHUB_REF_NAME}-${sha}-${ts} + tag_latest=${GITHUB_REF_NAME}-latest + image="radix-operator" + echo "tag=$tag" >> $GITHUB_OUTPUT + echo "fullname=ghcr.io/equinor/$image:$tag" >> $GITHUB_OUTPUT + echo "fullname_latest=ghcr.io/equinor/$image:$tag_latest" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push radix-operator docker image + uses: docker/build-push-action@v5 + with: + context: . + push: true + file: ./operator.Dockerfile + platforms: | + linux/amd64 + linux/arm64 + tags: | + ${{ steps.metadata.outputs.fullname }} + ${{ steps.metadata.outputs.fullname_latest }} + cache-from: "type=registry,ref=${{ steps.metadata.outputs.fullname_latest }}-buildcache" + cache-to: "type=registry,ref=${{ steps.metadata.outputs.fullname_latest }}-buildcache,mode=max" + + build-pipelinerunner: + runs-on: ubuntu-20.04 + name: Build Pipeline runner + outputs: + tag_latest: ${{ steps.metadata.outputs.tag_latest }} + fullname_latest: ${{ steps.metadata.outputs.fullname_latest }} + steps: + - uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build image names + id: metadata + run: | + sha=${GITHUB_SHA::8} + ts=$(date +%s) + tag_latest=${GITHUB_REF_NAME}-latest + tag=${GITHUB_REF_NAME}-${sha}-${ts} + image="radix-pipeline-runner" + echo "tag_latest=$tag_latest" >> $GITHUB_OUTPUT + echo "tag=$tag" >> $GITHUB_OUTPUT + echo "fullname=ghcr.io/equinor/$image:$tag" >> $GITHUB_OUTPUT + echo "fullname_latest=ghcr.io/equinor/$image:$tag_latest" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push pipeline-runner docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + file: ./pipeline.Dockerfile + platforms: | + linux/amd64 + linux/arm64 + tags: | + ${{ steps.metadata.outputs.fullname }} + ${{ steps.metadata.outputs.fullname_latest }} + cache-from: "type=registry,ref=${{ steps.metadata.outputs.fullname_latest }}-buildcache" + cache-to: "type=registry,ref=${{ steps.metadata.outputs.fullname_latest }}-buildcache,mode=max" + + deploy: + runs-on: ubuntu-20.04 + needs: + - build-pipelinerunner + - build-operator strategy: fail-fast: false matrix: target: - name: "dev" - ref: "refs/heads/master" acr-name: "radixdev" client-id: "2bfe6984-f5e3-4d09-a0b2-4dd96de3f21e" subscription-id: "16ede44b-1f74-40a5-b428-46cca9a5741b" - name: "playground" - ref: "refs/heads/release" acr-name: "radixplayground" client-id: "7c000a42-1edb-4491-a241-4ac77bf7dd6d" subscription-id: "16ede44b-1f74-40a5-b428-46cca9a5741b" - name: "platform" - ref: "refs/heads/release" acr-name: "radixprod" client-id: "044f760d-aabb-4d29-a879-e774f16e3bcc" subscription-id: "ded7ca41-37c8-4085-862f-b11d21ab341a" - name: "c2" - ref: "refs/heads/release" acr-name: "radixc2prod" client-id: "581bb747-7b9f-4e80-a843-249eafb0a5fa" subscription-id: "ded7ca41-37c8-4085-862f-b11d21ab341a" steps: - uses: actions/checkout@v4 - if: matrix.target.ref == github.ref - uses: azure/login@v2 - if: matrix.target.ref == github.ref with: client-id: ${{matrix.target.client-id}} tenant-id: "3aa4a235-b6e2-48d5-9195-7fcf05b459b0" subscription-id: ${{matrix.target.subscription-id}} - name: Get GitHub Public IP - if: matrix.target.ref == github.ref id: github_public_ip run: echo "ipv4=$(curl 'https://ifconfig.me/ip')" >> $GITHUB_OUTPUT - name: Add GitHub IP to ACR - if: matrix.target.ref == github.ref id: update_firewall run: az acr network-rule add --name ${{matrix.target.acr-name}} @@ -65,86 +154,28 @@ jobs: --ip-address ${{ steps.github_public_ip.outputs.ipv4 }} - name: Wait for 2 minutes while the network rule to take effect - if: matrix.target.ref == github.ref - run: | - sleep 120 + run: sleep 120 - - name: Wait for Specific IP in ACR Network Rules - if: matrix.target.ref == github.ref - run: | - MAX_ATTEMPTS=10 - ATTEMPT=0 - TARGET_IP="${{ steps.github_public_ip.outputs.ipv4 }}" - echo "Waiting for IP $TARGET_IP to be allowed in ACR network rules..." - while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do - NETWORK_RULES=$(az acr network-rule list --name ${{matrix.target.acr-name}} --subscription ${{ matrix.target.subscription-id }} --query "ipRules[]|[?contains(ipAddressOrRange, '$TARGET_IP')]" --output tsv) - if [ -n "$NETWORK_RULES" ]; then - echo "IP $TARGET_IP is allowed." - break - fi - echo "Attempt $((ATTEMPT+1)) of $MAX_ATTEMPTS. Retrying in 10 seconds..." - ATTEMPT=$((ATTEMPT+1)) - sleep 10 - done - if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then - echo "IP $TARGET_IP was not allowed after $MAX_ATTEMPTS attempts. Exiting." 
- exit 1 - fi - - - name: Get ACR Login Server - if: matrix.target.ref == github.ref - id: get-acr-login-server + - name: Build image tags + id: metadata run: | - echo "login_server=$(az acr show --name ${{ matrix.target.acr-name }} --query loginServer --output tsv)" >> $GITHUB_OUTPUT + echo "operator=${{ matrix.target.acr-name }}.azurecr.io/radix-operator:${{ needs.build-operator.outputs.tag }}" >> $GITHUB_OUTPUT + echo "pipeline_latest=${{ matrix.target.acr-name }}.azurecr.io/radix-pipeline:${{ needs.build-pipelinerunner.outputs.tag_latest }}" >> $GITHUB_OUTPUT - - name: Get ACR Access Token - if: matrix.target.ref == github.ref - id: get-acr-token - run: | - echo "Getting ACR access token" - access_token=$(az acr login --name ${{ matrix.target.acr-name }} --expose-token --output tsv --query accessToken) - echo "::add-mask::$access_token" - echo "access_token=$access_token" >> $GITHUB_OUTPUT + - name: ACR Login + run: az acr login --name ${{ matrix.target.acr-name }} - - name: Log in to ACR - if: matrix.target.ref == github.ref + - name: GHCR Login uses: docker/login-action@v3 with: - registry: ${{ steps.get-acr-login-server.outputs.login_server }} - username: "00000000-0000-0000-0000-000000000000" - password: ${{ steps.get-acr-token.outputs.access_token }} + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - name: Set up Docker Buildx - if: matrix.target.ref == github.ref uses: docker/setup-buildx-action@v3 - - name: Build image names - if: matrix.target.ref == github.ref - id: build-image-names - run: | - echo "radix-operator-image-name=${{ matrix.target.acr-name }}.azurecr.io/radix-operator" >> $GITHUB_OUTPUT - echo "pipeline-runner-image-name=${{ matrix.target.acr-name }}.azurecr.io/radix-pipeline" >> $GITHUB_OUTPUT - - - name: Build image tags - if: matrix.target.ref == github.ref - id: build-tags - run: | - sha=${GITHUB_SHA::8} - ts=$(date +%s) - echo "radix-operator-tag=${GITHUB_REF_NAME}-${sha}-${ts}" >> $GITHUB_OUTPUT - echo "pipeline-runner-tag=${GITHUB_REF_NAME}-latest" >> $GITHUB_OUTPUT - echo "cache-radix-operator-tag=cache-radix-operator-${GITHUB_REF_NAME}" >> $GITHUB_OUTPUT - echo "cache-pipeline-runner-tag=cache-pipeline-runner-${GITHUB_REF_NAME}" >> $GITHUB_OUTPUT - - - name: Extract labels from metadata for Docker - if: matrix.target.ref == github.ref - id: radix-operator-meta - uses: docker/metadata-action@v5 - with: - images: ${{ steps.build-image-names.outputs.radix-operator-image-name }} - - - name: Build and push radix-operator docker image - if: matrix.target.ref == github.ref + - name: Build and push Operator docker image uses: docker/build-push-action@v5 with: context: . 
@@ -153,13 +184,12 @@ jobs: platforms: | linux/amd64 linux/arm64 - tags: "${{ steps.build-image-names.outputs.radix-operator-image-name }}:${{ steps.build-tags.outputs.radix-operator-tag }}" - labels: ${{ steps.radix-operator-meta.outputs.labels }} - cache-from: "type=registry,ref=${{ steps.build-image-names.outputs.radix-operator-image-name }}:${{ steps.build-tags.outputs.cache-radix-operator-tag }}" - cache-to: "type=registry,ref=${{ steps.build-image-names.outputs.radix-operator-image-name }}:${{ steps.build-tags.outputs.cache-radix-operator-tag }},mode=max" + tags: | + ${{ steps.metadata.outputs.operator }} + cache-from: "type=registry,ref=${{ needs.build-operator.outputs.fullname_latest }}-buildcache" + cache-to: "type=registry,ref=${{ needs.build-operator.outputs.fullname_latest }}-buildcache,mode=max" - name: Build and push pipeline-runner docker image - if: matrix.target.ref == github.ref uses: docker/build-push-action@v5 with: context: . @@ -168,13 +198,13 @@ jobs: platforms: | linux/amd64 linux/arm64 - tags: "${{ steps.build-image-names.outputs.pipeline-runner-image-name }}:${{ steps.build-tags.outputs.pipeline-runner-tag }}" - labels: ${{ steps.pipeline-runner-meta.outputs.labels }} - cache-from: "type=registry,ref=${{ steps.build-image-names.outputs.pipeline-runner-image-name }}:${{ steps.build-tags.outputs.cache-pipeline-runner-tag }}" - cache-to: "type=registry,ref=${{ steps.build-image-names.outputs.pipeline-runner-image-name }}:${{ steps.build-tags.outputs.cache-pipeline-runner-tag }},mode=max" + tags: | + ${{ steps.metadata.outputs.pipeline_latest }} + cache-from: "type=registry,ref=${{ needs.build-pipelinerunner.outputs.fullname_latest }}-buildcache" + cache-to: "type=registry,ref=${{ needs.build-pipelinerunner.outputs.fullname_latest }}-buildcache,mode=max" - name: Revoke GitHub IP on ACR - if: ${{ matrix.target.ref == github.ref && steps.update_firewall.outcome == 'success' && !cancelled()}} # Always run this step even if previous step failed + if: ${{ steps.update_firewall.outcome == 'success' && !cancelled()}} # Always run this step even if previous step failed run: az acr network-rule remove --name ${{matrix.target.acr-name}} --subscription ${{matrix.target.subscription-id}} diff --git a/.vscode/launch.json b/.vscode/launch.json index db7a04a92..8cc31b92e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -177,7 +177,8 @@ "RADIXOPERATOR_CERTIFICATE_AUTOMATION_DURATION": "2160h", "RADIXOPERATOR_CERTIFICATE_AUTOMATION_RENEW_BEFORE": "720h", "RADIX_EXTERNAL_REGISTRY_DEFAULT_AUTH_SECRET": "radix-external-registry-default-auth", - "RADIXOPERATOR_ORPHANED_ENVIRONMENTS_RETENTION_PERIOD": "30d", + "RADIXOPERATOR_ORPHANED_ENVIRONMENTS_RETENTION_PERIOD": "30h", + "RADIX_PIPELINE_JOBS_HISTORY_PERIOD_LIMIT": "24h", "RADIXOPERATOR_ORPHANED_ENVIRONMENTS_CLEANUP_CRON": "0 0 * * *", "LOG_LEVEL": "info", "LOG_PRETTY": "true" diff --git a/charts/radix-operator/Chart.yaml b/charts/radix-operator/Chart.yaml index 73ff60c1c..6f53dfb2f 100644 --- a/charts/radix-operator/Chart.yaml +++ b/charts/radix-operator/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: radix-operator -version: 1.46.4 -appVersion: 1.66.4 +version: 1.48.1 +appVersion: 1.68.1 kubeVersion: ">=1.24.0" description: Radix Operator keywords: diff --git a/charts/radix-operator/templates/radixapplication.yaml b/charts/radix-operator/templates/radixapplication.yaml index 20133fde9..87b3c10ba 100644 --- a/charts/radix-operator/templates/radixapplication.yaml +++ b/charts/radix-operator/templates/radixapplication.yaml @@ 
-421,6 +421,464 @@ spec: minLength: 1 pattern: ^(([a-z0-9][-a-z0-9]*)?[a-z0-9])?$ type: string + healthChecks: + description: |- + HealthChecks can tell Radix if your application is ready to receive traffic. + Defaults to a TCP check against your first listed port. + If any healthchecks are defined, no defaults will be added and you should add your own readinessProbe. + properties: + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for + the probe to be considered failed after having + succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the + container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform + the probe. 
+ format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + description: port number to access on the + container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + Defaults to TCP Probe against the first listed port + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for + the probe to be considered failed after having + succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the + container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. 
+ Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform + the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + description: port number to access on the + container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for + the probe to be considered failed after having + succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. 
+ items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the + container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform + the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + description: port number to access on the + container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + type: object horizontalScaling: description: |- Configuration for automatic horizontal scaling of replicas. @@ -1166,6 +1624,449 @@ spec: x-kubernetes-list-map-keys: - environment x-kubernetes-list-type: map + healthChecks: + description: |- + HealthChecks can tell Radix if your application is ready to receive traffic. + Defaults to a TCP check against your first listed port. + If any healthchecks are defined, no defaults will be added and you should add your own readinessProbe. + properties: + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. 
+ format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC + port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + Defaults to TCP Probe against the first listed port + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. 
The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC + port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. 
+ If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC + port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. 
+ format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + type: object horizontalScaling: description: |- Configuration for automatic horizontal scaling of replicas. @@ -2334,6 +3235,75 @@ spec: minLength: 1 pattern: ^(([a-z0-9][-a-z0-9]*)?[a-z0-9])?$ type: string + failurePolicy: + description: |- + Specifies the policy of handling failed job replicas. In particular, it allows to + specify the set of actions and conditions which need to be + satisfied to take the associated action. + If empty, the default behaviour applies - the counter of failed job replicas + is incremented and it is checked against the backoffLimit. + properties: + rules: + description: |- + A list of failure policy rules. The rules are evaluated in order. + Once a rule matches a job replica failure, the remaining of the rules are ignored. + When no rule matches the failure, the default handling applies - the + counter of failures is incremented and it is checked against + the backoffLimit. + items: + description: RadixJobComponentFailurePolicyRule + describes how a job replica failure is handled + when the onExitCodes rules are met. + properties: + action: + description: Specifies the action taken on a + job replica failure when the onExitCodes requirements + are satisfied. + enum: + - FailJob + - Ignore + - Count + type: string + onExitCodes: + description: Represents the requirement on the + job replica exit codes. + properties: + operator: + description: |- + Represents the relationship between the job replica's exit code and the + specified values. Replicas completed with success (exit code 0) are + excluded from the requirement check. + enum: + - In + - NotIn + type: string + values: + description: |- + Specifies the set of values. The job replica's exit code is checked against this set of + values with respect to the operator. The list must not contain duplicates. + Value '0' cannot be used for the In operator. + items: + format: int32 + minimum: 0 + type: integer + maxItems: 255 + minItems: 1 + type: array + x-kubernetes-list-type: set + required: + - operator + - values + type: object + required: + - action + - onExitCodes + type: object + maxItems: 20 + type: array + x-kubernetes-list-type: atomic + required: + - rules + type: object identity: description: |- Environment specific configuration for workload identity (federated credentials). @@ -2813,6 +3783,74 @@ spec: x-kubernetes-list-map-keys: - environment x-kubernetes-list-type: map + failurePolicy: + description: |- + Specifies the policy of handling failed job replicas. In particular, it allows to + specify the set of actions and conditions which need to be + satisfied to take the associated action. + If empty, the default behaviour applies - the counter of failed job replicas + is incremented and it is checked against the backoffLimit. + properties: + rules: + description: |- + A list of failure policy rules. The rules are evaluated in order. 
+ Once a rule matches a job replica failure, the remaining of the rules are ignored. + When no rule matches the failure, the default handling applies - the + counter of failures is incremented and it is checked against + the backoffLimit. + items: + description: RadixJobComponentFailurePolicyRule describes + how a job replica failure is handled when the onExitCodes + rules are met. + properties: + action: + description: Specifies the action taken on a job replica + failure when the onExitCodes requirements are satisfied. + enum: + - FailJob + - Ignore + - Count + type: string + onExitCodes: + description: Represents the requirement on the job + replica exit codes. + properties: + operator: + description: |- + Represents the relationship between the job replica's exit code and the + specified values. Replicas completed with success (exit code 0) are + excluded from the requirement check. + enum: + - In + - NotIn + type: string + values: + description: |- + Specifies the set of values. The job replica's exit code is checked against this set of + values with respect to the operator. The list must not contain duplicates. + Value '0' cannot be used for the In operator. + items: + format: int32 + minimum: 0 + type: integer + maxItems: 255 + minItems: 1 + type: array + x-kubernetes-list-type: set + required: + - operator + - values + type: object + required: + - action + - onExitCodes + type: object + maxItems: 20 + type: array + x-kubernetes-list-type: atomic + required: + - rules + type: object identity: description: |- Configuration for workload identity (federated credentials). diff --git a/charts/radix-operator/templates/radixbatch.yaml b/charts/radix-operator/templates/radixbatch.yaml index ab9c3ed94..80b3124cf 100644 --- a/charts/radix-operator/templates/radixbatch.yaml +++ b/charts/radix-operator/templates/radixbatch.yaml @@ -65,11 +65,73 @@ spec: format: int32 minimum: 0 type: integer + failurePolicy: + description: FailurePolicy specifies the policy of handling + failed job replicas + properties: + rules: + description: |- + A list of failure policy rules. The rules are evaluated in order. + Once a rule matches a job replica failure, the remaining of the rules are ignored. + When no rule matches the failure, the default handling applies - the + counter of failures is incremented and it is checked against + the backoffLimit. + items: + description: RadixJobComponentFailurePolicyRule describes + how a job replica failure is handled when the onExitCodes + rules are met. + properties: + action: + description: Specifies the action taken on a job replica + failure when the onExitCodes requirements are satisfied. + enum: + - FailJob + - Ignore + - Count + type: string + onExitCodes: + description: Represents the requirement on the job + replica exit codes. + properties: + operator: + description: |- + Represents the relationship between the job replica's exit code and the + specified values. Replicas completed with success (exit code 0) are + excluded from the requirement check. + enum: + - In + - NotIn + type: string + values: + description: |- + Specifies the set of values. The job replica's exit code is checked against this set of + values with respect to the operator. The list must not contain duplicates. + Value '0' cannot be used for the In operator. 
+ items: + format: int32 + minimum: 0 + type: integer + maxItems: 255 + minItems: 1 + type: array + x-kubernetes-list-type: set + required: + - operator + - values + type: object + required: + - action + - onExitCodes + type: object + maxItems: 20 + type: array + x-kubernetes-list-type: atomic + required: + - rules + type: object imageTagName: - description: |- - ImageTagName defines the image tag name to use for the job image - - required: false + description: ImageTagName defines the image tag name to use + for the job image type: string jobId: description: Defines a user defined ID of the job. diff --git a/charts/radix-operator/templates/radixdeployment.yaml b/charts/radix-operator/templates/radixdeployment.yaml index 838678a13..6948d6723 100644 --- a/charts/radix-operator/templates/radixdeployment.yaml +++ b/charts/radix-operator/templates/radixdeployment.yaml @@ -241,6 +241,445 @@ spec: - fqdn type: object type: array + healthChecks: + properties: + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC + port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. 
+ enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + Defaults to TCP Probe against the first listed port + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC + port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. + format: int32 + minimum: 1 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC + port. + properties: + port: + description: Port number of the gRPC service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). 
+ + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + enum: + - HTTPS + - HTTP + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + default: 10 + description: How often (in seconds) to perform the probe. + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Must be 1 for liveness and startup. + format: int32 + minimum: 1 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving + a TCP port. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + description: port number to access on the container. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - port + type: object + timeoutSeconds: + default: 1 + description: |- + Number of seconds after which the probe times out. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + minimum: 1 + type: integer + type: object + type: object horizontalScaling: description: RadixHorizontalScaling defines configuration for horizontal pod autoscaler. @@ -1074,6 +1513,70 @@ spec: description: 'Map of environment variables in the form '': ''' type: object + failurePolicy: + description: FailurePolicy specifies the policy of handling + failed job replicas + properties: + rules: + description: |- + A list of failure policy rules. The rules are evaluated in order. + Once a rule matches a job replica failure, the remaining of the rules are ignored. + When no rule matches the failure, the default handling applies - the + counter of failures is incremented and it is checked against + the backoffLimit. + items: + description: RadixJobComponentFailurePolicyRule describes + how a job replica failure is handled when the onExitCodes + rules are met. + properties: + action: + description: Specifies the action taken on a job replica + failure when the onExitCodes requirements are satisfied. 
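#
# A minimal radixconfig sketch for illustration (editor's example, not part
# of the generated schema): a failurePolicy for a job component. The job name
# "compute" and the exit codes are hypothetical. Rules are evaluated in
# order: replicas exiting with code 137 are ignored, codes 1-3 fail the whole
# job, and any other failure is counted against the backoffLimit as usual.
#
#   jobs:
#     - name: compute
#       failurePolicy:
#         rules:
#           - action: Ignore
#             onExitCodes:
#               operator: In
#               values: [137]
#           - action: FailJob
#             onExitCodes:
#               operator: In
#               values: [1, 2, 3]
#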
+ enum: + - FailJob + - Ignore + - Count + type: string + onExitCodes: + description: Represents the requirement on the job + replica exit codes. + properties: + operator: + description: |- + Represents the relationship between the job replica's exit code and the + specified values. Replicas completed with success (exit code 0) are + excluded from the requirement check. + enum: + - In + - NotIn + type: string + values: + description: |- + Specifies the set of values. The job replica's exit code is checked against this set of + values with respect to the operator. The list must not contain duplicates. + Value '0' cannot be used for the In operator. + items: + format: int32 + minimum: 0 + type: integer + maxItems: 255 + minItems: 1 + type: array + x-kubernetes-list-type: set + required: + - operator + - values + type: object + required: + - action + - onExitCodes + type: object + maxItems: 20 + type: array + x-kubernetes-list-type: atomic + required: + - rules + type: object identity: description: Identity configuration for federation with external identity providers. diff --git a/code b/code new file mode 100644 index 000000000..9bfe9effe --- /dev/null +++ b/code @@ -0,0 +1,186 @@ +apiVersion: v1 +items: +- apiVersion: batch/v1 + kind: Job + metadata: + annotations: + radix.equinor.com/radix-deployment-observed-generation: "1" + creationTimestamp: "2024-12-02T12:42:58Z" + generation: 1 + labels: + radix-app: radix-job-demo + radix-batch-job-name: t3czygs6 + radix-batch-name: batch-compute4dxj9sejr-20241202124258-plbab1dc + radix-component: compute + radix-job-type: job-scheduler + name: batch-compute4dxj9sejr-20241202124258-plbab1dc-t3czygs6 + namespace: radix-job-demo-qa + ownerReferences: + - apiVersion: radix.equinor.com/v1 + controller: true + kind: RadixBatch + name: batch-compute4dxj9sejr-20241202124258-plbab1dc + uid: 96bf2c52-0bdc-41f2-acdf-4bb9bbeeec99 + resourceVersion: "208334" + uid: 155d9fd0-6c1e-47fd-a973-c0569cf92f89 + spec: + backoffLimit: 0 + completionMode: NonIndexed + completions: 1 + manualSelector: false + parallelism: 1 + podReplacementPolicy: TerminatingOrFailed + selector: + matchLabels: + batch.kubernetes.io/controller-uid: 155d9fd0-6c1e-47fd-a973-c0569cf92f89 + suspend: false + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "false" + creationTimestamp: null + labels: + azure.workload.identity/use: "true" + batch.kubernetes.io/controller-uid: 155d9fd0-6c1e-47fd-a973-c0569cf92f89 + batch.kubernetes.io/job-name: batch-compute4dxj9sejr-20241202124258-plbab1dc-t3czygs6 + controller-uid: 155d9fd0-6c1e-47fd-a973-c0569cf92f89 + job-name: batch-compute4dxj9sejr-20241202124258-plbab1dc-t3czygs6 + radix-app: radix-job-demo + radix-batch-job-name: t3czygs6 + radix-batch-name: batch-compute4dxj9sejr-20241202124258-plbab1dc + radix-component: compute + radix-job-type: job-scheduler + spec: + activeDeadlineSeconds: 300 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: nodepooltasks + operator: Exists + - key: kubernetes.io/os + operator: In + values: + - linux + - key: kubernetes.io/arch + operator: In + values: + - amd64 + automountServiceAccountToken: false + containers: + - env: + - name: CALLBACK_ON_COMPLETE_URL + valueFrom: + configMapKeyRef: + key: CALLBACK_ON_COMPLETE_URL + name: env-vars-compute + - name: COMPUTE_CONFIG + valueFrom: + configMapKeyRef: + key: COMPUTE_CONFIG + name: env-vars-compute + - name: RADIX_ACTIVE_CLUSTER_EGRESS_IPS + value: 
104.45.84.0,104.45.84.1 + - name: RADIX_APP + value: radix-job-demo + - name: RADIX_CLUSTERNAME + value: weekly-49 + - name: RADIX_CLUSTER_TYPE + value: development + - name: RADIX_COMPONENT + value: compute + - name: RADIX_CONTAINER_REGISTRY + value: radixdev.azurecr.io + - name: RADIX_DNS_ZONE + value: dev.radix.equinor.com + - name: RADIX_ENVIRONMENT + value: qa + - name: RADIX_GIT_COMMIT_HASH + value: ae25ff6afa72da2b66f857d40e9d74d429e97077 + - name: RADIX_GIT_TAGS + - name: RADIX_PORTS + value: (9999 9090) + - name: RADIX_PORT_NAMES + value: (http metrics2) + - name: SQL_DATABASE_NAME + valueFrom: + configMapKeyRef: + key: SQL_DATABASE_NAME + name: env-vars-compute + - name: SQL_SERVER_NAME + valueFrom: + configMapKeyRef: + key: SQL_SERVER_NAME + name: env-vars-compute + - name: RADIX_JOB_NAME + value: batch-compute4dxj9sejr-20241202124258-plbab1dc-t3czygs6 + image: radixdev.azurecr.io/radix-job-demo-qa-compute:6u6ge + imagePullPolicy: Always + name: compute + ports: + - containerPort: 9999 + name: http + protocol: TCP + - containerPort: 9090 + name: metrics2 + protocol: TCP + resources: + limits: + memory: 100Mi + requests: + cpu: 50m + memory: 100Mi + securityContext: + allowPrivilegeEscalation: false + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /compute/args + name: job-payload + readOnly: true + dnsPolicy: ClusterFirst + imagePullSecrets: + - name: radix-external-registry-default-auth + restartPolicy: Never + schedulerName: default-scheduler + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + serviceAccount: compute-sa + serviceAccountName: compute-sa + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoSchedule + key: nodepooltasks + operator: Exists + volumes: + - name: job-payload + secret: + defaultMode: 420 + items: + - key: t3czygs6 + path: payload + secretName: batch-compute4dxj9sejr-20241202124258-plbab1dc-payloads-0 + ttlSecondsAfterFinished: 86400 + status: + completionTime: "2024-12-02T12:43:03Z" + conditions: + - lastProbeTime: "2024-12-02T12:43:04Z" + lastTransitionTime: "2024-12-02T12:43:04Z" + status: "True" + type: Complete + ready: 0 + startTime: "2024-12-02T12:42:58Z" + succeeded: 1 + terminating: 0 + uncountedTerminatedPods: {} +kind: List +metadata: + resourceVersion: "" diff --git a/go.mod b/go.mod index c900f234b..ebe0da8b0 100644 --- a/go.mod +++ b/go.mod @@ -21,9 +21,9 @@ require ( github.com/spf13/cobra v1.8.1 github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 - golang.org/x/crypto v0.26.0 + golang.org/x/crypto v0.31.0 golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 - golang.org/x/sync v0.8.0 + golang.org/x/sync v0.10.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.31.0 k8s.io/apiextensions-apiserver v0.31.0 @@ -84,9 +84,9 @@ require ( golang.org/x/mod v0.20.0 // indirect golang.org/x/net v0.28.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect - golang.org/x/sys v0.24.0 // indirect - golang.org/x/term v0.23.0 // indirect - golang.org/x/text v0.17.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/term v0.27.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.24.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 06838d4bf..cdb22974e 100644 --- a/go.sum +++ b/go.sum @@ -177,8 +177,8 @@ go.uber.org/zap 
v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -199,8 +199,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -210,15 +210,15 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= -golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= 
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/json-schema/radixapplication.json b/json-schema/radixapplication.json index c4c1a3ff3..e9c63893c 100644 --- a/json-schema/radixapplication.json +++ b/json-schema/radixapplication.json @@ -400,6 +400,459 @@ "pattern": "^(([a-z0-9][-a-z0-9]*)?[a-z0-9])?$", "type": "string" }, + "healthChecks": { + "description": "HealthChecks can tell Radix if your application is ready to receive traffic.\nDefaults to a TCP check against your first listed port.\nIf any healthchecks are defined, no defaults will be added and you should add your own readinessProbe.", + "properties": { + "livenessProbe": { + "description": "Periodic probe of container liveness.\nContainer will be restarted if the probe fails.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "properties": { + "exec": { + "description": "Exec specifies the action to take.", + "properties": { + "command": { + "description": "Command is the command line to execute inside the container, the working directory for the\ncommand is root ('/') in the container's filesystem. The command is simply exec'd, it is\nnot run inside a shell, so traditional shell instructions ('|', etc) won't work. To use\na shell, you need to explicitly call out to that shell.\nExit status of 0 is treated as live/healthy and non-zero is unhealthy.", + "items": { + "type": "string" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "type": "object" + }, + "failureThreshold": { + "default": 3, + "description": "Minimum consecutive failures for the probe to be considered failed after having succeeded.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "grpc": { + "description": "GRPC specifies an action involving a GRPC port.", + "properties": { + "port": { + "description": "Port number of the gRPC service.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "service": { + "default": "", + "description": "Service is the name of the service to place in the gRPC HealthCheckRequest\n(see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).\n\nIf this is not specified, the default behavior is defined by gRPC.", + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "httpGet": { + "description": "HTTPGet specifies the http request to perform.", + "properties": { + "host": { + "description": "Host name to connect to, defaults to the pod IP. You probably want to set\n\"Host\" in httpHeaders instead.", + "type": "string" + }, + "httpHeaders": { + "description": "Custom headers to set in the request. 
HTTP allows repeated headers.", + "items": { + "description": "HTTPHeader describes a custom header to be used in HTTP probes", + "properties": { + "name": { + "description": "The header field name.\nThis will be canonicalized upon output, so case-variant names will be understood as the same header.", + "type": "string" + }, + "value": { + "description": "The header field value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "type": "object" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + }, + "path": { + "description": "Path to access on the HTTP server.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "scheme": { + "description": "Scheme to use for connecting to the host.\nDefaults to HTTP.", + "enum": [ + "HTTPS", + "HTTP" + ], + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "initialDelaySeconds": { + "description": "Number of seconds after the container has started before liveness probes are initiated.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "type": "integer" + }, + "periodSeconds": { + "default": 10, + "description": "How often (in seconds) to perform the probe.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "successThreshold": { + "default": 1, + "description": "Minimum consecutive successes for the probe to be considered successful after having failed.\nMust be 1 for liveness and startup.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "tcpSocket": { + "description": "TCPSocket specifies an action involving a TCP port.", + "properties": { + "host": { + "description": "Optional: Host name to connect to, defaults to the pod IP.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "timeoutSeconds": { + "default": 1, + "description": "Number of seconds after which the probe times out.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "minimum": 1, + "type": "integer" + } + }, + "type": "object" + }, + "readinessProbe": { + "description": "Periodic probe of container service readiness.\nContainer will be removed from service endpoints if the probe fails.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes\nDefaults to TCP Probe against the first listed port", + "properties": { + "exec": { + "description": "Exec specifies the action to take.", + "properties": { + "command": { + "description": "Command is the command line to execute inside the container, the working directory for the\ncommand is root ('/') in the container's filesystem. The command is simply exec'd, it is\nnot run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use\na shell, you need to explicitly call out to that shell.\nExit status of 0 is treated as live/healthy and non-zero is unhealthy.", + "items": { + "type": "string" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "type": "object" + }, + "failureThreshold": { + "default": 3, + "description": "Minimum consecutive failures for the probe to be considered failed after having succeeded.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "grpc": { + "description": "GRPC specifies an action involving a GRPC port.", + "properties": { + "port": { + "description": "Port number of the gRPC service.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "service": { + "default": "", + "description": "Service is the name of the service to place in the gRPC HealthCheckRequest\n(see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).\n\nIf this is not specified, the default behavior is defined by gRPC.", + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "httpGet": { + "description": "HTTPGet specifies the http request to perform.", + "properties": { + "host": { + "description": "Host name to connect to, defaults to the pod IP. You probably want to set\n\"Host\" in httpHeaders instead.", + "type": "string" + }, + "httpHeaders": { + "description": "Custom headers to set in the request. HTTP allows repeated headers.", + "items": { + "description": "HTTPHeader describes a custom header to be used in HTTP probes", + "properties": { + "name": { + "description": "The header field name.\nThis will be canonicalized upon output, so case-variant names will be understood as the same header.", + "type": "string" + }, + "value": { + "description": "The header field value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "type": "object" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + }, + "path": { + "description": "Path to access on the HTTP server.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "scheme": { + "description": "Scheme to use for connecting to the host.\nDefaults to HTTP.", + "enum": [ + "HTTPS", + "HTTP" + ], + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "initialDelaySeconds": { + "description": "Number of seconds after the container has started before liveness probes are initiated.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "type": "integer" + }, + "periodSeconds": { + "default": 10, + "description": "How often (in seconds) to perform the probe.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "successThreshold": { + "default": 1, + "description": "Minimum consecutive successes for the probe to be considered successful after having failed.\nMust be 1 for liveness and startup.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "tcpSocket": { + "description": "TCPSocket specifies an action involving a TCP port.", + "properties": { + "host": { + "description": "Optional: Host name to connect to, defaults to the pod IP.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "timeoutSeconds": 
{ + "default": 1, + "description": "Number of seconds after which the probe times out.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "minimum": 1, + "type": "integer" + } + }, + "type": "object" + }, + "startupProbe": { + "description": "StartupProbe indicates that the Pod has successfully initialized.\nIf specified, no other probes are executed until this completes successfully.\nIf this probe fails, the Pod will be restarted, just as if the livenessProbe failed.\nThis can be used to provide different probe parameters at the beginning of a Pod's lifecycle,\nwhen it might take a long time to load data or warm a cache, than during steady-state operation.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "properties": { + "exec": { + "description": "Exec specifies the action to take.", + "properties": { + "command": { + "description": "Command is the command line to execute inside the container, the working directory for the\ncommand is root ('/') in the container's filesystem. The command is simply exec'd, it is\nnot run inside a shell, so traditional shell instructions ('|', etc) won't work. To use\na shell, you need to explicitly call out to that shell.\nExit status of 0 is treated as live/healthy and non-zero is unhealthy.", + "items": { + "type": "string" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "type": "object" + }, + "failureThreshold": { + "default": 3, + "description": "Minimum consecutive failures for the probe to be considered failed after having succeeded.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "grpc": { + "description": "GRPC specifies an action involving a GRPC port.", + "properties": { + "port": { + "description": "Port number of the gRPC service.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "service": { + "default": "", + "description": "Service is the name of the service to place in the gRPC HealthCheckRequest\n(see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).\n\nIf this is not specified, the default behavior is defined by gRPC.", + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "httpGet": { + "description": "HTTPGet specifies the http request to perform.", + "properties": { + "host": { + "description": "Host name to connect to, defaults to the pod IP. You probably want to set\n\"Host\" in httpHeaders instead.", + "type": "string" + }, + "httpHeaders": { + "description": "Custom headers to set in the request. 
HTTP allows repeated headers.", + "items": { + "description": "HTTPHeader describes a custom header to be used in HTTP probes", + "properties": { + "name": { + "description": "The header field name.\nThis will be canonicalized upon output, so case-variant names will be understood as the same header.", + "type": "string" + }, + "value": { + "description": "The header field value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "type": "object" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + }, + "path": { + "description": "Path to access on the HTTP server.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "scheme": { + "description": "Scheme to use for connecting to the host.\nDefaults to HTTP.", + "enum": [ + "HTTPS", + "HTTP" + ], + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "initialDelaySeconds": { + "description": "Number of seconds after the container has started before liveness probes are initiated.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "type": "integer" + }, + "periodSeconds": { + "default": 10, + "description": "How often (in seconds) to perform the probe.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "successThreshold": { + "default": 1, + "description": "Minimum consecutive successes for the probe to be considered successful after having failed.\nMust be 1 for liveness and startup.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "tcpSocket": { + "description": "TCPSocket specifies an action involving a TCP port.", + "properties": { + "host": { + "description": "Optional: Host name to connect to, defaults to the pod IP.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "timeoutSeconds": { + "default": 1, + "description": "Number of seconds after which the probe times out.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "minimum": 1, + "type": "integer" + } + }, + "type": "object" + } + }, + "type": "object" + }, "horizontalScaling": { "description": "Configuration for automatic horizontal scaling of replicas.\nMore info: https://www.radix.equinor.com/references/reference-radix-config/#horizontalscaling", "properties": { @@ -1048,98 +1501,551 @@ "type": "boolean" } }, - "required": [ - "container" - ], - "type": "object" + "required": [ + "container" + ], + "type": "object" + }, + "container": { + "description": "Deprecated. 
Only required by the deprecated type: blob.", + "type": "string" + }, + "emptyDir": { + "description": "EmptyDir settings for EmptyDir volume", + "properties": { + "sizeLimit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + } + ], + "description": "SizeLimit defines the size of the emptyDir volume", + "pattern": "^(\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))))?$", + "x-kubernetes-int-or-string": true + } + }, + "required": [ + "sizeLimit" + ], + "type": "object" + }, + "gid": { + "description": "GID defines the group ID (number) which will be set as owner of the mounted volume.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "type": "string" + }, + "name": { + "description": "User-defined name of the volume mount.\nMust be unique for the component.", + "maxLength": 40, + "minLength": 1, + "type": "string" + }, + "path": { + "description": "Path defines in which directory the external storage is mounted.", + "minLength": 1, + "type": "string" + }, + "requestsStorage": { + "description": "More info: https://www.radix.equinor.com/guides/volume-mounts/optional-settings/\nDeprecated, use BlobFuse2 or AzureFile instead.", + "type": "string" + }, + "skuName": { + "description": "More info: https://www.radix.equinor.com/guides/volume-mounts/optional-settings/\nDeprecated, use BlobFuse2 or AzureFile instead.", + "type": "string" + }, + "storage": { + "description": "Storage defines the name of the container in the external storage resource.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "type": "string" + }, + "type": { + "description": "Type defines the storage type.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "enum": [ + "blob", + "azure-blob", + "azure-file", + "" + ], + "type": "string" + }, + "uid": { + "description": "UID defines the user ID (number) which will be set as owner of the mounted volume.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "type": "string" + } + }, + "required": [ + "name", + "path" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": [ + "environment" + ], + "type": "object" + }, + "type": "array", + "x-kubernetes-list-map-keys": [ + "environment" + ], + "x-kubernetes-list-type": "map" + }, + "healthChecks": { + "description": "HealthChecks can tell Radix if your application is ready to receive traffic.\nDefaults to a TCP check against your first listed port.\nIf any healthchecks are defined, no defaults will be added and you should add your own readinessProbe.", + "properties": { + "livenessProbe": { + "description": "Periodic probe of container liveness.\nContainer will be restarted if the probe fails.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "properties": { + "exec": { + "description": "Exec specifies the action to take.", + "properties": { + "command": { + "description": "Command is the command line to execute inside the container, the working directory for the\ncommand is root ('/') in the container's filesystem. The command is simply exec'd, it is\nnot run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use\na shell, you need to explicitly call out to that shell.\nExit status of 0 is treated as live/healthy and non-zero is unhealthy.", + "items": { + "type": "string" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "type": "object" + }, + "failureThreshold": { + "default": 3, + "description": "Minimum consecutive failures for the probe to be considered failed after having succeeded.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "grpc": { + "description": "GRPC specifies an action involving a GRPC port.", + "properties": { + "port": { + "description": "Port number of the gRPC service.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "service": { + "default": "", + "description": "Service is the name of the service to place in the gRPC HealthCheckRequest\n(see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).\n\nIf this is not specified, the default behavior is defined by gRPC.", + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "httpGet": { + "description": "HTTPGet specifies the http request to perform.", + "properties": { + "host": { + "description": "Host name to connect to, defaults to the pod IP. You probably want to set\n\"Host\" in httpHeaders instead.", + "type": "string" + }, + "httpHeaders": { + "description": "Custom headers to set in the request. HTTP allows repeated headers.", + "items": { + "description": "HTTPHeader describes a custom header to be used in HTTP probes", + "properties": { + "name": { + "description": "The header field name.\nThis will be canonicalized upon output, so case-variant names will be understood as the same header.", + "type": "string" + }, + "value": { + "description": "The header field value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "type": "object" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + }, + "path": { + "description": "Path to access on the HTTP server.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "scheme": { + "description": "Scheme to use for connecting to the host.\nDefaults to HTTP.", + "enum": [ + "HTTPS", + "HTTP" + ], + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "initialDelaySeconds": { + "description": "Number of seconds after the container has started before liveness probes are initiated.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "type": "integer" + }, + "periodSeconds": { + "default": 10, + "description": "How often (in seconds) to perform the probe.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "successThreshold": { + "default": 1, + "description": "Minimum consecutive successes for the probe to be considered successful after having failed.\nMust be 1 for liveness and startup.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "tcpSocket": { + "description": "TCPSocket specifies an action involving a TCP port.", + "properties": { + "host": { + "description": "Optional: Host name to connect to, defaults to the pod IP.", + "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "timeoutSeconds": 
{ + "default": 1, + "description": "Number of seconds after which the probe times out.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "minimum": 1, + "type": "integer" + } + }, + "type": "object" + }, + "readinessProbe": { + "description": "Periodic probe of container service readiness.\nContainer will be removed from service endpoints if the probe fails.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes\nDefaults to TCP Probe against the first listed port", + "properties": { + "exec": { + "description": "Exec specifies the action to take.", + "properties": { + "command": { + "description": "Command is the command line to execute inside the container, the working directory for the\ncommand is root ('/') in the container's filesystem. The command is simply exec'd, it is\nnot run inside a shell, so traditional shell instructions ('|', etc) won't work. To use\na shell, you need to explicitly call out to that shell.\nExit status of 0 is treated as live/healthy and non-zero is unhealthy.", + "items": { + "type": "string" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "type": "object" + }, + "failureThreshold": { + "default": 3, + "description": "Minimum consecutive failures for the probe to be considered failed after having succeeded.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "grpc": { + "description": "GRPC specifies an action involving a GRPC port.", + "properties": { + "port": { + "description": "Port number of the gRPC service.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" }, - "container": { - "description": "Deprecated. Only required by the deprecated type: blob.", + "service": { + "default": "", + "description": "Service is the name of the service to place in the gRPC HealthCheckRequest\n(see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).\n\nIf this is not specified, the default behavior is defined by gRPC.", + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "httpGet": { + "description": "HTTPGet specifies the http request to perform.", + "properties": { + "host": { + "description": "Host name to connect to, defaults to the pod IP. You probably want to set\n\"Host\" in httpHeaders instead.", "type": "string" }, - "emptyDir": { - "description": "EmptyDir settings for EmptyDir volume", - "properties": { - "sizeLimit": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ], - "description": "SizeLimit defines the size of the emptyDir volume", - "pattern": "^(\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))))?$", - "x-kubernetes-int-or-string": true - } + "httpHeaders": { + "description": "Custom headers to set in the request. 
HTTP allows repeated headers.", + "items": { + "description": "HTTPHeader describes a custom header to be used in HTTP probes", + "properties": { + "name": { + "description": "The header field name.\nThis will be canonicalized upon output, so case-variant names will be understood as the same header.", + "type": "string" + }, + "value": { + "description": "The header field value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "type": "object" }, - "required": [ - "sizeLimit" - ], - "type": "object" + "type": "array", + "x-kubernetes-list-type": "atomic" }, - "gid": { - "description": "GID defines the group ID (number) which will be set as owner of the mounted volume.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "path": { + "description": "Path to access on the HTTP server.", "type": "string" }, - "name": { - "description": "User-defined name of the volume mount.\nMust be unique for the component.", - "maxLength": 40, - "minLength": 1, - "type": "string" + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" }, - "path": { - "description": "Path defines in which directory the external storage is mounted.", - "minLength": 1, + "scheme": { + "description": "Scheme to use for connecting to the host.\nDefaults to HTTP.", + "enum": [ + "HTTPS", + "HTTP" + ], "type": "string" - }, - "requestsStorage": { - "description": "More info: https://www.radix.equinor.com/guides/volume-mounts/optional-settings/\nDeprecated, use BlobFuse2 or AzureFile instead.", + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "initialDelaySeconds": { + "description": "Number of seconds after the container has started before liveness probes are initiated.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "type": "integer" + }, + "periodSeconds": { + "default": 10, + "description": "How often (in seconds) to perform the probe.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "successThreshold": { + "default": 1, + "description": "Minimum consecutive successes for the probe to be considered successful after having failed.\nMust be 1 for liveness and startup.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "tcpSocket": { + "description": "TCPSocket specifies an action involving a TCP port.", + "properties": { + "host": { + "description": "Optional: Host name to connect to, defaults to the pod IP.", "type": "string" }, - "skuName": { - "description": "More info: https://www.radix.equinor.com/guides/volume-mounts/optional-settings/\nDeprecated, use BlobFuse2 or AzureFile instead.", + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "timeoutSeconds": { + "default": 1, + "description": "Number of seconds after which the probe times out.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "minimum": 1, + "type": "integer" + } + }, + "type": "object" + }, + "startupProbe": { + "description": "StartupProbe indicates that the Pod has successfully initialized.\nIf specified, no other probes are executed until this completes successfully.\nIf this probe fails, the Pod will be restarted, just as if the livenessProbe failed.\nThis can be used to provide different probe 
parameters at the beginning of a Pod's lifecycle,\nwhen it might take a long time to load data or warm a cache, than during steady-state operation.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "properties": { + "exec": { + "description": "Exec specifies the action to take.", + "properties": { + "command": { + "description": "Command is the command line to execute inside the container, the working directory for the\ncommand is root ('/') in the container's filesystem. The command is simply exec'd, it is\nnot run inside a shell, so traditional shell instructions ('|', etc) won't work. To use\na shell, you need to explicitly call out to that shell.\nExit status of 0 is treated as live/healthy and non-zero is unhealthy.", + "items": { + "type": "string" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "type": "object" + }, + "failureThreshold": { + "default": 3, + "description": "Minimum consecutive failures for the probe to be considered failed after having succeeded.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "grpc": { + "description": "GRPC specifies an action involving a GRPC port.", + "properties": { + "port": { + "description": "Port number of the gRPC service.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "service": { + "default": "", + "description": "Service is the name of the service to place in the gRPC HealthCheckRequest\n(see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).\n\nIf this is not specified, the default behavior is defined by gRPC.", + "type": "string" + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "httpGet": { + "description": "HTTPGet specifies the http request to perform.", + "properties": { + "host": { + "description": "Host name to connect to, defaults to the pod IP. You probably want to set\n\"Host\" in httpHeaders instead.", "type": "string" }, - "storage": { - "description": "Storage defines the name of the container in the external storage resource.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "httpHeaders": { + "description": "Custom headers to set in the request. 
HTTP allows repeated headers.", + "items": { + "description": "HTTPHeader describes a custom header to be used in HTTP probes", + "properties": { + "name": { + "description": "The header field name.\nThis will be canonicalized upon output, so case-variant names will be understood as the same header.", + "type": "string" + }, + "value": { + "description": "The header field value", + "type": "string" + } + }, + "required": [ + "name", + "value" + ], + "type": "object" + }, + "type": "array", + "x-kubernetes-list-type": "atomic" + }, + "path": { + "description": "Path to access on the HTTP server.", "type": "string" }, - "type": { - "description": "Type defines the storage type.\nDeprecated, use BlobFuse2 or AzureFile instead.", + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" + }, + "scheme": { + "description": "Scheme to use for connecting to the host.\nDefaults to HTTP.", "enum": [ - "blob", - "azure-blob", - "azure-file", - "" + "HTTPS", + "HTTP" ], "type": "string" - }, - "uid": { - "description": "UID defines the user ID (number) which will be set as owner of the mounted volume.\nDeprecated, use BlobFuse2 or AzureFile instead.", + } + }, + "required": [ + "port" + ], + "type": "object" + }, + "initialDelaySeconds": { + "description": "Number of seconds after the container has started before liveness probes are initiated.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "type": "integer" + }, + "periodSeconds": { + "default": 10, + "description": "How often (in seconds) to perform the probe.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "successThreshold": { + "default": 1, + "description": "Minimum consecutive successes for the probe to be considered successful after having failed.\nMust be 1 for liveness and startup.", + "format": "int32", + "minimum": 1, + "type": "integer" + }, + "tcpSocket": { + "description": "TCPSocket specifies an action involving a TCP port.", + "properties": { + "host": { + "description": "Optional: Host name to connect to, defaults to the pod IP.", "type": "string" + }, + "port": { + "description": "port number to access on the container.", + "format": "int32", + "maximum": 65535, + "minimum": 1, + "type": "integer" } }, "required": [ - "name", - "path" + "port" ], "type": "object" }, - "type": "array" - } - }, - "required": [ - "environment" - ], - "type": "object" + "timeoutSeconds": { + "default": 1, + "description": "Number of seconds after which the probe times out.\nMore info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + "format": "int32", + "minimum": 1, + "type": "integer" + } + }, + "type": "object" + } }, - "type": "array", - "x-kubernetes-list-map-keys": [ - "environment" - ], - "x-kubernetes-list-type": "map" + "type": "object" }, "horizontalScaling": { "description": "Configuration for automatic horizontal scaling of replicas.\nMore info: https://www.radix.equinor.com/references/reference-radix-config/#horizontalscaling", @@ -2370,6 +3276,70 @@ "pattern": "^(([a-z0-9][-a-z0-9]*)?[a-z0-9])?$", "type": "string" }, + "failurePolicy": { + "description": "Specifies the policy of handling failed job replicas. 
In particular, it allows to\nspecify the set of actions and conditions which need to be\nsatisfied to take the associated action.\nIf empty, the default behaviour applies - the counter of failed job replicas\nis incremented and it is checked against the backoffLimit.", + "properties": { + "rules": { + "description": "A list of failure policy rules. The rules are evaluated in order.\nOnce a rule matches a job replica failure, the remaining of the rules are ignored.\nWhen no rule matches the failure, the default handling applies - the\ncounter of failures is incremented and it is checked against\nthe backoffLimit.", + "items": { + "description": "RadixJobComponentFailurePolicyRule describes how a job replica failure is handled when the onExitCodes rules are met.", + "properties": { + "action": { + "description": "Specifies the action taken on a job replica failure when the onExitCodes requirements are satisfied.", + "enum": [ + "FailJob", + "Ignore", + "Count" + ], + "type": "string" + }, + "onExitCodes": { + "description": "Represents the requirement on the job replica exit codes.", + "properties": { + "operator": { + "description": "Represents the relationship between the job replica's exit code and the\nspecified values. Replicas completed with success (exit code 0) are\nexcluded from the requirement check.", + "enum": [ + "In", + "NotIn" + ], + "type": "string" + }, + "values": { + "description": "Specifies the set of values. The job replica's exit code is checked against this set of\nvalues with respect to the operator. The list must not contain duplicates.\nValue '0' cannot be used for the In operator.", + "items": { + "format": "int32", + "minimum": 0, + "type": "integer" + }, + "maxItems": 255, + "minItems": 1, + "type": "array", + "x-kubernetes-list-type": "set" + } + }, + "required": [ + "operator", + "values" + ], + "type": "object" + } + }, + "required": [ + "action", + "onExitCodes" + ], + "type": "object" + }, + "maxItems": 20, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "required": [ + "rules" + ], + "type": "object" + }, "identity": { "description": "Environment specific configuration for workload identity (federated credentials).\nMore info: https://www.radix.equinor.com/references/reference-radix-config/#identity-2", "properties": { @@ -2847,6 +3817,70 @@ ], "x-kubernetes-list-type": "map" }, + "failurePolicy": { + "description": "Specifies the policy of handling failed job replicas. In particular, it allows to\nspecify the set of actions and conditions which need to be\nsatisfied to take the associated action.\nIf empty, the default behaviour applies - the counter of failed job replicas\nis incremented and it is checked against the backoffLimit.", + "properties": { + "rules": { + "description": "A list of failure policy rules. 
The rules are evaluated in order.\nOnce a rule matches a job replica failure, the remaining of the rules are ignored.\nWhen no rule matches the failure, the default handling applies - the\ncounter of failures is incremented and it is checked against\nthe backoffLimit.", + "items": { + "description": "RadixJobComponentFailurePolicyRule describes how a job replica failure is handled when the onExitCodes rules are met.", + "properties": { + "action": { + "description": "Specifies the action taken on a job replica failure when the onExitCodes requirements are satisfied.", + "enum": [ + "FailJob", + "Ignore", + "Count" + ], + "type": "string" + }, + "onExitCodes": { + "description": "Represents the requirement on the job replica exit codes.", + "properties": { + "operator": { + "description": "Represents the relationship between the job replica's exit code and the\nspecified values. Replicas completed with success (exit code 0) are\nexcluded from the requirement check.", + "enum": [ + "In", + "NotIn" + ], + "type": "string" + }, + "values": { + "description": "Specifies the set of values. The job replica's exit code is checked against this set of\nvalues with respect to the operator. The list must not contain duplicates.\nValue '0' cannot be used for the In operator.", + "items": { + "format": "int32", + "minimum": 0, + "type": "integer" + }, + "maxItems": 255, + "minItems": 1, + "type": "array", + "x-kubernetes-list-type": "set" + } + }, + "required": [ + "operator", + "values" + ], + "type": "object" + } + }, + "required": [ + "action", + "onExitCodes" + ], + "type": "object" + }, + "maxItems": 20, + "type": "array", + "x-kubernetes-list-type": "atomic" + } + }, + "required": [ + "rules" + ], + "type": "object" + }, "identity": { "description": "Configuration for workload identity (federated credentials).\nMore info: https://www.radix.equinor.com/references/reference-radix-config/#identity-2", "properties": { diff --git a/pkg/apis/batch/kubejob.go b/pkg/apis/batch/kubejob.go index 4f3e791b6..04bf41d0c 100644 --- a/pkg/apis/batch/kubejob.go +++ b/pkg/apis/batch/kubejob.go @@ -129,6 +129,11 @@ func (s *syncer) buildJob(ctx context.Context, batchJob *radixv1.RadixBatchJob, backoffLimit = numbers.Int32Ptr(0) } + failurePolicy := operatorUtils.GetPodFailurePolicy(jobComponent.FailurePolicy) + if batchJob.FailurePolicy != nil { + failurePolicy = operatorUtils.GetPodFailurePolicy(batchJob.FailurePolicy) + } + serviceAccountSpec := deployment.NewServiceAccountSpec(rd, jobComponent) job := &batchv1.Job{ @@ -139,7 +144,8 @@ func (s *syncer) buildJob(ctx context.Context, batchJob *radixv1.RadixBatchJob, Annotations: annotations.ForKubernetesDeploymentObservedGeneration(rd), }, Spec: batchv1.JobSpec{ - BackoffLimit: backoffLimit, + BackoffLimit: backoffLimit, + PodFailurePolicy: failurePolicy, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: podLabels, diff --git a/pkg/apis/batch/status.go b/pkg/apis/batch/status.go index 77875d281..fe00985e9 100644 --- a/pkg/apis/batch/status.go +++ b/pkg/apis/batch/status.go @@ -44,18 +44,17 @@ func (s *syncer) syncStatus(ctx context.Context, reconcileError error) error { currStatus.Condition.ActiveTime = nil currStatus.Condition.CompletionTime = nil case radixv1.BatchConditionTypeActive: - now := metav1.Now() if currStatus.Condition.ActiveTime == nil { - currStatus.Condition.ActiveTime = &now + currStatus.Condition.ActiveTime = &metav1.Time{Time: s.clock.Now()} } currStatus.Condition.CompletionTime = nil case 
radixv1.BatchConditionTypeCompleted: - now := metav1.Now() + now := &metav1.Time{Time: s.clock.Now()} if currStatus.Condition.ActiveTime == nil { - currStatus.Condition.ActiveTime = &now + currStatus.Condition.ActiveTime = now } if currStatus.Condition.CompletionTime == nil { - currStatus.Condition.CompletionTime = &now + currStatus.Condition.CompletionTime = now } } }) @@ -98,12 +97,6 @@ func (s *syncer) updateStatus(ctx context.Context, changeStatusFunc func(currSta return err } -func isJobStatusCondition(conditionType batchv1.JobConditionType) func(batchv1.JobCondition) bool { - return func(condition batchv1.JobCondition) bool { - return condition.Type == conditionType && condition.Status == corev1.ConditionTrue - } -} - func (s *syncer) buildJobStatuses(ctx context.Context) ([]radixv1.RadixBatchJobStatus, error) { var jobStatuses []radixv1.RadixBatchJobStatus @@ -147,13 +140,11 @@ func (s *syncer) buildBatchJobStatus(ctx context.Context, batchJob *radixv1.Radi if isBatchJobStopRequested(batchJob) { status.Phase = radixv1.BatchJobPhaseStopped - now := metav1.Now() - status.EndTime = &now + status.EndTime = &metav1.Time{Time: s.clock.Now()} if hasCurrentStatus { status.CreationTime = currentStatus.CreationTime status.StartTime = currentStatus.StartTime - status.Message = currentStatus.Message - status.Reason = currentStatus.Reason + status.Failed = currentStatus.Failed } s.updateJobAndPodStatuses(ctx, batchJob.Name, &status) return status @@ -163,58 +154,32 @@ func (s *syncer) buildBatchJobStatus(ctx context.Context, batchJob *radixv1.Radi if !jobFound { return status } - jobBackoffLimit := getJobBackoffLimit(job) + status.CreationTime = &job.CreationTimestamp + status.StartTime = job.Status.StartTime status.Failed = job.Status.Failed - var uncountedSucceeded, uncountedFailed int - if uncounted := job.Status.UncountedTerminatedPods; uncounted != nil { - uncountedSucceeded, uncountedFailed = len(uncounted.Succeeded), len(uncounted.Failed) - } - jobConditionsSortedDesc := getJobConditionsSortedDesc(job) - if (job.Status.Succeeded+int32(uncountedSucceeded)) > 0 && - s.setJobStatus(ctx, batchJob, &status, job, jobConditionsSortedDesc, radixv1.BatchJobPhaseSucceeded, batchv1.JobComplete) { - return status - } - if (job.Status.Failed+int32(uncountedFailed)) == jobBackoffLimit+1 && - s.setJobStatus(ctx, batchJob, &status, job, jobConditionsSortedDesc, radixv1.BatchJobPhaseFailed, batchv1.JobFailed) { - return status - } - if job.Status.Active > 0 { + if condition, ok := slice.FindFirst(job.Status.Conditions, hasOneOfConditionTypes(batchv1.JobComplete, batchv1.JobSuccessCriteriaMet)); ok { + status.Phase = radixv1.BatchJobPhaseSucceeded + status.EndTime = pointers.Ptr(condition.LastTransitionTime) + status.Reason = condition.Reason + status.Message = condition.Message + } else if condition, ok := slice.FindFirst(job.Status.Conditions, hasOneOfConditionTypes(batchv1.JobFailed)); ok { + status.Phase = radixv1.BatchJobPhaseFailed + status.EndTime = pointers.Ptr(condition.LastTransitionTime) + status.Reason = condition.Reason + status.Message = condition.Message + } else if job.Status.Active > 0 { status.Phase = radixv1.BatchJobPhaseActive - status.StartTime = job.Status.StartTime - if job.Status.Ready != nil && job.Status.Active == *job.Status.Ready { + if job.Status.Ready != nil && *job.Status.Ready > 0 { status.Phase = radixv1.BatchJobPhaseRunning } } - if len(jobConditionsSortedDesc) > 0 { - status.Reason = jobConditionsSortedDesc[0].Reason - status.Message = jobConditionsSortedDesc[0].Message - 
}
+
 	s.updateJobAndPodStatuses(ctx, batchJob.Name, &status)
 	return status
 }
 
-func (s *syncer) setJobStatus(ctx context.Context, batchJob *radixv1.RadixBatchJob, status *radixv1.RadixBatchJobStatus, job *batchv1.Job, jobConditionsSortedDesc []batchv1.JobCondition, phase radixv1.RadixBatchJobPhase, conditionType batchv1.JobConditionType) bool {
-	if condition, ok := slice.FindFirst(jobConditionsSortedDesc, isJobStatusCondition(conditionType)); ok {
-		status.Phase = phase
-		status.StartTime = job.Status.StartTime
-		status.Reason = condition.Reason
-		status.Message = condition.Message
-		status.EndTime = pointers.Ptr(condition.LastTransitionTime)
-		s.updateJobAndPodStatuses(ctx, batchJob.Name, status)
-		return true
-	}
-	return false
-}
-
-func getJobBackoffLimit(job *batchv1.Job) int32 {
-	if job.Spec.BackoffLimit != nil {
-		return *job.Spec.BackoffLimit
-	}
-	return 0
-}
-
 func (s *syncer) updateJobAndPodStatuses(ctx context.Context, batchJobName string, jobStatus *radixv1.RadixBatchJobStatus) {
 	jobComponentName := s.radixBatch.GetLabels()[kube.RadixComponentLabel]
 	podStatusMap := getPodStatusMap(jobStatus)
@@ -344,14 +309,6 @@ func getPodStatusMap(status *radixv1.RadixBatchJobStatus) map[string]*radixv1.Ra
 	return podStatusMap
 }
 
-func getJobConditionsSortedDesc(job *batchv1.Job) []batchv1.JobCondition {
-	descSortedJobConditions := job.Status.Conditions
-	sort.Slice(descSortedJobConditions, func(i, j int) bool {
-		return descSortedJobConditions[i].LastTransitionTime.After(descSortedJobConditions[j].LastTransitionTime.Time)
-	})
-	return descSortedJobConditions
-}
-
 func (s *syncer) restoreStatus(ctx context.Context) error {
 	if restoredStatus, ok := s.radixBatch.Annotations[kube.RestoredStatusAnnotation]; ok && len(restoredStatus) > 0 {
 		if reflect.ValueOf(s.radixBatch.Status).IsZero() {
diff --git a/pkg/apis/batch/syncer.go b/pkg/apis/batch/syncer.go
index 68f4d6407..5febda8eb 100644
--- a/pkg/apis/batch/syncer.go
+++ b/pkg/apis/batch/syncer.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 
+	commonutils "github.com/equinor/radix-common/utils"
 	"github.com/equinor/radix-common/utils/slice"
 	"github.com/equinor/radix-operator/pkg/apis/config"
 	"github.com/equinor/radix-operator/pkg/apis/kube"
@@ -26,16 +27,33 @@ type Syncer interface {
 	OnSync(ctx context.Context) error
 }
 
+// SyncerOption is a function for configuring a RadixBatch syncer
+type SyncerOption func(syncer *syncer)
+
+// WithClock configures the syncer to use the given clock when setting status timestamps
+func WithClock(clock commonutils.Clock) SyncerOption {
+	return func(syncer *syncer) {
+		syncer.clock = clock
+	}
+}
+
 // NewSyncer Constructor of RadixBatches Syncer
-func NewSyncer(kubeclient kubernetes.Interface, kubeUtil *kube.Kube, radixClient radixclient.Interface, radixBatch *radixv1.RadixBatch, config *config.Config) Syncer {
-	return &syncer{
+func NewSyncer(kubeclient kubernetes.Interface, kubeUtil *kube.Kube, radixClient radixclient.Interface, radixBatch *radixv1.RadixBatch, config *config.Config, options ...SyncerOption) Syncer {
+	syncer := &syncer{
 		kubeClient:    kubeclient,
 		kubeUtil:      kubeUtil,
 		radixClient:   radixClient,
 		radixBatch:    radixBatch,
 		config:        config,
 		restartedJobs: map[string]radixv1.RadixBatchJob{},
+		clock:         commonutils.RealClock{},
 	}
+
+	for _, opt := range options {
+		opt(syncer)
+	}
+
+	return syncer
 }
 
 type syncer struct {
@@ -45,6 +61,7 @@ type syncer struct {
 	radixBatch    *radixv1.RadixBatch
 	config        *config.Config
 	restartedJobs map[string]radixv1.RadixBatchJob
+	clock         commonutils.Clock
 }
 
 // OnSync Syncs RadixBatches
diff --git a/pkg/apis/batch/syncer_test.go b/pkg/apis/batch/syncer_test.go
index 947981ae5..f20e33fb1 100644
--- a/pkg/apis/batch/syncer_test.go
+++
b/pkg/apis/batch/syncer_test.go @@ -9,6 +9,7 @@ import ( "time" certfake "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned/fake" + commonutils "github.com/equinor/radix-common/utils" "github.com/equinor/radix-common/utils/numbers" "github.com/equinor/radix-common/utils/pointers" "github.com/equinor/radix-common/utils/slice" @@ -28,9 +29,9 @@ import ( "github.com/stretchr/testify/suite" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/fake" secretproviderfake "sigs.k8s.io/secrets-store-csi-driver/pkg/client/clientset/versioned/fake" @@ -50,8 +51,8 @@ func TestSyncerTestSuite(t *testing.T) { suite.Run(t, new(syncerTestSuite)) } -func (s *syncerTestSuite) createSyncer(forJob *radixv1.RadixBatch, config *config.Config) Syncer { - return NewSyncer(s.kubeClient, s.kubeUtil, s.radixClient, forJob, config) +func (s *syncerTestSuite) createSyncer(forJob *radixv1.RadixBatch, config *config.Config, options ...SyncerOption) Syncer { + return NewSyncer(s.kubeClient, s.kubeUtil, s.radixClient, forJob, config, options...) } func (s *syncerTestSuite) applyRadixDeploymentEnvVarsConfigMaps(kubeUtil *kube.Kube, rd *radixv1.RadixDeployment) map[string]*corev1.ConfigMap { @@ -82,23 +83,19 @@ func (s *syncerTestSuite) ensurePopulatedEnvVarsConfigMaps(kubeUtil *kube.Kube, } func (s *syncerTestSuite) SetupTest() { - s.kubeClient = fake.NewSimpleClientset() - s.radixClient = fakeradix.NewSimpleClientset() - s.kedaClient = kedafake.NewSimpleClientset() - s.promClient = prometheusfake.NewSimpleClientset() - s.certClient = certfake.NewSimpleClientset() - s.kubeUtil, _ = kube.New(s.kubeClient, s.radixClient, s.kedaClient, secretproviderfake.NewSimpleClientset()) - s.T().Setenv(defaults.OperatorEnvLimitDefaultMemoryEnvironmentVariable, "1500Mi") - s.T().Setenv(defaults.OperatorRollingUpdateMaxUnavailable, "25%") - s.T().Setenv(defaults.OperatorRollingUpdateMaxSurge, "25%") - s.T().Setenv(defaults.OperatorDefaultUserGroupEnvironmentVariable, "any-group") + s.setupTest() } func (s *syncerTestSuite) SetupSubTest() { + s.setupTest() +} + +func (s *syncerTestSuite) setupTest() { s.kubeClient = fake.NewSimpleClientset() s.radixClient = fakeradix.NewSimpleClientset() s.kedaClient = kedafake.NewSimpleClientset() s.promClient = prometheusfake.NewSimpleClientset() + s.certClient = certfake.NewSimpleClientset() s.kubeUtil, _ = kube.New(s.kubeClient, s.radixClient, s.kedaClient, secretproviderfake.NewSimpleClientset()) s.T().Setenv(defaults.OperatorEnvLimitDefaultMemoryEnvironmentVariable, "1500Mi") s.T().Setenv(defaults.OperatorRollingUpdateMaxUnavailable, "25%") @@ -1293,11 +1290,14 @@ func (s *syncerTestSuite) Test_StopJob() { s.Require().NoError(err) _, err = s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) s.Require().NoError(err) + + // Run initial sync to ensure k8s jobs are created sut := s.createSyncer(batch, nil) s.Require().NoError(sut.OnSync(context.Background())) allJobs, _ := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{}) s.Require().Len(allJobs.Items, 2) + // Stop first job and check that k8s job is deleted batch.Spec.Jobs[0].Stop = pointers.Ptr(true) sut = s.createSyncer(batch, nil) s.Require().NoError(sut.OnSync(context.Background())) @@ -1469,832 +1469,617 @@ func 
(s *syncerTestSuite) Test_HandleJobStopWhenMissingRadixDeploymentConfig() { } -func (s *syncerTestSuite) Test_BatchStatusCondition() { - batchName, namespace, rdName := "any-batch", "any-ns", "any-rd" - job1Name, job2Name, job3Name := "job1", "job2", "job3" - batch := &radixv1.RadixBatch{ - ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, - Spec: radixv1.RadixBatchSpec{ - RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{ - LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, - Job: "any-job", - }, - Jobs: []radixv1.RadixBatchJob{{Name: job1Name}, {Name: job2Name}, {Name: job3Name}}, - }, - } - rd := &radixv1.RadixDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: rdName}, - Spec: radixv1.RadixDeploymentSpec{ - AppName: "any-app", - Jobs: []radixv1.RadixDeployJobComponent{ - { - Name: "any-job", - }, - }, - }, - } - batch, err := s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) - s.Require().NoError(err) - _, err = s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) - s.Require().NoError(err) - sut := s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - allJobs, err := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{}) - s.Require().NoError(err) - s.Require().Len(allJobs.Items, 3) - s.Require().ElementsMatch([]string{getKubeJobName(batchName, job1Name), getKubeJobName(batchName, job2Name), getKubeJobName(batchName, job3Name)}, slice.Map(allJobs.Items, func(job batchv1.Job) string { return job.GetName() })) - - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - // Initial condition is Waiting when all jobs are waiting - s.Equal(radixv1.BatchConditionTypeWaiting, batch.Status.Condition.Type) - s.Nil(batch.Status.Condition.ActiveTime) - s.Nil(batch.Status.Condition.CompletionTime) - - // Set job1 status.active to 1 => batch condition is Running - s.updateKubeJobStatus(getKubeJobName(batchName, job1Name), namespace)(func(status *batchv1.JobStatus) { - status.Active = 1 - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Equal(radixv1.BatchConditionTypeActive, batch.Status.Condition.Type) - s.NotNil(batch.Status.Condition.ActiveTime) - s.Nil(batch.Status.Condition.CompletionTime) +func (s *syncerTestSuite) Test_BatchJobStatus() { + const ( + namespace = "any-ns" + appName = "any-app" + rdName = "any-rd" + jobComponentName = "any-job" + batchName = "any-rb" + batchJobName = "any-batch-job" + ) - // Set job3 to stopped => batch condition is Running - batch.Spec.Jobs[2].Stop = pointers.Ptr(true) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Equal(radixv1.BatchConditionTypeActive, batch.Status.Condition.Type) - s.NotNil(batch.Status.Condition.ActiveTime) - s.Nil(batch.Status.Condition.CompletionTime) - - // Set job2 condition to failed => batch condition is Failing - s.updateKubeJobStatus(getKubeJobName(batchName, job2Name), namespace)(func(status 
*batchv1.JobStatus) { - status.Failed = 1 - status.Conditions = []batchv1.JobCondition{ - {Type: batchv1.JobFailed, Status: corev1.ConditionTrue}, - } - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Equal(radixv1.BatchConditionTypeActive, batch.Status.Condition.Type) - s.NotNil(batch.Status.Condition.ActiveTime) - s.Nil(batch.Status.Condition.CompletionTime) - - // Set job1 condition to failed => batch condition is Failed - s.updateKubeJobStatus(getKubeJobName(batchName, job1Name), namespace)(func(status *batchv1.JobStatus) { - status.Active = 0 - status.Succeeded = 1 - status.Conditions = []batchv1.JobCondition{ - {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, - } - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Equal(radixv1.BatchConditionTypeCompleted, batch.Status.Condition.Type) - s.NotNil(batch.Status.Condition.ActiveTime) - s.NotNil(batch.Status.Condition.CompletionTime) -} + var ( + now time.Time = time.Date(2024, 1, 20, 8, 00, 00, 0, time.Local) + ) -func (s *syncerTestSuite) Test_BatchJobStatusWaitingToSucceeded() { - batchName, namespace, rdName := "any-batch", "any-ns", "any-rd" - jobName := "myjob" - jobStartTime, jobCompletionTime := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.Local)), metav1.NewTime(time.Date(2020, 1, 2, 0, 0, 0, 0, time.Local)) - batch := &radixv1.RadixBatch{ - ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, - Spec: radixv1.RadixBatchSpec{ - RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{ - LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, - Job: "any-job", - }, - Jobs: []radixv1.RadixBatchJob{{Name: jobName}}, - }, + type kubeJobSpec struct { + creationTimestamp metav1.Time + status batchv1.JobStatus } - rd := &radixv1.RadixDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: rdName}, - Spec: radixv1.RadixDeploymentSpec{ - AppName: "any-app", - Jobs: []radixv1.RadixDeployJobComponent{ - { - Name: "any-job", - BackoffLimit: pointers.Ptr(int32(2)), - }, - }, - }, - } - batch, err := s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) - s.Require().NoError(err) - _, err = s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) - s.Require().NoError(err) - sut := s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - allJobs, err := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{}) - s.Require().NoError(err) - s.Require().Len(allJobs.Items, 1) - s.Equal(getKubeJobName(batchName, jobName), allJobs.Items[0].GetName()) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - // Initial phase is Waiting - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseWaiting, batch.Status.JobStatuses[0].Phase) - s.Equal(int32(0), batch.Status.JobStatuses[0].Failed) - s.Empty(batch.Status.JobStatuses[0].Reason) - 
s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Nil(batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.active to 1 => phase is Active - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(func(status *batchv1.JobStatus) { - status.Active = 1 - status.StartTime = &jobStartTime - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseActive, batch.Status.JobStatuses[0].Phase) - s.Equal(int32(0), batch.Status.JobStatuses[0].Failed) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.failed to 2 - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(func(status *batchv1.JobStatus) { - status.Active = 1 - status.Failed = 2 - status.StartTime = &jobStartTime - status.Conditions = []batchv1.JobCondition{ - {Type: batchv1.JobFailed, Status: corev1.ConditionTrue}, - } - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseActive, batch.Status.JobStatuses[0].Phase) - s.Equal(int32(2), batch.Status.JobStatuses[0].Failed) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.conditions to complete => phase is Succeeded - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(func(status *batchv1.JobStatus) { - status.Active = 0 - status.Succeeded = 1 - status.Conditions = []batchv1.JobCondition{ - {Type: batchv1.JobComplete, Status: corev1.ConditionTrue, LastTransitionTime: jobCompletionTime}, - } - status.StartTime = &jobStartTime - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseSucceeded, batch.Status.JobStatuses[0].Phase) - s.Equal(int32(2), batch.Status.JobStatuses[0].Failed) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) - s.Equal(&jobCompletionTime, batch.Status.JobStatuses[0].EndTime) -} - -func (s *syncerTestSuite) 
Test_BatchJobStatusWaitingToFailed() { - batchName, namespace, rdName := "any-batch", "any-ns", "any-rd" - jobName := "myjob" - jobStartTime, jobFailedTime := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.Local)), metav1.NewTime(time.Date(2020, 1, 2, 0, 0, 0, 0, time.Local)) - batch := &radixv1.RadixBatch{ - ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, - Spec: radixv1.RadixBatchSpec{ - RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{ - LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, - Job: "any-job", - }, - Jobs: []radixv1.RadixBatchJob{{Name: jobName}}, - }, + type jobSpec struct { + stop bool + kubeJob *kubeJobSpec + currentJobStatus *radixv1.RadixBatchJobStatus } - rd := &radixv1.RadixDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: rdName}, - Spec: radixv1.RadixDeploymentSpec{ - AppName: "any-app", - Jobs: []radixv1.RadixDeployJobComponent{ - { - Name: "any-job", - }, - }, - }, - } - batch, err := s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) - s.Require().NoError(err) - _, err = s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) - s.Require().NoError(err) - sut := s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - allJobs, err := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{}) - s.Require().NoError(err) - s.Require().Len(allJobs.Items, 1) - s.Equal(getKubeJobName(batchName, jobName), allJobs.Items[0].GetName()) - - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - // Initial phase is Waiting - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseWaiting, batch.Status.JobStatuses[0].Phase) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Nil(batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.active to 1 => phase is Active - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(func(status *batchv1.JobStatus) { - status.Active = 1 - status.StartTime = &jobStartTime - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseActive, batch.Status.JobStatuses[0].Phase) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.conditions to failed => phase is Failed - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(func(status *batchv1.JobStatus) { - status.Active = 0 - status.Failed = 1 - status.Conditions = []batchv1.JobCondition{ - {Type: batchv1.JobFailed, Status: corev1.ConditionTrue, LastTransitionTime: jobFailedTime}, - } - 
status.StartTime = &jobStartTime - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseFailed, batch.Status.JobStatuses[0].Phase) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) -} -func (s *syncerTestSuite) Test_BatchJobStatusWaitingToStopped() { - batchName, namespace, rdName := "any-batch", "any-ns", "any-rd" - jobName := "myjob" - jobStartTime := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.Local)) - batch := &radixv1.RadixBatch{ - ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, - Spec: radixv1.RadixBatchSpec{ - RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{ - LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, - Job: "any-job", - }, - Jobs: []radixv1.RadixBatchJob{{Name: jobName}}, - }, - } - rd := &radixv1.RadixDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: rdName}, - Spec: radixv1.RadixDeploymentSpec{ - AppName: "any-app", - Jobs: []radixv1.RadixDeployJobComponent{ - { - Name: "any-job", + tests := map[string]struct { + job jobSpec + expectedStatus radixv1.RadixBatchJobStatus + }{ + "kubejob not active and no current status": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{Type: "any-condition-type", Status: corev1.ConditionTrue, Reason: "any-condition-reason", Message: "any-condition-message", LastTransitionTime: metav1.Now()}}, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseWaiting, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Failed: 5, + }, + }, + "kubejob not active and current status is set": { + job: jobSpec{ + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, - }, - }, - } - batch, err := s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) - s.Require().NoError(err) - _, err = s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) - s.Require().NoError(err) - sut := s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - allJobs, err := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{}) - s.Require().NoError(err) - s.Require().Len(allJobs.Items, 1) - s.Equal(getKubeJobName(batchName, jobName), allJobs.Items[0].GetName()) - - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) 
- // Initial phase is Waiting - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseWaiting, batch.Status.JobStatuses[0].Phase) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Nil(batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.active to 1 => phase is Active - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(func(status *batchv1.JobStatus) { - status.Active = 1 - status.StartTime = &jobStartTime - }) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseActive, batch.Status.JobStatuses[0].Phase) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) - s.Nil(batch.Status.JobStatuses[0].EndTime) - - // Set job status.conditions to failed => phase is Failed - batch.Spec.Jobs[0].Stop = pointers.Ptr(true) - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - s.Require().Len(batch.Status.JobStatuses, 1) - s.Equal(jobName, batch.Status.JobStatuses[0].Name) - s.Equal(radixv1.BatchJobPhaseStopped, batch.Status.JobStatuses[0].Phase) - s.Empty(batch.Status.JobStatuses[0].Reason) - s.Empty(batch.Status.JobStatuses[0].Message) - s.Equal(&allJobs.Items[0].CreationTimestamp, batch.Status.JobStatuses[0].CreationTime) - s.Equal(&jobStartTime, batch.Status.JobStatuses[0].StartTime) - s.NotNil(batch.Status.JobStatuses[0].EndTime) -} - -func (s *syncerTestSuite) Test_BatchStatus() { - namespace, rdName := "any-ns", "any-rd" - jobStartTime, jobCompletionTime := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.Local)), metav1.NewTime(time.Date(2020, 1, 2, 0, 0, 0, 0, time.Local)) - type expectedJobStatusProps struct { - phase radixv1.RadixBatchJobPhase - } - type expectedBatchStatusProps struct { - conditionType radixv1.RadixBatchConditionType - } - type updateJobStatus struct { - updateRadixBatchJobFunc func(job *radixv1.RadixBatchJob) - updateRadixBatchJobStatusFunc func(status *radixv1.RadixBatchJobStatus) - } - type scenario struct { - name string - initialJobStatuses map[string]func(status *batchv1.JobStatus) - updateJobStatuses map[string]updateJobStatus - jobNames []string - expectedJobStatuses map[string]expectedJobStatusProps - expectedBatchStatus expectedBatchStatusProps - } - startJobStatusFunc := func(status *batchv1.JobStatus) { - status.Active = 1 - status.StartTime = &jobStartTime - } - succeededJobStatusFunc := func(status *radixv1.RadixBatchJobStatus) { - status.Phase = radixv1.BatchJobPhaseSucceeded - status.EndTime = &jobCompletionTime - } - failedJobStatusFunc := func(status *radixv1.RadixBatchJobStatus) { - status.Phase = radixv1.BatchJobPhaseFailed - status.EndTime = &jobCompletionTime - } - 
stoppedJobStatusFunc := func(status *radixv1.RadixBatchJobStatus) { - status.Phase = radixv1.BatchJobPhaseStopped - status.EndTime = &jobCompletionTime - } - scenarios := []scenario{ - { - name: "all waiting - batch is waiting", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){}, - updateJobStatuses: map[string]updateJobStatus{}, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseWaiting}, - "j2": {phase: radixv1.BatchJobPhaseWaiting}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeWaiting, - }, - }, - { - name: "one waiting, one active - batch is active", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{}, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseActive}, - "j2": {phase: radixv1.BatchJobPhaseWaiting}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeActive, - }, - }, - { - name: "all active - batch is active", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{}, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseActive}, - "j2": {phase: radixv1.BatchJobPhaseActive}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeActive, - }, - }, - { - name: "one active, one succeeded - batch is active", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j2": { - updateRadixBatchJobStatusFunc: succeededJobStatusFunc, + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{Type: "any-condition-type", Status: corev1.ConditionTrue, Reason: "any-condition-reason", Message: "any-condition-message", LastTransitionTime: metav1.Now()}}, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: "any-current-phase", + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Failed: 5, + Restart: "any-current-restart", + }, + }, + + "job stop set and no current status": { + job: jobSpec{ + stop: true, + }, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseStopped, + EndTime: &metav1.Time{Time: now}, + }, + }, + "job stopped and current status is set": { + job: jobSpec{ + stop: true, + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseActive}, - "j2": {phase: radixv1.BatchJobPhaseSucceeded}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: 
radixv1.BatchConditionTypeActive, - }, - }, - { - name: "one waiting, one active, one succeeded - batch is active", - jobNames: []string{"j1", "j2", "j3"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j2": startJobStatusFunc, - "j3": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j3": { - updateRadixBatchJobStatusFunc: succeededJobStatusFunc, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseStopped, + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now}, + Restart: "any-current-restart", + Failed: 100, + }, + }, + "kubejob active and no current status": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{Type: "any-condition-type", Status: corev1.ConditionTrue, Reason: "any-condition-reason", Message: "any-condition-message", LastTransitionTime: metav1.Now()}}, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseActive, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Failed: 5, + }, + }, + "kubejob active and current status is set": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{Type: "any-condition-type", Status: corev1.ConditionTrue, Reason: "any-condition-reason", Message: "any-condition-message", LastTransitionTime: metav1.Now()}}, + }}, + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseWaiting}, - "j2": {phase: radixv1.BatchJobPhaseActive}, - "j3": {phase: radixv1.BatchJobPhaseSucceeded}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeActive, - }, - }, - { - name: "all completed - batch is succeeded", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j1": { - updateRadixBatchJobStatusFunc: succeededJobStatusFunc, - }, - "j2": { - updateRadixBatchJobStatusFunc: succeededJobStatusFunc, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseActive, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Restart: "any-current-restart", + Failed: 5, + }, + }, + "kubejob active, ready and no current status": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: 
[]batchv1.JobCondition{{Type: "any-condition-type", Status: corev1.ConditionTrue, Reason: "any-condition-reason", Message: "any-condition-message", LastTransitionTime: metav1.Now()}}, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseRunning, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Failed: 5, + }, + }, + "kubejob active, ready and current status is set": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{Type: "any-condition-type", Status: corev1.ConditionTrue, Reason: "any-condition-reason", Message: "any-condition-message", LastTransitionTime: metav1.Now()}}, + }}, + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseSucceeded}, - "j2": {phase: radixv1.BatchJobPhaseSucceeded}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeCompleted, - }, - }, - { - name: "all failed - batch is failed", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j1": { - updateRadixBatchJobStatusFunc: failedJobStatusFunc, - }, - "j2": { - updateRadixBatchJobStatusFunc: failedJobStatusFunc, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseRunning, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Restart: "any-current-restart", + Failed: 5, + }, + }, + "kubejob Complete condition and no current status": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{ + Type: batchv1.JobComplete, + Status: corev1.ConditionTrue, + Reason: "any-condition-reason", + Message: "any-condition-message", + LastTransitionTime: metav1.Time{Time: now.Add(-4 * time.Hour)}}, + }, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseSucceeded, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-4 * time.Hour)}, + Reason: "any-condition-reason", + Message: "any-condition-message", + Failed: 5, + }, + }, + "kubejob Complete condition and current status is set": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{ + Type: batchv1.JobComplete, + Status: 
corev1.ConditionTrue, + Reason: "any-condition-reason", + Message: "any-condition-message", + LastTransitionTime: metav1.Time{Time: now.Add(-4 * time.Hour)}}, + }, + }}, + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseFailed}, - "j2": {phase: radixv1.BatchJobPhaseFailed}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeCompleted, - }, - }, - { - name: "one waiting, one failed - batch is failed", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j2": { - updateRadixBatchJobStatusFunc: failedJobStatusFunc, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseSucceeded, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-4 * time.Hour)}, + Reason: "any-condition-reason", + Message: "any-condition-message", + Restart: "any-current-restart", + Failed: 5, + }, + }, + "kubejob SuccessCriteriaMet condition and no current status": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{ + Type: batchv1.JobSuccessCriteriaMet, + Status: corev1.ConditionTrue, + Reason: "any-condition-reason", + Message: "any-condition-message", + LastTransitionTime: metav1.Time{Time: now.Add(-4 * time.Hour)}}, + }, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseSucceeded, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-4 * time.Hour)}, + Reason: "any-condition-reason", + Message: "any-condition-message", + Failed: 5, + }, + }, + "kubejob SuccessCriteriaMet condition and current status is set": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{ + Type: batchv1.JobSuccessCriteriaMet, + Status: corev1.ConditionTrue, + Reason: "any-condition-reason", + Message: "any-condition-message", + LastTransitionTime: metav1.Time{Time: now.Add(-4 * time.Hour)}}, + }, + }}, + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseWaiting}, - "j2": {phase: radixv1.BatchJobPhaseFailed}, - 
}, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeActive, - }, - }, - { - name: "one active, one failed - batch is failed", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j2": { - updateRadixBatchJobStatusFunc: failedJobStatusFunc, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseSucceeded, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-4 * time.Hour)}, + Reason: "any-condition-reason", + Message: "any-condition-message", + Restart: "any-current-restart", + Failed: 5, + }, + }, + "kubejob Failed condition and no current status": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{ + Type: batchv1.JobFailed, + Status: corev1.ConditionTrue, + Reason: "any-condition-reason", + Message: "any-condition-message", + LastTransitionTime: metav1.Time{Time: now.Add(-4 * time.Hour)}}, + }, + }}}, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseFailed, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-4 * time.Hour)}, + Reason: "any-condition-reason", + Message: "any-condition-message", + Failed: 5, + }, + }, + "kubejob Failed condition and current status is set": { + job: jobSpec{ + kubeJob: &kubeJobSpec{ + creationTimestamp: metav1.Time{Time: now.Add(-5 * time.Hour)}, + status: batchv1.JobStatus{ + Failed: 5, + Active: 1, + Ready: pointers.Ptr[int32](1), + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + Conditions: []batchv1.JobCondition{{ + Type: batchv1.JobFailed, + Status: corev1.ConditionTrue, + Reason: "any-condition-reason", + Message: "any-condition-message", + LastTransitionTime: metav1.Time{Time: now.Add(-4 * time.Hour)}}, + }, + }}, + currentJobStatus: &radixv1.RadixBatchJobStatus{ + Restart: "any-current-restart", + CreationTime: &metav1.Time{Time: now.Add(-15 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-16 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-17 * time.Hour)}, + Phase: "any-current-phase", + Reason: "any-current-reason", + Message: "any-current-message", + Failed: 100, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseActive}, - "j2": {phase: radixv1.BatchJobPhaseFailed}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeActive, + expectedStatus: radixv1.RadixBatchJobStatus{ + Phase: radixv1.BatchJobPhaseFailed, + CreationTime: &metav1.Time{Time: now.Add(-5 * time.Hour)}, + StartTime: &metav1.Time{Time: now.Add(-6 * time.Hour)}, + EndTime: &metav1.Time{Time: now.Add(-4 * time.Hour)}, + Reason: "any-condition-reason", + Message: "any-condition-message", + Restart: "any-current-restart", + Failed: 5, }, }, - { - name: "one succeeded, one failed - batch is failed", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: 
map[string]updateJobStatus{ - "j1": { - updateRadixBatchJobStatusFunc: succeededJobStatusFunc, - }, - "j2": { - updateRadixBatchJobStatusFunc: failedJobStatusFunc, + } + + jobLabelsFunc := func(jobName string) labels.Set { + return radixlabels.Merge( + radixlabels.ForApplicationName(appName), + radixlabels.ForComponentName(jobComponentName), + radixlabels.ForBatchName(batchName), + radixlabels.ForJobScheduleJobType(), + radixlabels.ForBatchJobName(jobName), + ) + } + + for testName, test := range tests { + s.Run(testName, func() { + + rd := &radixv1.RadixDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: rdName}, + Spec: radixv1.RadixDeploymentSpec{AppName: appName, Jobs: []radixv1.RadixDeployJobComponent{{Name: jobComponentName}}}, + } + _, err := s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) + s.Require().NoError(err) + + batch := &radixv1.RadixBatch{ + ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, + Spec: radixv1.RadixBatchSpec{ + RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, Job: jobComponentName}, + Jobs: []radixv1.RadixBatchJob{{Name: batchJobName, Stop: &test.job.stop}}, }, - }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseSucceeded}, - "j2": {phase: radixv1.BatchJobPhaseFailed}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeCompleted, - }, - }, - { - name: "one stopped, one failed - batch is failed", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j1": { - updateRadixBatchJobFunc: func(job *radixv1.RadixBatchJob) { - job.Stop = pointers.Ptr(true) + } + if currentStatus := test.job.currentJobStatus; currentStatus != nil { + batch.Status.JobStatuses = []radixv1.RadixBatchJobStatus{*currentStatus} + batch.Status.JobStatuses[0].Name = batchJobName + } + batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) + s.Require().NoError(err) + + // Create k8s jobs required for building batch status + if kubeJob := test.job.kubeJob; kubeJob != nil { + j := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: getKubeJobName(batchName, batchJobName), + Namespace: namespace, + Labels: jobLabelsFunc(batchJobName), + CreationTimestamp: kubeJob.creationTimestamp, }, - updateRadixBatchJobStatusFunc: stoppedJobStatusFunc, - }, - "j2": { - updateRadixBatchJobStatusFunc: failedJobStatusFunc, + Status: kubeJob.status, + } + _, err = s.kubeClient.BatchV1().Jobs(namespace).Create(context.Background(), j, metav1.CreateOptions{}) + s.Require().NoError(err) + } + + // Run test + sut := s.createSyncer(batch, nil, WithClock(commonutils.NewFakeClock(now))) + s.Require().NoError(sut.OnSync(context.Background())) + batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batchName, metav1.GetOptions{}) + s.Require().NoError(err) + + expectedStatus := test.expectedStatus + expectedStatus.Name = batchJobName + s.Equal([]radixv1.RadixBatchJobStatus{expectedStatus}, batch.Status.JobStatuses) + }) + } +} + +func (s *syncerTestSuite) Test_BatchStatusCondition() { + const ( + namespace = "any-ns" + appName = "any-app" + rdName = "any-rd" + jobComponentName = "any-job" + 
batchName = "any-rb" + ) + + var ( + now time.Time = time.Date(2024, 1, 20, 8, 00, 00, 0, time.Local) + ) + + type jobSpec struct { + stop bool + kubeJobStatus batchv1.JobStatus + } + + type partialBatchStatus struct { + jobPhases []radixv1.RadixBatchJobPhase + condition radixv1.RadixBatchCondition + } + + waitingJob := jobSpec{kubeJobStatus: batchv1.JobStatus{}} + activeJob := jobSpec{kubeJobStatus: batchv1.JobStatus{Active: 1}} + runningJob := jobSpec{kubeJobStatus: batchv1.JobStatus{Active: 1, Ready: pointers.Ptr[int32](1)}} + succeededJob := jobSpec{kubeJobStatus: batchv1.JobStatus{Conditions: []batchv1.JobCondition{{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}}}} + failedJob := jobSpec{kubeJobStatus: batchv1.JobStatus{Conditions: []batchv1.JobCondition{{Type: batchv1.JobFailed, Status: corev1.ConditionTrue}}}} + stoppedJob := jobSpec{stop: true} + + tests := map[string]struct { + jobs map[string]jobSpec + expectedStatus partialBatchStatus + }{ + "Waiting condition when all jobs Waiting": { + jobs: map[string]jobSpec{ + "job1": waitingJob, + "job2": waitingJob, + }, + expectedStatus: partialBatchStatus{ + jobPhases: []radixv1.RadixBatchJobPhase{radixv1.BatchJobPhaseWaiting, radixv1.BatchJobPhaseWaiting}, + condition: radixv1.RadixBatchCondition{ + Type: radixv1.BatchConditionTypeWaiting, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseStopped}, - "j2": {phase: radixv1.BatchJobPhaseFailed}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeCompleted, - }, }, - { - name: "all stopped - batch is stopped", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - "j2": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j1": { - updateRadixBatchJobFunc: func(job *radixv1.RadixBatchJob) { - job.Stop = pointers.Ptr(true) - }, - updateRadixBatchJobStatusFunc: stoppedJobStatusFunc, - }, - "j2": { - updateRadixBatchJobFunc: func(job *radixv1.RadixBatchJob) { - job.Stop = pointers.Ptr(true) - }, - updateRadixBatchJobStatusFunc: stoppedJobStatusFunc, - }, + "Completed condition when all jobs in done phase": { + jobs: map[string]jobSpec{ + "job1": stoppedJob, + "job2": succeededJob, + "job3": failedJob, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseStopped}, - "j2": {phase: radixv1.BatchJobPhaseStopped}, - }, - expectedBatchStatus: expectedBatchStatusProps{ - conditionType: radixv1.BatchConditionTypeCompleted, - }, - }, - { - name: "one waiting, one stopped - batch is stopped", - jobNames: []string{"j1", "j2"}, - initialJobStatuses: map[string]func(status *batchv1.JobStatus){ - "j1": startJobStatusFunc, - }, - updateJobStatuses: map[string]updateJobStatus{ - "j1": { - updateRadixBatchJobFunc: func(job *radixv1.RadixBatchJob) { - job.Stop = pointers.Ptr(true) - }, - updateRadixBatchJobStatusFunc: stoppedJobStatusFunc, + expectedStatus: partialBatchStatus{ + jobPhases: []radixv1.RadixBatchJobPhase{radixv1.BatchJobPhaseStopped, radixv1.BatchJobPhaseSucceeded, radixv1.BatchJobPhaseFailed}, + condition: radixv1.RadixBatchCondition{ + Type: radixv1.BatchConditionTypeCompleted, + ActiveTime: &metav1.Time{Time: now}, + CompletionTime: &metav1.Time{Time: now}, }, }, - expectedJobStatuses: map[string]expectedJobStatusProps{ - "j1": {phase: radixv1.BatchJobPhaseStopped}, - "j2": {phase: radixv1.BatchJobPhaseWaiting}, - }, - expectedBatchStatus: 
expectedBatchStatusProps{
-				conditionType: radixv1.BatchConditionTypeActive,
-			},
-		},
-		{
-			name:     "one active, one stopped - batch is stopped",
-			jobNames: []string{"j1", "j2"},
-			initialJobStatuses: map[string]func(status *batchv1.JobStatus){
-				"j1": startJobStatusFunc,
-				"j2": startJobStatusFunc,
-			},
-			updateJobStatuses: map[string]updateJobStatus{
-				"j1": {
-					updateRadixBatchJobFunc: func(job *radixv1.RadixBatchJob) {
-						job.Stop = pointers.Ptr(true)
-					},
-					updateRadixBatchJobStatusFunc: stoppedJobStatusFunc,
+	}
+
+	// Build test spec for all job status phases that will lead to an Active condition
+	activeJobsMap := map[radixv1.RadixBatchJobPhase]jobSpec{
+		radixv1.BatchJobPhaseActive:  activeJob,
+		radixv1.BatchJobPhaseRunning: runningJob,
+	}
+	allJobsMap := map[radixv1.RadixBatchJobPhase]jobSpec{
+		radixv1.BatchJobPhaseWaiting:   waitingJob,
+		radixv1.BatchJobPhaseActive:    activeJob,
+		radixv1.BatchJobPhaseRunning:   runningJob,
+		radixv1.BatchJobPhaseStopped:   stoppedJob,
+		radixv1.BatchJobPhaseSucceeded: succeededJob,
+		radixv1.BatchJobPhaseFailed:    failedJob,
+	}
+	for activeStatus, activeJob := range activeJobsMap {
+		for anyStatus, anyJob := range allJobsMap {
+			tests[fmt.Sprintf("Active condition when jobs %s and %s", activeStatus, anyStatus)] = struct {
+				jobs           map[string]jobSpec
+				expectedStatus partialBatchStatus
+			}{
+				jobs: map[string]jobSpec{
+					"job1": activeJob,
+					"job2": anyJob,
 				},
-			},
-			expectedJobStatuses: map[string]expectedJobStatusProps{
-				"j1": {phase: radixv1.BatchJobPhaseStopped},
-				"j2": {phase: radixv1.BatchJobPhaseActive},
-			},
-			expectedBatchStatus: expectedBatchStatusProps{
-				conditionType: radixv1.BatchConditionTypeActive,
-			},
-		},
-		{
-			name:     "one succeeded, one stopped - batch is stopped",
-			jobNames: []string{"j1", "j2"},
-			initialJobStatuses: map[string]func(status *batchv1.JobStatus){
-				"j1": startJobStatusFunc,
-				"j2": startJobStatusFunc,
-			},
-			updateJobStatuses: map[string]updateJobStatus{
-				"j1": {
-					updateRadixBatchJobFunc: func(job *radixv1.RadixBatchJob) {
+				expectedStatus: partialBatchStatus{
+					jobPhases: []radixv1.RadixBatchJobPhase{activeStatus, anyStatus},
+					condition: radixv1.RadixBatchCondition{
+						Type:       radixv1.BatchConditionTypeActive,
+						ActiveTime: &metav1.Time{Time: now},
 					},
-					updateRadixBatchJobStatusFunc: stoppedJobStatusFunc,
-				},
-				"j2": {
-					updateRadixBatchJobStatusFunc: succeededJobStatusFunc,
 				},
-			},
-			expectedJobStatuses: map[string]expectedJobStatusProps{
-				"j1": {phase: radixv1.BatchJobPhaseStopped},
-				"j2": {phase: radixv1.BatchJobPhaseSucceeded},
-			},
-			expectedBatchStatus: expectedBatchStatusProps{
-				conditionType: radixv1.BatchConditionTypeCompleted,
-			},
-		},
+			}
+		}
 	}
-	rd := &radixv1.RadixDeployment{
-		ObjectMeta: metav1.ObjectMeta{Name: rdName},
-		Spec: radixv1.RadixDeploymentSpec{
-			AppName: "any-app",
-			Jobs: []radixv1.RadixDeployJobComponent{
-				{
-					Name: "any-job",
-				},
-			},
-		},
+
+	jobLabelsFunc := func(jobName string) labels.Set {
+		return radixlabels.Merge(
+			radixlabels.ForApplicationName(appName),
+			radixlabels.ForComponentName(jobComponentName),
+			radixlabels.ForBatchName(batchName),
+			radixlabels.ForJobScheduleJobType(),
+			radixlabels.ForBatchJobName(jobName),
+		)
 	}
-	_, err := s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{})
-	s.Require().NoError(err)
-	for _, ts := range scenarios {
-		s.T().Run(ts.name, func(t *testing.T) {
-			batchName := utils.RandString(10)
-			batch := &radixv1.RadixBatch{
-				ObjectMeta:
metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForBatchScheduleJobType()}, - Spec: radixv1.RadixBatchSpec{ - RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{ - LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, - Job: "any-job", - }, - Jobs: slice.Reduce(ts.jobNames, make([]radixv1.RadixBatchJob, 0), func(acc []radixv1.RadixBatchJob, jobName string) []radixv1.RadixBatchJob { - return append(acc, radixv1.RadixBatchJob{Name: jobName}) - }), - }, + for testName, test := range tests { + s.Run(testName, func() { + + rd := &radixv1.RadixDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: rdName}, + Spec: radixv1.RadixDeploymentSpec{AppName: appName, Jobs: []radixv1.RadixDeployJobComponent{{Name: jobComponentName}}}, } - batch, err := s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) - s.Require().NoError(err) - sut := s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - allJobs, err := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: radixlabels.ForBatchName(batchName).String(), - }) + _, err := s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) s.Require().NoError(err) - s.Require().Len(allJobs.Items, len(ts.jobNames)) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) - s.Require().NoError(err) - // Initial phase is Waiting - s.Require().Len(batch.Status.JobStatuses, len(ts.jobNames)) - for _, jobStatus := range batch.Status.JobStatuses { - s.Equal(radixv1.BatchJobPhaseWaiting, jobStatus.Phase) - s.Equal(&allJobs.Items[0].CreationTimestamp, jobStatus.CreationTime) - s.Empty(jobStatus.Reason) - s.Empty(jobStatus.Message) - s.Nil(jobStatus.StartTime) - s.Nil(jobStatus.EndTime) + batch := &radixv1.RadixBatch{ + ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, + Spec: radixv1.RadixBatchSpec{ + RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, Job: jobComponentName}, + Jobs: []radixv1.RadixBatchJob{}, + }, } - - for jobName, setStatusFunc := range ts.initialJobStatuses { - s.updateKubeJobStatus(getKubeJobName(batchName, jobName), namespace)(setStatusFunc) + for batchJobName, job := range test.jobs { + batch.Spec.Jobs = append(batch.Spec.Jobs, radixv1.RadixBatchJob{Name: batchJobName, Stop: &job.stop}) + + j := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: getKubeJobName(batchName, batchJobName), + Namespace: namespace, + Labels: jobLabelsFunc(batchJobName), + }, + Status: job.kubeJobStatus, + } + _, err = s.kubeClient.BatchV1().Jobs(namespace).Create(context.Background(), j, metav1.CreateOptions{}) + s.Require().NoError(err) } - sut = s.createSyncer(batch, nil) - s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) + batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) s.Require().NoError(err) - jobMap := getRadixBatchJobsMap(batch) - jobStatusMap := getRadixBatchJobStatusesMap(batch) - for jobName, update := range ts.updateJobStatuses { - job, ok := jobMap[jobName] - s.Require().True(ok, "Not found expected job %s", jobName) - if update.updateRadixBatchJobFunc 
!= nil { - update.updateRadixBatchJobFunc(job) - } - if update.updateRadixBatchJobStatusFunc != nil { - status, ok := jobStatusMap[jobName] - s.Require().True(ok, "Not found expected status for the job %s", jobName) - update.updateRadixBatchJobStatusFunc(status) - } - } - sut = s.createSyncer(batch, nil) + // Run test + sut := s.createSyncer(batch, nil, WithClock(commonutils.NewFakeClock(now))) s.Require().NoError(sut.OnSync(context.Background())) - batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batch.GetName(), metav1.GetOptions{}) + batch, err = s.radixClient.RadixV1().RadixBatches(namespace).Get(context.Background(), batchName, metav1.GetOptions{}) s.Require().NoError(err) - jobStatusMap = getRadixBatchJobStatusesMap(batch) - s.Require().Len(jobStatusMap, len(ts.expectedJobStatuses), "expectedJobStatuses does not match jobStatusMap") - for _, jobStatus := range batch.Status.JobStatuses { - jobStatusProps, ok := ts.expectedJobStatuses[jobStatus.Name] - s.Require().True(ok, "Not found expected job status %s", jobStatus.Name) - s.Equal(jobStatusProps.phase, jobStatus.Phase, "unexpected job status phase") - } - s.Require().Equal(ts.expectedBatchStatus.conditionType, batch.Status.Condition.Type, "unexpected batch status condition type") + + actualJobPhases := slice.Map(batch.Status.JobStatuses, func(s radixv1.RadixBatchJobStatus) radixv1.RadixBatchJobPhase { return s.Phase }) + s.ElementsMatch(test.expectedStatus.jobPhases, actualJobPhases) + s.Equal(test.expectedStatus.condition, batch.Status.Condition) }) } } @@ -2618,35 +2403,77 @@ func (s *syncerTestSuite) Test_RestartCorrectlyHandledWithIntermediateStatusUpda s.Equal(getKubeJobName(batchName, batchJobName), jobs.Items[0].GetName()) } -func getRadixBatchJobsMap(batch *radixv1.RadixBatch) map[string]*radixv1.RadixBatchJob { - batchJobsMap := make(map[string]*radixv1.RadixBatchJob) - for i := 0; i < len(batch.Spec.Jobs); i++ { - batchJobsMap[batch.Spec.Jobs[i].Name] = &batch.Spec.Jobs[i] +func (s *syncerTestSuite) Test_FailurePolicy() { + appName, batchName, componentName, namespace, rdName := "any-app", "any-batch", "compute", "any-ns", "any-rd" + jobName1, jobName2 := "any-job1", "any-job2" + rdFailurePolicy := &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + { + Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, + OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{ + Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, + Values: []int32{5, 3, 4}, + }, + }, + }, } - return batchJobsMap -} - -func getRadixBatchJobStatusesMap(batch *radixv1.RadixBatch) map[string]*radixv1.RadixBatchJobStatus { - jobStatusMap := make(map[string]*radixv1.RadixBatchJobStatus) - for i := 0; i < len(batch.Status.JobStatuses); i++ { - jobStatusMap[batch.Status.JobStatuses[i].Name] = &batch.Status.JobStatuses[i] + batchJobFailurePolicy := &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + { + Action: radixv1.RadixJobComponentFailurePolicyActionIgnore, + OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{ + Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpNotIn, + Values: []int32{4, 2, 3}, + }, + }, + }, } - return jobStatusMap -} - -func (s *syncerTestSuite) updateKubeJobStatus(jobName, namespace string) func(updater func(status *batchv1.JobStatus)) { - job, err := s.kubeClient.BatchV1().Jobs(namespace).Get(context.Background(), jobName, 
metav1.GetOptions{}) - if err != nil { - if errors.IsNotFound(err) { - return func(updater func(status *batchv1.JobStatus)) {} - } - s.FailNow(err.Error()) + batch := &radixv1.RadixBatch{ + ObjectMeta: metav1.ObjectMeta{Name: batchName, Labels: radixlabels.ForJobScheduleJobType()}, + Spec: radixv1.RadixBatchSpec{ + RadixDeploymentJobRef: radixv1.RadixDeploymentJobComponentSelector{ + LocalObjectReference: radixv1.LocalObjectReference{Name: rdName}, + Job: componentName, + }, + Jobs: []radixv1.RadixBatchJob{ + {Name: jobName1}, + {Name: jobName2, FailurePolicy: batchJobFailurePolicy}, + }, + }, } - return func(updater func(status *batchv1.JobStatus)) { - updater(&job.Status) - _, err := s.kubeClient.BatchV1().Jobs(namespace).Update(context.Background(), job, metav1.UpdateOptions{}) - if err != nil { - s.FailNow(err.Error()) - } + rd := &radixv1.RadixDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: rdName}, + Spec: radixv1.RadixDeploymentSpec{ + AppName: appName, + Jobs: []radixv1.RadixDeployJobComponent{ + { + Name: componentName, + FailurePolicy: rdFailurePolicy, + }, + }, + }, } + batch, err := s.radixClient.RadixV1().RadixBatches(namespace).Create(context.Background(), batch, metav1.CreateOptions{}) + s.Require().NoError(err) + _, err = s.radixClient.RadixV1().RadixDeployments(namespace).Create(context.Background(), rd, metav1.CreateOptions{}) + s.Require().NoError(err) + + sut := s.createSyncer(batch, nil) + s.Require().NoError(sut.OnSync(context.Background())) + jobs, _ := s.kubeClient.BatchV1().Jobs(namespace).List(context.Background(), metav1.ListOptions{}) + s.Require().Len(jobs.Items, 2) + + findJobByName := func(name string) func(j batchv1.Job) bool { + return func(j batchv1.Job) bool { return j.ObjectMeta.Name == getKubeJobName(batchName, name) } + } + + job1, found := slice.FindFirst(jobs.Items, findJobByName(jobName1)) + s.Require().True(found) + expectedPodFailurePolicy := utils.GetPodFailurePolicy(rd.Spec.Jobs[0].FailurePolicy) + s.Equal(expectedPodFailurePolicy, job1.Spec.PodFailurePolicy) + + job2, found := slice.FindFirst(jobs.Items, findJobByName(jobName2)) + s.Require().True(found) + expectedPodFailurePolicy = utils.GetPodFailurePolicy(batch.Spec.Jobs[1].FailurePolicy) + s.Equal(expectedPodFailurePolicy, job2.Spec.PodFailurePolicy) } diff --git a/pkg/apis/batch/utils.go b/pkg/apis/batch/utils.go index d7fa0dfaa..82f6b32f8 100644 --- a/pkg/apis/batch/utils.go +++ b/pkg/apis/batch/utils.go @@ -4,9 +4,11 @@ import ( "fmt" "github.com/equinor/radix-common/utils" + "github.com/equinor/radix-common/utils/slice" radixv1 "github.com/equinor/radix-operator/pkg/apis/radix/v1" radixlabels "github.com/equinor/radix-operator/pkg/apis/utils/labels" batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kubelabels "k8s.io/apimachinery/pkg/labels" ) @@ -54,3 +56,11 @@ func isKubeJobForBatchJob(batchJob *radixv1.RadixBatchJob) func(job *batchv1.Job return isResourceLabeledWithBatchJobName(batchJob.Name, job) } } + +func hasOneOfConditionTypes(conditionTypes ...batchv1.JobConditionType) func(batchv1.JobCondition) bool { + return func(condition batchv1.JobCondition) bool { + return slice.Any(conditionTypes, func(c batchv1.JobConditionType) bool { + return condition.Type == c && condition.Status == corev1.ConditionTrue + }) + } +} diff --git a/pkg/apis/deployment/jobschedulercomponent.go b/pkg/apis/deployment/jobschedulercomponent.go index bd705c50c..514185110 100644 --- a/pkg/apis/deployment/jobschedulercomponent.go +++ 
b/pkg/apis/deployment/jobschedulercomponent.go @@ -21,6 +21,10 @@ func newJobSchedulerComponent(jobComponent *radixv1.RadixDeployJobComponent, rd } } +func (js *jobSchedulerComponent) GetHealthChecks() *radixv1.RadixHealthChecks { + return nil +} + func (js *jobSchedulerComponent) GetImage() string { containerRegistry := os.Getenv(defaults.ContainerRegistryEnvironmentVariable) radixJobScheduler := os.Getenv(defaults.OperatorRadixJobSchedulerEnvironmentVariable) diff --git a/pkg/apis/deployment/kubedeployment.go b/pkg/apis/deployment/kubedeployment.go index d7084319c..df5cb5df4 100644 --- a/pkg/apis/deployment/kubedeployment.go +++ b/pkg/apis/deployment/kubedeployment.go @@ -315,11 +315,17 @@ func (deploy *Deployment) setDesiredDeploymentProperties(ctx context.Context, de } desiredDeployment.Spec.Template.Spec.Containers[0].VolumeMounts = volumeMounts - readinessProbe, err := getReadinessProbeForComponent(deployComponent) - if err != nil { - return err + if hc := deployComponent.GetHealthChecks(); hc != nil { + desiredDeployment.Spec.Template.Spec.Containers[0].ReadinessProbe = hc.ReadinessProbe.MapToCoreProbe() + desiredDeployment.Spec.Template.Spec.Containers[0].LivenessProbe = hc.LivenessProbe.MapToCoreProbe() + desiredDeployment.Spec.Template.Spec.Containers[0].StartupProbe = hc.StartupProbe.MapToCoreProbe() + } else { + readinessProbe, err := getDefaultReadinessProbeForComponent(deployComponent) + if err != nil { + return err + } + desiredDeployment.Spec.Template.Spec.Containers[0].ReadinessProbe = readinessProbe } - desiredDeployment.Spec.Template.Spec.Containers[0].ReadinessProbe = readinessProbe environmentVariables, err := GetEnvironmentVariablesForRadixOperator(ctx, deploy.kubeutil, appName, deploy.radixDeployment, deployComponent) if err != nil { @@ -449,7 +455,7 @@ func (deploy *Deployment) isEligibleForGarbageCollectComponent(componentName Rad return componentType != commonComponent.GetType() } -func getReadinessProbeForComponent(component v1.RadixCommonDeployComponent) (*corev1.Probe, error) { +func getDefaultReadinessProbeForComponent(component v1.RadixCommonDeployComponent) (*corev1.Probe, error) { if len(component.GetPorts()) == 0 { return nil, nil } diff --git a/pkg/apis/deployment/kubedeployment_test.go b/pkg/apis/deployment/kubedeployment_test.go index 202b0aa85..54c694fa2 100644 --- a/pkg/apis/deployment/kubedeployment_test.go +++ b/pkg/apis/deployment/kubedeployment_test.go @@ -11,6 +11,7 @@ import ( "github.com/equinor/radix-operator/pkg/apis/test" "github.com/equinor/radix-operator/pkg/apis/utils" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -22,7 +23,7 @@ func teardownReadinessProbe() { func TestGetReadinessProbe_MissingDefaultEnvVars(t *testing.T) { teardownReadinessProbe() - probe, err := getReadinessProbeForComponent(&v1.RadixDeployComponent{Ports: []v1.ComponentPort{{Name: "http", Port: int32(80)}}}) + probe, err := getDefaultReadinessProbeForComponent(&v1.RadixDeployComponent{Ports: []v1.ComponentPort{{Name: "http", Port: int32(80)}}}) assert.Error(t, err) assert.Nil(t, probe) } @@ -30,7 +31,7 @@ func TestGetReadinessProbe_MissingDefaultEnvVars(t *testing.T) { func TestGetReadinessProbe_Custom(t *testing.T) { test.SetRequiredEnvironmentVariables() - probe, err := getReadinessProbeForComponent(&v1.RadixDeployComponent{Ports: []v1.ComponentPort{{Name: "http", Port: int32(5000)}}}) + probe, err := getDefaultReadinessProbeForComponent(&v1.RadixDeployComponent{Ports: 
[]v1.ComponentPort{{Name: "http", Port: int32(5000)}}}) assert.Nil(t, err) assert.Equal(t, int32(5), probe.InitialDelaySeconds) @@ -39,6 +40,57 @@ func TestGetReadinessProbe_Custom(t *testing.T) { teardownReadinessProbe() } +func TestComponentWithoutCustomHealthChecks(t *testing.T) { + tu, client, kubeUtil, radixclient, kedaClient, prometheusclient, _, certClient := SetupTest(t) + rd, _ := ApplyDeploymentWithSync(tu, client, kubeUtil, radixclient, kedaClient, prometheusclient, certClient, + utils.ARadixDeployment(). + WithComponents(utils.NewDeployComponentBuilder(). + WithName("comp1")). + WithAppName("any-app"). + WithEnvironment("test")) + + component := rd.GetComponentByName("comp1") + assert.Nil(t, component.HealthChecks) +} + +func TestComponentWithCustomHealthChecks(t *testing.T) { + tu, client, kubeUtil, radixclient, kedaClient, prometheusclient, _, certClient := SetupTest(t) + createProbe := func(handler v1.RadixProbeHandler, seconds int32) *v1.RadixProbe { + return &v1.RadixProbe{ + RadixProbeHandler: handler, + InitialDelaySeconds: seconds, + TimeoutSeconds: seconds + 1, + PeriodSeconds: seconds + 2, + SuccessThreshold: seconds + 3, + FailureThreshold: seconds + 4, + // TerminationGracePeriodSeconds: pointers.Ptr(int64(seconds + 5)), + } + } + + readinessProbe := createProbe(v1.RadixProbeHandler{HTTPGet: &v1.RadixProbeHTTPGetAction{ + Port: 5000, + }}, 10) + + livenessProbe := createProbe(v1.RadixProbeHandler{TCPSocket: &v1.RadixProbeTCPSocketAction{ + Port: 5000, + }}, 20) + + startupProbe := createProbe(v1.RadixProbeHandler{Exec: &v1.RadixProbeExecAction{ + Command: []string{"echo", "hello"}, + }}, 30) + + rd, _ := ApplyDeploymentWithSync(tu, client, kubeUtil, radixclient, kedaClient, prometheusclient, certClient, + utils.ARadixDeployment(). + WithComponents(utils.NewDeployComponentBuilder(). + WithName("comp1"). + WithHealthChecks(startupProbe, readinessProbe, livenessProbe)). + WithAppName("any-app"). + WithEnvironment("test")) + + component := rd.GetComponentByName("comp1") + require.NotNil(t, component.HealthChecks) + assert.Equal(t, readinessProbe, component.HealthChecks.ReadinessProbe) + assert.Equal(t, livenessProbe, component.HealthChecks.LivenessProbe) + assert.Equal(t, startupProbe, component.HealthChecks.StartupProbe) +} + func Test_UpdateResourcesInDeployment(t *testing.T) { origRequests := map[string]string{"cpu": "10m", "memory": "100M"} diff --git a/pkg/apis/deployment/networkpolicy.go b/pkg/apis/deployment/networkpolicy.go index a811bb5e7..cb148adbf 100644 --- a/pkg/apis/deployment/networkpolicy.go +++ b/pkg/apis/deployment/networkpolicy.go @@ -74,26 +74,47 @@ func allowOauthAuxComponentEgressNetworkPolicy(appName string, env string, owner // This is because egress rule must allow traffic to the login.microsoftonline.com FQDN. // This FQDN has IP ranges 20.190.128.0/18 and 40.126.0.0/18 as of April 2022, // but may change at some point in the future.
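// The explicit port list below permits DNS lookups (53/TCP and 53/UDP), HTTPS (443/TCP) and Redis (6379 plain, 6380 TLS), so the OAuth auxiliary component can also reach a managed Redis session store while all other egress stays blocked.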
- return allowAllHttpsAndDnsEgressNetworkPolicy("radix-allow-oauth-aux-egress", kube.RadixAuxiliaryComponentTypeLabel, defaults.OAuthProxyAuxiliaryComponentType, 443, appName, env, owner) + return allowEgressNetworkByPortPolicy("radix-allow-oauth-aux-egress", kube.RadixAuxiliaryComponentTypeLabel, defaults.OAuthProxyAuxiliaryComponentType, appName, env, owner, []egressPortPolicy{ + {port: 53, protocol: corev1.ProtocolTCP}, + {port: 53, protocol: corev1.ProtocolUDP}, + {port: 443, protocol: corev1.ProtocolTCP}, + {port: 6379, protocol: corev1.ProtocolTCP}, // Redis Plain + {port: 6380, protocol: corev1.ProtocolTCP}, // Redis TLS + }) } func allowJobSchedulerServerEgressNetworkPolicy(appName string, env string, owner []metav1.OwnerReference, kubernetesApiPort int32) *v1.NetworkPolicy { // We allow outbound to entire Internet from the job scheduler server pods. // This is because egress rule must allow traffic to public IP of k8s API server, // and the public IP is dynamic. - return allowAllHttpsAndDnsEgressNetworkPolicy("radix-allow-job-scheduler-egress", kube.RadixPodIsJobSchedulerLabel, "true", kubernetesApiPort, appName, env, owner) + return allowEgressNetworkByPortPolicy("radix-allow-job-scheduler-egress", kube.RadixPodIsJobSchedulerLabel, "true", appName, env, owner, []egressPortPolicy{ + {port: 53, protocol: corev1.ProtocolTCP}, + {port: 53, protocol: corev1.ProtocolUDP}, + {port: kubernetesApiPort, protocol: corev1.ProtocolTCP}, + }) } func allowBatchSchedulerServerEgressNetworkPolicy(appName string, env string, owner []metav1.OwnerReference, kubernetesApiPort int32) *v1.NetworkPolicy { // We allow outbound to entire Internet from the batch scheduler server pods. // This is because egress rule must allow traffic to public IP of k8s API server, // and the public IP is dynamic.
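// As for the job scheduler above, egress is narrowed to DNS plus the configured Kubernetes API port; the destination stays unrestricted because the API server's public IP is dynamic.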
- return allowAllHttpsAndDnsEgressNetworkPolicy("radix-allow-batch-scheduler-egress", kube.RadixJobTypeLabel, kube.RadixJobTypeBatchSchedule, kubernetesApiPort, appName, env, owner) + return allowEgressNetworkByPortPolicy("radix-allow-batch-scheduler-egress", kube.RadixJobTypeLabel, kube.RadixJobTypeBatchSchedule, appName, env, owner, []egressPortPolicy{ + {port: 53, protocol: corev1.ProtocolTCP}, + {port: 53, protocol: corev1.ProtocolUDP}, + {port: kubernetesApiPort, protocol: corev1.ProtocolTCP}, + }) } -func allowAllHttpsAndDnsEgressNetworkPolicy(policyName string, targetLabelKey string, targetLabelValue string, portNumber int32, appName string, env string, owner []metav1.OwnerReference) *v1.NetworkPolicy { - var tcp = corev1.ProtocolTCP - var udp = corev1.ProtocolUDP +type egressPortPolicy struct { + port int32 + protocol corev1.Protocol +} + +func allowEgressNetworkByPortPolicy(policyName string, targetLabelKey string, targetLabelValue string, appName string, env string, owner []metav1.OwnerReference, egressPorts []egressPortPolicy) *v1.NetworkPolicy { + var egressPortsV1 []v1.NetworkPolicyPort + for _, port := range egressPorts { + protocol := port.protocol // copy before taking the address: &port.protocol would alias the loop variable, which is reused across iterations before Go 1.22 + egressPortsV1 = append(egressPortsV1, v1.NetworkPolicyPort{Port: &intstr.IntOrString{IntVal: port.port}, Protocol: &protocol}) + } np := v1.NetworkPolicy{ ObjectMeta: metav1.ObjectMeta{ @@ -113,26 +134,7 @@ func allowAllHttpsAndDnsEgressNetworkPolicy(policyName string, targetLabelKey st }, Egress: []v1.NetworkPolicyEgressRule{ { - Ports: []v1.NetworkPolicyPort{ - { - Protocol: &tcp, - Port: &intstr.IntOrString{ - IntVal: portNumber, - }, - }, - { - Protocol: &tcp, - Port: &intstr.IntOrString{ - IntVal: 53, - }, - }, - { - Protocol: &udp, - Port: &intstr.IntOrString{ - IntVal: 53, - }, - }, - }, + Ports: egressPortsV1, }, { To: []v1.NetworkPolicyPeer{{ diff --git a/pkg/apis/deployment/radixcomponent.go b/pkg/apis/deployment/radixcomponent.go index a9e5e9213..2fee75d33 100644 --- a/pkg/apis/deployment/radixcomponent.go +++ b/pkg/apis/deployment/radixcomponent.go @@ -74,6 +74,7 @@ func GetRadixComponentsForEnv(ctx context.Context, radixApplication *radixv1.Rad deployComponent.AlwaysPullImageOnDeploy = getRadixComponentAlwaysPullImageOnDeployFlag(&radixComponent, environmentSpecificConfig) deployComponent.ExternalDNS = getExternalDNSAliasForComponentEnvironment(radixApplication, componentName, env) deployComponent.SecretRefs = getRadixCommonComponentRadixSecretRefs(&radixComponent, environmentSpecificConfig) + deployComponent.HealthChecks = getRadixCommonComponentHealthChecks(&radixComponent, environmentSpecificConfig) deployComponent.PublicPort = getRadixComponentPort(&radixComponent) deployComponent.Authentication = auth deployComponent.Identity = identity @@ -93,6 +94,34 @@ func GetRadixComponentsForEnv(ctx context.Context, radixApplication *radixv1.Rad return deployComponents, nil } +func getRadixCommonComponentHealthChecks(r *radixv1.RadixComponent, config *radixv1.RadixEnvironmentConfig) *radixv1.RadixHealthChecks { + if r.HealthChecks == nil && (config == nil || config.HealthChecks == nil) { + return nil + } + hc := &radixv1.RadixHealthChecks{} + if r.HealthChecks != nil { + hc.StartupProbe = r.HealthChecks.StartupProbe.DeepCopy() + hc.ReadinessProbe = r.HealthChecks.ReadinessProbe.DeepCopy() + hc.LivenessProbe = r.HealthChecks.LivenessProbe.DeepCopy() + } + + if config == nil || config.HealthChecks == nil { + return hc + } + + if config.HealthChecks.ReadinessProbe != nil { + hc.ReadinessProbe = config.HealthChecks.ReadinessProbe.DeepCopy() + }
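+ // A probe set in the environment config replaces the corresponding component-level probe as a whole; individual probe fields are not merged.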
+ if config.HealthChecks.LivenessProbe != nil { + hc.LivenessProbe = config.HealthChecks.LivenessProbe.DeepCopy() + } + if config.HealthChecks.StartupProbe != nil { + hc.StartupProbe = config.HealthChecks.StartupProbe.DeepCopy() + } + + return hc +} + func getRadixComponentNetwork(component *radixv1.RadixComponent, environmentConfig *radixv1.RadixEnvironmentConfig) (*radixv1.Network, error) { var dst *radixv1.Network if component.Network != nil { @@ -197,7 +226,7 @@ func getRadixCommonComponentVolumeMounts(radixComponent radixv1.RadixCommonCompo return finalVolumeMounts, nil } -func getBatchStatusRules(radixJobComponent *radixv1.RadixJobComponent, environmentSpecificConfig *radixv1.RadixJobComponentEnvironmentConfig) []radixv1.BatchStatusRule { +func getRadixJobComponentBatchStatusRules(radixJobComponent *radixv1.RadixJobComponent, environmentSpecificConfig *radixv1.RadixJobComponentEnvironmentConfig) []radixv1.BatchStatusRule { batchStatusRules := radixJobComponent.GetBatchStatusRules() if commonutils.IsNil(environmentSpecificConfig) || environmentSpecificConfig.BatchStatusRules == nil { return batchStatusRules diff --git a/pkg/apis/deployment/radixcomponent_test.go b/pkg/apis/deployment/radixcomponent_test.go index dc5157b41..f787646c7 100644 --- a/pkg/apis/deployment/radixcomponent_test.go +++ b/pkg/apis/deployment/radixcomponent_test.go @@ -1048,6 +1048,87 @@ func Test_GetRadixComponents_Monitoring(t *testing.T) { } } +func Test_GetRadixComponents_CustomHealthChecks(t *testing.T) { + createProbe := func(handler radixv1.RadixProbeHandler, seconds int32) *radixv1.RadixProbe { + return &radixv1.RadixProbe{ + RadixProbeHandler: handler, + InitialDelaySeconds: seconds, + TimeoutSeconds: seconds + 1, + PeriodSeconds: seconds + 2, + SuccessThreshold: seconds + 3, + FailureThreshold: seconds + 4, + // TerminationGracePeriodSeconds: pointers.Ptr(int64(seconds + 5)), + } + } + + httpProbe := radixv1.RadixProbeHandler{HTTPGet: &radixv1.RadixProbeHTTPGetAction{Port: 5000, Path: "/healthz", Scheme: corev1.URISchemeHTTP}} + execProbe := radixv1.RadixProbeHandler{Exec: &radixv1.RadixProbeExecAction{Command: []string{"/bin/sh", "-c", "/healthz /healthz"}}} + tcpProbe := radixv1.RadixProbeHandler{TCPSocket: &radixv1.RadixProbeTCPSocketAction{Port: 8000}} + + testCases := []struct { + description string + compHealthChecks *radixv1.RadixHealthChecks + envHealthChecks *radixv1.RadixHealthChecks + + expectedHealthChecks *radixv1.RadixHealthChecks + }{ + {"No configuration set results in default readiness probe", nil, nil, nil}, + { + description: "component has healthchecks, no env config", + compHealthChecks: &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 30), ReadinessProbe: createProbe(execProbe, 10), StartupProbe: createProbe(httpProbe, 20)}, + expectedHealthChecks: &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 30), ReadinessProbe: createProbe(execProbe, 10), StartupProbe: createProbe(httpProbe, 20)}, + }, + { + "Env healthchecks, no component healthchecks", + nil, + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 1), ReadinessProbe: createProbe(execProbe, 10), StartupProbe: createProbe(httpProbe, 20)}, + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 1), ReadinessProbe: createProbe(execProbe, 10), StartupProbe: createProbe(httpProbe, 20)}, + }, + { + "Env healthchecks, component healthchecks, env overrides comp", + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(execProbe, 30), ReadinessProbe: createProbe(httpProbe, 10),
StartupProbe: createProbe(tcpProbe, 20)}, + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 1), ReadinessProbe: createProbe(execProbe, 40), StartupProbe: createProbe(httpProbe, 20)}, + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 1), ReadinessProbe: createProbe(execProbe, 40), StartupProbe: createProbe(httpProbe, 20)}, + }, + { + "Env healthchecks, component healthchecks, env merges comp", + &radixv1.RadixHealthChecks{ReadinessProbe: createProbe(httpProbe, 10), StartupProbe: createProbe(tcpProbe, 20)}, + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 1), ReadinessProbe: createProbe(execProbe, 10)}, + &radixv1.RadixHealthChecks{LivenessProbe: createProbe(tcpProbe, 1), ReadinessProbe: createProbe(execProbe, 10), StartupProbe: createProbe(tcpProbe, 20)}, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + envConfig := utils.NewComponentEnvironmentBuilder().WithEnvironment("dev") + if testCase.envHealthChecks != nil { + envConfig.WithHealthChecks(testCase.envHealthChecks.StartupProbe, testCase.envHealthChecks.ReadinessProbe, testCase.envHealthChecks.LivenessProbe) + } + compConfig := utils.NewApplicationComponentBuilder().WithName("comp").WithEnvironmentConfig(envConfig) + if testCase.compHealthChecks != nil { + compConfig.WithHealthChecks(testCase.compHealthChecks.StartupProbe, testCase.compHealthChecks.ReadinessProbe, testCase.compHealthChecks.LivenessProbe) + } + raBuilder := utils.ARadixApplication().WithComponents(compConfig) + ra := raBuilder.BuildRA() + + deployComponents, err := GetRadixComponentsForEnv(context.Background(), ra, nil, "dev", make(pipeline.DeployComponentImages), make(radixv1.EnvVarsMap), nil) + require.NoError(t, err) + require.Len(t, deployComponents, 1) + + if testCase.expectedHealthChecks == nil { + assert.Nil(t, deployComponents[0].HealthChecks) + } else { + require.NotNil(t, deployComponents[0].HealthChecks) + assert.Equal(t, testCase.expectedHealthChecks.ReadinessProbe, deployComponents[0].HealthChecks.ReadinessProbe) + assert.Equal(t, testCase.expectedHealthChecks.LivenessProbe, deployComponents[0].HealthChecks.LivenessProbe) + assert.Equal(t, testCase.expectedHealthChecks.StartupProbe, deployComponents[0].HealthChecks.StartupProbe) + } + + }) + } + +} + func Test_GetRadixComponents_ReplicasOverride(t *testing.T) { componentName := "comp" env := "dev" diff --git a/pkg/apis/deployment/radixjobcomponent.go b/pkg/apis/deployment/radixjobcomponent.go index 9793c86e5..b7404a4e0 100644 --- a/pkg/apis/deployment/radixjobcomponent.go +++ b/pkg/apis/deployment/radixjobcomponent.go @@ -3,7 +3,9 @@ package deployment import ( "context" stderrors "errors" + "fmt" + "dario.cat/mergo" "github.com/equinor/radix-common/utils/numbers" "github.com/equinor/radix-common/utils/slice" "github.com/equinor/radix-operator/pkg/apis/defaults" @@ -95,7 +97,11 @@ func (c *jobComponentsBuilder) buildJobComponent(ctx context.Context, radixJobCo if err != nil { errs = append(errs, err) } - batchStatusRules := getBatchStatusRules(&radixJobComponent, environmentSpecificConfig) + failurePolicy, err := getRadixJobComponentFailurePolicy(radixJobComponent, environmentSpecificConfig) + if err != nil { + errs = append(errs, err) + } + if len(errs) > 0 { return nil, stderrors.Join(errs...) 
} @@ -120,11 +126,31 @@ func (c *jobComponentsBuilder) buildJobComponent(ctx context.Context, radixJobCo ReadOnlyFileSystem: getRadixCommonComponentReadOnlyFileSystem(&radixJobComponent, environmentSpecificConfig), VolumeMounts: volumeMounts, Runtime: componentImage.Runtime, - BatchStatusRules: batchStatusRules, + BatchStatusRules: getRadixJobComponentBatchStatusRules(&radixJobComponent, environmentSpecificConfig), + FailurePolicy: failurePolicy, } return &deployJob, nil } +func getRadixJobComponentFailurePolicy(job v1.RadixJobComponent, jobEnvConfig *v1.RadixJobComponentEnvironmentConfig) (*v1.RadixJobComponentFailurePolicy, error) { + var dst *v1.RadixJobComponentFailurePolicy + if job.FailurePolicy != nil { + dst = job.FailurePolicy.DeepCopy() + } + + if jobEnvConfig != nil && jobEnvConfig.FailurePolicy != nil { + if dst == nil { + dst = &v1.RadixJobComponentFailurePolicy{} + } + + if err := mergo.Merge(dst, jobEnvConfig.FailurePolicy, mergo.WithOverride, mergo.WithOverrideEmptySlice, mergo.WithTransformers(booleanPointerTransformer)); err != nil { + return nil, fmt.Errorf("failed to merge failurePolicy from environment config: %w", err) + } + } + + return dst, nil +} + func getRadixJobComponentTimeLimitSeconds(radixJobComponent v1.RadixJobComponent, environmentSpecificConfig *v1.RadixJobComponentEnvironmentConfig) *int64 { if environmentSpecificConfig != nil && environmentSpecificConfig.TimeLimitSeconds != nil { return environmentSpecificConfig.TimeLimitSeconds diff --git a/pkg/apis/deployment/radixjobcomponent_test.go b/pkg/apis/deployment/radixjobcomponent_test.go index bf634f8b3..523f6e954 100644 --- a/pkg/apis/deployment/radixjobcomponent_test.go +++ b/pkg/apis/deployment/radixjobcomponent_test.go @@ -508,6 +508,116 @@ func Test_GetRadixJobComponents_Notifications(t *testing.T) { } } +func Test_GetRadixJobComponents_FailurePolicy(t *testing.T) { + + tests := map[string]struct { + commonConfig *radixv1.RadixJobComponentFailurePolicy + configureEnvironment bool + environmentConfig *radixv1.RadixJobComponentFailurePolicy + expected *radixv1.RadixJobComponentFailurePolicy + }{ + "nil when common and environment is nil": { + commonConfig: nil, + configureEnvironment: true, + environmentConfig: nil, + expected: nil, + }, + "nil when common is nil and environment not set": { + commonConfig: nil, + configureEnvironment: false, + expected: nil, + }, + "use common when environment is nil": { + commonConfig: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + }, + }, + configureEnvironment: true, + environmentConfig: nil, + expected: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + }, + }, + }, + "use common when environment not set": { + commonConfig: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: 
[]int32{1, 2, 3}}}, + }, + }, + configureEnvironment: false, + expected: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + }, + }, + }, + "use environment when common is nil": { + commonConfig: nil, + configureEnvironment: true, + environmentConfig: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + }, + }, + expected: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + }, + }, + }, + "use environment when both common and environment is set": { + commonConfig: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + {Action: radixv1.RadixJobComponentFailurePolicyActionIgnore, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpNotIn, Values: []int32{4, 5, 6}}}, + }, + }, + configureEnvironment: true, + environmentConfig: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionCount, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{7, 8}}}, + }, + }, + expected: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionCount, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{7, 8}}}, + }, + }, + }, + "use environment when environment empty and common is set": { + commonConfig: &radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{1, 2, 3}}}, + {Action: radixv1.RadixJobComponentFailurePolicyActionIgnore, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpNotIn, Values: []int32{4, 5, 6}}}, + }, + }, + configureEnvironment: true, + environmentConfig: &radixv1.RadixJobComponentFailurePolicy{}, + expected: &radixv1.RadixJobComponentFailurePolicy{}, + }, + } + + for testName, test := range tests { + t.Run(testName, func(t *testing.T) { + const envName = "anyenv" + jobComponent := 
utils.AnApplicationJobComponent().WithName("anyjob").WithFailurePolicy(test.commonConfig) + if test.configureEnvironment { + jobComponent = jobComponent.WithEnvironmentConfigs( + utils.AJobComponentEnvironmentConfig().WithEnvironment(envName).WithFailurePolicy(test.environmentConfig), + ) + } + ra := utils.ARadixApplication().WithJobComponents(jobComponent).BuildRA() + sut := jobComponentsBuilder{ra: ra, env: envName, componentImages: make(pipeline.DeployComponentImages)} + jobs, err := sut.JobComponents(context.Background()) + require.NoError(t, err) + assert.Equal(t, test.expected, jobs[0].FailurePolicy) + }) + } +} + func TestGetRadixJobComponentsForEnv_ImageWithImageTagName(t *testing.T) { const ( dynamicImageName1 = "custom-image-name1:{imageTagName}" diff --git a/pkg/apis/radix/v1/radixapptypes.go b/pkg/apis/radix/v1/radixapptypes.go index 9dc5f4604..189c73ee8 100644 --- a/pkg/apis/radix/v1/radixapptypes.go +++ b/pkg/apis/radix/v1/radixapptypes.go @@ -346,6 +346,11 @@ type RadixComponent struct { // +optional DockerfileName string `json:"dockerfileName,omitempty"` + // HealthChecks can tell Radix if your application is ready to receive traffic. + // Defaults to a TCP check against your first listed port. + // If any health checks are defined, no defaults will be added, and you should add your own readinessProbe. + HealthChecks *RadixHealthChecks `json:"healthChecks,omitempty"` + // Name of an existing container image to use when running the component. // More info: https://www.radix.equinor.com/references/reference-radix-config/#image // +optional @@ -489,6 +494,11 @@ type RadixEnvironmentConfig struct { // +optional Image string `json:"image,omitempty"` + // HealthChecks can tell Radix if your application is ready to receive traffic. + // Defaults to a TCP check against your first listed port. + // If any health checks are defined, no defaults will be added, and you should add your own readinessProbe. + HealthChecks *RadixHealthChecks `json:"healthChecks,omitempty"` + // Number of desired replicas. // More info: https://www.radix.equinor.com/references/reference-radix-config/#replicas // +kubebuilder:validation:Minimum=0 @@ -699,6 +709,84 @@ type RadixJobComponent struct { // BatchStatusRules Rules define how a batch status is set corresponding to batch job statuses // +optional BatchStatusRules []BatchStatusRule `json:"batchStatusRules,omitempty"` + + // Specifies the policy of handling failed job replicas. In particular, it allows + // specifying the set of actions and conditions which need to be + // satisfied to take the associated action. + // If empty, the default behaviour applies - the counter of failed job replicas + // is incremented and it is checked against the backoffLimit. + // +optional + FailurePolicy *RadixJobComponentFailurePolicy `json:"failurePolicy,omitempty"` +} + +// RadixJobComponentFailurePolicyRuleOnExitCodesOperator specifies the relationship between a job replica's exit code +// and the list of exit codes in the requirement. +// +kubebuilder:validation:Enum=In;NotIn +type RadixJobComponentFailurePolicyRuleOnExitCodesOperator string + +const ( + // The requirement is satisfied if the job replica's exit code is in the set of specified values. + RadixJobComponentFailurePolicyRuleOnExitCodesOpIn RadixJobComponentFailurePolicyRuleOnExitCodesOperator = "In" + + // The requirement is satisfied if the job replica's exit code is not in the set of specified values.
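+ // For example, operator NotIn with values [1, 2] is satisfied by any failed replica whose exit code is neither 1 nor 2.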
+ RadixJobComponentFailurePolicyRuleOnExitCodesOpNotIn RadixJobComponentFailurePolicyRuleOnExitCodesOperator = "NotIn" +) + +// RadixJobComponentFailurePolicyRuleOnExitCodes describes the requirement for handling +// a failed job replica based on its exit code. +type RadixJobComponentFailurePolicyRuleOnExitCodes struct { + // Represents the relationship between the job replica's exit code and the + // specified values. Replicas completed with success (exit code 0) are + // excluded from the requirement check. + Operator RadixJobComponentFailurePolicyRuleOnExitCodesOperator `json:"operator"` + + // Specifies the set of values. The job replica's exit code is checked against this set of + // values with respect to the operator. The list must not contain duplicates. + // Value '0' cannot be used for the In operator. + // +kubebuilder:validation:MinItems:=1 + // +kubebuilder:validation:MaxItems:=255 + // +kubebuilder:validation:items:Minimum:=0 + // +listType=set + Values []int32 `json:"values"` +} + +// RadixJobComponentFailurePolicyAction specifies how a job replica failure is handled. +// +kubebuilder:validation:Enum=FailJob;Ignore;Count +type RadixJobComponentFailurePolicyAction string + +const ( + // This is an action which might be taken on a job replica failure - mark the + // job as Failed and terminate all running pods. + RadixJobComponentFailurePolicyActionFailJob RadixJobComponentFailurePolicyAction = "FailJob" + + // This is an action which might be taken on a job replica failure - the counter towards + // .backoffLimit is not incremented and a replacement replica is created. + RadixJobComponentFailurePolicyActionIgnore RadixJobComponentFailurePolicyAction = "Ignore" + + // This is an action which might be taken on a job replica failure - the replica failure + // is handled in the default way - the counter towards .backoffLimit is incremented. + RadixJobComponentFailurePolicyActionCount RadixJobComponentFailurePolicyAction = "Count" +) + +// RadixJobComponentFailurePolicyRule describes how a job replica failure is handled when the onExitCodes rules are met. +type RadixJobComponentFailurePolicyRule struct { + // Specifies the action taken on a job replica failure when the onExitCodes requirements are satisfied. + Action RadixJobComponentFailurePolicyAction `json:"action"` + + // Represents the requirement on the job replica exit codes. + OnExitCodes RadixJobComponentFailurePolicyRuleOnExitCodes `json:"onExitCodes"` +} + +// RadixJobComponentFailurePolicy describes how failed job replicas influence the backoffLimit. +type RadixJobComponentFailurePolicy struct { + // A list of failure policy rules. The rules are evaluated in order. + // Once a rule matches a job replica failure, the remaining rules are ignored. + // When no rule matches the failure, the default handling applies - the + // counter of failures is incremented and it is checked against + // the backoffLimit. + // +kubebuilder:validation:MaxItems:=20 + // +listType=atomic + Rules []RadixJobComponentFailurePolicyRule `json:"rules"` } // RadixJobComponentEnvironmentConfig defines environment specific settings @@ -795,6 +883,14 @@ type RadixJobComponentEnvironmentConfig struct { // BatchStatusRules Rules define how a batch status in an environment is set corresponding to batch job statuses // +optional BatchStatusRules []BatchStatusRule `json:"batchStatusRules,omitempty"` + + // Specifies the policy of handling failed job replicas.
In particular, it allows + // specifying the set of actions and conditions which need to be + // satisfied to take the associated action. + // If empty, the default behaviour applies - the counter of failed job replicas + // is incremented and it is checked against the backoffLimit. + // +optional + FailurePolicy *RadixJobComponentFailurePolicy `json:"failurePolicy,omitempty"` } // RadixJobComponentPayload defines the path and where the payload received diff --git a/pkg/apis/radix/v1/radixbatchtypes.go b/pkg/apis/radix/v1/radixbatchtypes.go index 632395635..bac53282b 100644 --- a/pkg/apis/radix/v1/radixbatchtypes.go +++ b/pkg/apis/radix/v1/radixbatchtypes.go @@ -99,8 +99,12 @@ type RadixBatchJob struct { // ImageTagName defines the image tag name to use for the job image // - // required: false + // +optional ImageTagName string `json:"imageTagName,omitempty"` + + // FailurePolicy specifies the policy of handling failed job replicas + // +optional + FailurePolicy *RadixJobComponentFailurePolicy `json:"failurePolicy,omitempty"` } // PayloadSecretKeySelector selects a key of a Secret. diff --git a/pkg/apis/radix/v1/radixdeploytypes.go b/pkg/apis/radix/v1/radixdeploytypes.go index 12e134626..7af7b87af 100644 --- a/pkg/apis/radix/v1/radixdeploytypes.go +++ b/pkg/apis/radix/v1/radixdeploytypes.go @@ -128,6 +128,7 @@ type RadixDeployComponent struct { ExternalDNS []RadixDeployExternalDNS `json:"externalDNS,omitempty"` // Deprecated: For backward compatibility we must still support this field. New code should use ExternalDNS instead. DNSExternalAlias []string `json:"dnsExternalAlias,omitempty"` + HealthChecks *RadixHealthChecks `json:"healthChecks,omitempty"` Monitoring bool `json:"monitoring"` MonitoringConfig MonitoringConfig `json:"monitoringConfig,omitempty"` Resources ResourceRequirements `json:"resources,omitempty"` @@ -142,6 +143,19 @@ type RadixDeployComponent struct { Network *Network `json:"network,omitempty"` } +func (deployComponent *RadixDeployComponent) GetHealthChecks() *RadixHealthChecks { + if deployComponent.HealthChecks == nil { + return nil + } + if deployComponent.HealthChecks.ReadinessProbe == nil && + deployComponent.HealthChecks.LivenessProbe == nil && + deployComponent.HealthChecks.StartupProbe == nil { + return nil + } + + return deployComponent.HealthChecks +} + func (deployComponent *RadixDeployComponent) GetName() string { return deployComponent.Name } @@ -423,6 +437,14 @@ type RadixDeployJobComponent struct { // BatchStatusRules Rules define how a batch status is set corresponding to batch job statuses // +optional BatchStatusRules []BatchStatusRule `json:"batchStatusRules,omitempty"` + + // FailurePolicy specifies the policy of handling failed job replicas + // +optional + FailurePolicy *RadixJobComponentFailurePolicy `json:"failurePolicy,omitempty"` +} + +func (r *RadixDeployJobComponent) GetHealthChecks() *RadixHealthChecks { + return nil } type RadixComponentType string @@ -463,6 +485,7 @@ type RadixCommonDeployComponent interface { GetReadOnlyFileSystem() *bool GetRuntime() *Runtime GetNetwork() *Network + GetHealthChecks() *RadixHealthChecks } // RadixCommonDeployComponentFactory defines a common component factory diff --git a/pkg/apis/radix/v1/radixhealthchecktypes.go b/pkg/apis/radix/v1/radixhealthchecktypes.go new file mode 100644 index 000000000..d8e67ece4 --- /dev/null +++ b/pkg/apis/radix/v1/radixhealthchecktypes.go @@ -0,0 +1,209 @@ +package v1 + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +type RadixHealthChecks
struct { + // Periodic probe of container liveness. + // Container will be restarted if the probe fails. + // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + // +optional + LivenessProbe *RadixProbe `json:"livenessProbe,omitempty"` + // Periodic probe of container service readiness. + // Container will be removed from service endpoints if the probe fails. + // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + // Defaults to a TCP probe against the first listed port + // +optional + ReadinessProbe *RadixProbe `json:"readinessProbe,omitempty"` + // StartupProbe indicates that the Pod has successfully initialized. + // If specified, no other probes are executed until this completes successfully. + // If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + // This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + // when it might take a long time to load data or warm a cache, than during steady-state operation. + // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + // +optional + StartupProbe *RadixProbe `json:"startupProbe,omitempty"` +} + +// RadixProbe describes a health check to be performed against a container to determine whether it is +// alive or ready to receive traffic. +type RadixProbe struct { + // The action taken to determine the health of a container + RadixProbeHandler `json:",inline"` + // Number of seconds after the container has started before liveness probes are initiated. + // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + // +optional + InitialDelaySeconds int32 `json:"initialDelaySeconds,omitempty"` + // Number of seconds after which the probe times out. + // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + // +kubebuilder:validation:Minimum=1 + // +default=1 + // +optional + TimeoutSeconds int32 `json:"timeoutSeconds,omitempty"` + // How often (in seconds) to perform the probe. + // +kubebuilder:validation:Minimum=1 + // +default=10 + // +optional + PeriodSeconds int32 `json:"periodSeconds,omitempty"` + // Minimum consecutive successes for the probe to be considered successful after having failed. + // Must be 1 for liveness and startup. + // +kubebuilder:validation:Minimum=1 + // +default=1 + // +optional + SuccessThreshold int32 `json:"successThreshold,omitempty"` + // Minimum consecutive failures for the probe to be considered failed after having succeeded. + // +kubebuilder:validation:Minimum=1 + // +default=3 + // +optional + FailureThreshold int32 `json:"failureThreshold,omitempty"` + + // TODO: This is a beta property that we might want to adopt in the future + // TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty"` +} + +func (rp *RadixProbe) MapToCoreProbe() *corev1.Probe { + if rp == nil { + return nil + } + + return &corev1.Probe{ + ProbeHandler: rp.RadixProbeHandler.MapToCoreProbe(), + InitialDelaySeconds: rp.InitialDelaySeconds, + TimeoutSeconds: rp.TimeoutSeconds, + PeriodSeconds: rp.PeriodSeconds, + SuccessThreshold: rp.SuccessThreshold, + FailureThreshold: rp.FailureThreshold, + } +} + +// RadixProbeHandler defines a specific action that should be taken in a probe. +// One and only one of the fields must be specified. +type RadixProbeHandler struct { + // Exec specifies the action to take.
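+ // Hypothetical radixconfig snippet: exec: { command: ["/bin/sh", "-c", "test -f /tmp/ready"] }.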
+ Exec *RadixProbeExecAction `json:"exec,omitempty"` + // HTTPGet specifies the http request to perform. + HTTPGet *RadixProbeHTTPGetAction `json:"httpGet,omitempty"` + // TCPSocket specifies an action involving a TCP port. + TCPSocket *RadixProbeTCPSocketAction `json:"tcpSocket,omitempty"` + // GRPC specifies an action involving a GRPC port. + GRPC *RadixProbeGRPCAction `json:"grpc,omitempty"` +} + +func (p RadixProbeHandler) MapToCoreProbe() corev1.ProbeHandler { + return corev1.ProbeHandler{ + Exec: p.Exec.MapToCoreProbe(), + HTTPGet: p.HTTPGet.MapToCoreProbe(), + TCPSocket: p.TCPSocket.MapToCoreProbe(), + GRPC: p.GRPC.MapToCoreProbe(), + } +} + +// RadixProbeHTTPGetAction describes an action based on HTTP Get requests. +type RadixProbeHTTPGetAction struct { + // Path to access on the HTTP server. + // +optional + Path string `json:"path,omitempty"` + // port number to access on the container. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + Port int32 `json:"port"` + // Host name to connect to, defaults to the pod IP. You probably want to set + // "Host" in httpHeaders instead. + // +optional + Host string `json:"host,omitempty"` + // Scheme to use for connecting to the host. + // Defaults to HTTP. + // +optional + // +kubebuilder:validation:Enum=HTTPS;HTTP + Scheme corev1.URIScheme `json:"scheme,omitempty"` + // Custom headers to set in the request. HTTP allows repeated headers. + // +optional + // +listType=atomic + HTTPHeaders []corev1.HTTPHeader `json:"httpHeaders,omitempty"` +} + +func (a *RadixProbeHTTPGetAction) MapToCoreProbe() *corev1.HTTPGetAction { + if a == nil { + return nil + } + + return &corev1.HTTPGetAction{ + Path: a.Path, + Port: intstr.FromInt32(a.Port), + Host: a.Host, + Scheme: a.Scheme, + HTTPHeaders: a.HTTPHeaders, + } +} + +// RadixProbeExecAction describes a "run in container" action. +type RadixProbeExecAction struct { + // Command is the command line to execute inside the container, the working directory for the + // command is root ('/') in the container's filesystem. The command is simply exec'd, it is + // not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + // a shell, you need to explicitly call out to that shell. + // Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + // +optional + // +listType=atomic + Command []string `json:"command,omitempty"` +} + +func (a *RadixProbeExecAction) MapToCoreProbe() *corev1.ExecAction { + if a == nil { + return nil + } + + return &corev1.ExecAction{ + Command: a.Command, + } +} + +// RadixProbeTCPSocketAction describes an action based on opening a socket +type RadixProbeTCPSocketAction struct { + // port number to access on the container. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + Port int32 `json:"port"` + // Optional: Host name to connect to, defaults to the pod IP. + // +optional + Host string `json:"host,omitempty"` +} + +func (a *RadixProbeTCPSocketAction) MapToCoreProbe() *corev1.TCPSocketAction { + if a == nil { + return nil + } + + return &corev1.TCPSocketAction{ + Port: intstr.FromInt32(a.Port), + Host: a.Host, + } +} + +type RadixProbeGRPCAction struct { + // Port number of the gRPC service. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + Port int32 `json:"port"` + + // Service is the name of the service to place in the gRPC HealthCheckRequest + // (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). 
+ // + // If this is not specified, the default behavior is defined by gRPC. + // +optional + // +default="" + Service *string `json:"service"` +} + +func (a *RadixProbeGRPCAction) MapToCoreProbe() *corev1.GRPCAction { + if a == nil { + return nil + } + + return &corev1.GRPCAction{ + Port: a.Port, + Service: a.Service, + } +} diff --git a/pkg/apis/radix/v1/radixjobtypes.go b/pkg/apis/radix/v1/radixjobtypes.go index 2c2b35047..7c5ebf75b 100644 --- a/pkg/apis/radix/v1/radixjobtypes.go +++ b/pkg/apis/radix/v1/radixjobtypes.go @@ -77,33 +77,32 @@ const ( type RadixBuildSpec struct { // Tag of the built image // - // required: true + // +required ImageTag string `json:"imageTag"` // Branch, from which the image to be built // - // required: true + // +required Branch string `json:"branch"` // ToEnvironment the environment to build or build-deploy to // - // required: false - // example: prod + // +optional ToEnvironment string `json:"toEnvironment,omitempty"` // CommitID, from which the image to be built // - // required: false + // +optional CommitID string `json:"commitID,omitempty"` // Is the built image need to be pushed to the container registry repository // - // required: false + // +optional PushImage bool `json:"pushImage,omitempty"` // OverrideUseBuildCache override default or configured build cache option // - // required: false + // +optional OverrideUseBuildCache *bool `json:"overrideUseBuildCache,omitempty"` } @@ -111,22 +110,22 @@ type RadixBuildSpec struct { type RadixPromoteSpec struct { // Name of the Radix deployment to be promoted // - // required: false + // +optional DeploymentName string `json:"deploymentName,omitempty"` // Environment name, from which the Radix deployment is being promoted // - // required: true + // +required FromEnvironment string `json:"fromEnvironment"` // Environment name, to which the Radix deployment is being promoted // - // required: true + // +required ToEnvironment string `json:"toEnvironment"` // CommitID of the promoted deployment // - // required: false + // +optional CommitID string `json:"commitID,omitempty"` } @@ -134,24 +133,23 @@ type RadixPromoteSpec struct { type RadixDeploySpec struct { // Target environment for deploy // - // required: true + // +required ToEnvironment string `json:"toEnvironment"` // Image tags names for components - if empty will use default logic // - // required: false - // Example: component1: tag1,component2: tag2 + // +optional ImageTagNames map[string]string `json:"imageTagNames,omitempty"` // Commit ID connected to the deployment // - // required: false + // +optional CommitID string `json:"commitID,omitempty"` // ComponentsToDeploy List of components to deploy // OPTIONAL If specified, only these components are deployed // - // required: false + // +optional ComponentsToDeploy []string `json:"componentsToDeploy,omitempty"` } @@ -159,7 +157,7 @@ type RadixDeploySpec struct { type RadixApplyConfigSpec struct { // Deploy External DNS configuration // - // required: false + // +optional DeployExternalDNS bool `json:"deployExternalDNS,omitempty"` } diff --git a/pkg/apis/radix/v1/zz_generated.deepcopy.go b/pkg/apis/radix/v1/zz_generated.deepcopy.go index fa71c6a19..c5d7443a2 100644 --- a/pkg/apis/radix/v1/zz_generated.deepcopy.go +++ b/pkg/apis/radix/v1/zz_generated.deepcopy.go @@ -1160,6 +1160,11 @@ func (in *RadixBatchJob) DeepCopyInto(out *RadixBatchJob) { *out = new(bool) **out = **in } + if in.FailurePolicy != nil { + in, out := &in.FailurePolicy, &out.FailurePolicy + *out = 
new(RadixJobComponentFailurePolicy) + (*in).DeepCopyInto(*out) + } return } @@ -1367,6 +1372,11 @@ func (in *RadixBuildSpec) DeepCopy() *RadixBuildSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RadixComponent) DeepCopyInto(out *RadixComponent) { *out = *in + if in.HealthChecks != nil { + in, out := &in.HealthChecks, &out.HealthChecks + *out = new(RadixHealthChecks) + (*in).DeepCopyInto(*out) + } if in.Ports != nil { in, out := &in.Ports, &out.Ports *out = make([]ComponentPort, len(*in)) @@ -1608,6 +1618,11 @@ func (in *RadixDeployComponent) DeepCopyInto(out *RadixDeployComponent) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.HealthChecks != nil { + in, out := &in.HealthChecks, &out.HealthChecks + *out = new(RadixHealthChecks) + (*in).DeepCopyInto(*out) + } out.MonitoringConfig = in.MonitoringConfig in.Resources.DeepCopyInto(&out.Resources) if in.HorizontalScaling != nil { @@ -1771,6 +1786,11 @@ func (in *RadixDeployJobComponent) DeepCopyInto(out *RadixDeployJobComponent) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailurePolicy != nil { + in, out := &in.FailurePolicy, &out.FailurePolicy + *out = new(RadixJobComponentFailurePolicy) + (*in).DeepCopyInto(*out) + } return } @@ -2008,6 +2028,11 @@ func (in *RadixEnvironment) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RadixEnvironmentConfig) DeepCopyInto(out *RadixEnvironmentConfig) { *out = *in + if in.HealthChecks != nil { + in, out := &in.HealthChecks, &out.HealthChecks + *out = new(RadixHealthChecks) + (*in).DeepCopyInto(*out) + } if in.Replicas != nil { in, out := &in.Replicas, &out.Replicas *out = new(int) @@ -2159,6 +2184,37 @@ func (in *RadixEnvironmentStatus) DeepCopy() *RadixEnvironmentStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixHealthChecks) DeepCopyInto(out *RadixHealthChecks) { + *out = *in + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(RadixProbe) + (*in).DeepCopyInto(*out) + } + if in.ReadinessProbe != nil { + in, out := &in.ReadinessProbe, &out.ReadinessProbe + *out = new(RadixProbe) + (*in).DeepCopyInto(*out) + } + if in.StartupProbe != nil { + in, out := &in.StartupProbe, &out.StartupProbe + *out = new(RadixProbe) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixHealthChecks. +func (in *RadixHealthChecks) DeepCopy() *RadixHealthChecks { + if in == nil { + return nil + } + out := new(RadixHealthChecks) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RadixHorizontalScaling) DeepCopyInto(out *RadixHorizontalScaling) { *out = *in @@ -2517,6 +2573,11 @@ func (in *RadixJobComponent) DeepCopyInto(out *RadixJobComponent) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailurePolicy != nil { + in, out := &in.FailurePolicy, &out.FailurePolicy + *out = new(RadixJobComponentFailurePolicy) + (*in).DeepCopyInto(*out) + } return } @@ -2597,6 +2658,11 @@ func (in *RadixJobComponentEnvironmentConfig) DeepCopyInto(out *RadixJobComponen (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailurePolicy != nil { + in, out := &in.FailurePolicy, &out.FailurePolicy + *out = new(RadixJobComponentFailurePolicy) + (*in).DeepCopyInto(*out) + } return } @@ -2610,6 +2676,67 @@ func (in *RadixJobComponentEnvironmentConfig) DeepCopy() *RadixJobComponentEnvir return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixJobComponentFailurePolicy) DeepCopyInto(out *RadixJobComponentFailurePolicy) { + *out = *in + if in.Rules != nil { + in, out := &in.Rules, &out.Rules + *out = make([]RadixJobComponentFailurePolicyRule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixJobComponentFailurePolicy. +func (in *RadixJobComponentFailurePolicy) DeepCopy() *RadixJobComponentFailurePolicy { + if in == nil { + return nil + } + out := new(RadixJobComponentFailurePolicy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixJobComponentFailurePolicyRule) DeepCopyInto(out *RadixJobComponentFailurePolicyRule) { + *out = *in + in.OnExitCodes.DeepCopyInto(&out.OnExitCodes) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixJobComponentFailurePolicyRule. +func (in *RadixJobComponentFailurePolicyRule) DeepCopy() *RadixJobComponentFailurePolicyRule { + if in == nil { + return nil + } + out := new(RadixJobComponentFailurePolicyRule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixJobComponentFailurePolicyRuleOnExitCodes) DeepCopyInto(out *RadixJobComponentFailurePolicyRuleOnExitCodes) { + *out = *in + if in.Values != nil { + in, out := &in.Values, &out.Values + *out = make([]int32, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixJobComponentFailurePolicyRuleOnExitCodes. +func (in *RadixJobComponentFailurePolicyRuleOnExitCodes) DeepCopy() *RadixJobComponentFailurePolicyRuleOnExitCodes { + if in == nil { + return nil + } + out := new(RadixJobComponentFailurePolicyRuleOnExitCodes) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RadixJobComponentPayload) DeepCopyInto(out *RadixJobComponentPayload) { *out = *in @@ -2796,6 +2923,138 @@ func (in *RadixPrivateImageHubCredential) DeepCopy() *RadixPrivateImageHubCreden return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RadixProbe) DeepCopyInto(out *RadixProbe) { + *out = *in + in.RadixProbeHandler.DeepCopyInto(&out.RadixProbeHandler) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixProbe. +func (in *RadixProbe) DeepCopy() *RadixProbe { + if in == nil { + return nil + } + out := new(RadixProbe) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixProbeExecAction) DeepCopyInto(out *RadixProbeExecAction) { + *out = *in + if in.Command != nil { + in, out := &in.Command, &out.Command + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixProbeExecAction. +func (in *RadixProbeExecAction) DeepCopy() *RadixProbeExecAction { + if in == nil { + return nil + } + out := new(RadixProbeExecAction) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixProbeGRPCAction) DeepCopyInto(out *RadixProbeGRPCAction) { + *out = *in + if in.Service != nil { + in, out := &in.Service, &out.Service + *out = new(string) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixProbeGRPCAction. +func (in *RadixProbeGRPCAction) DeepCopy() *RadixProbeGRPCAction { + if in == nil { + return nil + } + out := new(RadixProbeGRPCAction) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixProbeHTTPGetAction) DeepCopyInto(out *RadixProbeHTTPGetAction) { + *out = *in + if in.HTTPHeaders != nil { + in, out := &in.HTTPHeaders, &out.HTTPHeaders + *out = make([]corev1.HTTPHeader, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixProbeHTTPGetAction. +func (in *RadixProbeHTTPGetAction) DeepCopy() *RadixProbeHTTPGetAction { + if in == nil { + return nil + } + out := new(RadixProbeHTTPGetAction) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RadixProbeHandler) DeepCopyInto(out *RadixProbeHandler) { + *out = *in + if in.Exec != nil { + in, out := &in.Exec, &out.Exec + *out = new(RadixProbeExecAction) + (*in).DeepCopyInto(*out) + } + if in.HTTPGet != nil { + in, out := &in.HTTPGet, &out.HTTPGet + *out = new(RadixProbeHTTPGetAction) + (*in).DeepCopyInto(*out) + } + if in.TCPSocket != nil { + in, out := &in.TCPSocket, &out.TCPSocket + *out = new(RadixProbeTCPSocketAction) + **out = **in + } + if in.GRPC != nil { + in, out := &in.GRPC, &out.GRPC + *out = new(RadixProbeGRPCAction) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixProbeHandler. +func (in *RadixProbeHandler) DeepCopy() *RadixProbeHandler { + if in == nil { + return nil + } + out := new(RadixProbeHandler) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RadixProbeTCPSocketAction) DeepCopyInto(out *RadixProbeTCPSocketAction) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RadixProbeTCPSocketAction. +func (in *RadixProbeTCPSocketAction) DeepCopy() *RadixProbeTCPSocketAction { + if in == nil { + return nil + } + out := new(RadixProbeTCPSocketAction) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RadixPromoteSpec) DeepCopyInto(out *RadixPromoteSpec) { *out = *in diff --git a/pkg/apis/radixvalidators/errors.go b/pkg/apis/radixvalidators/errors.go index 43ffdedc5..c0a7f4a84 100644 --- a/pkg/apis/radixvalidators/errors.go +++ b/pkg/apis/radixvalidators/errors.go @@ -16,6 +16,7 @@ var ( ErrEnvForDNSExternalAliasNotDefined = errors.New("env for dns external alias not defined") ErrComponentForDNSExternalAliasNotDefined = errors.New("component for dns external alias not defined") ErrComponentForDNSExternalAliasIsNotMarkedAsPublic = errors.New("component for dns external alias is not marked as public") + ErrComponentHasInvalidHealthCheck = errors.New("component has invalid health check") ErrEnvironmentReferencedByComponentDoesNotExist = errors.New("environment referenced by component does not exist") ErrInvalidPortNameLength = errors.New("invalid port name length") ErrPortNameIsRequiredForPublicComponent = errors.New("port name is required for public component") @@ -26,6 +27,8 @@ var ( ErrMemoryResourceRequirementFormat = errors.New("memory resource requirement format") ErrCPUResourceRequirementFormat = errors.New("cpu resource requirement format") ErrInvalidVerificationType = errors.New("invalid verification") + ErrInvalidHealthCheckProbe = errors.New("probe configuration error, only one action allowed") + ErrSuccessThresholdMustBeOne = errors.New("success threshold must be equal to one") ErrResourceRequestOverLimit = errors.New("resource request over limit") ErrInvalidResource = errors.New("invalid resource") ErrDuplicateExternalAlias = errors.New("duplicate external alias") @@ -108,6 +111,7 @@ var ( ErrMissingAzureIdentity = errors.New("missing identity") ErrInvalidRuntimeArchitecture = errors.New("invalid runtime architecture") ErrInvalidIPv4OrCIDR = errors.New("invalid IPv4 or CIDR") + ErrFailurePolicyRuleExitCodeZeroNotAllowedForInOperator = errors.New("value 0 cannot be used for the In operator") ) // DuplicateAliasForDNSAliasError Error when aliases are duplicate diff --git a/pkg/apis/radixvalidators/testdata/radixconfig.yaml b/pkg/apis/radixvalidators/testdata/radixconfig.yaml index 3855a77c5..5a65b9b71 100644 --- a/pkg/apis/radixvalidators/testdata/radixconfig.yaml +++ b/pkg/apis/radixvalidators/testdata/radixconfig.yaml @@ -31,6 +31,20 @@ spec: clientId: 11111111-2222-BBBB-cccc-555555555555 runtime: architecture: arm64 + healthChecks: + startupProbe: + tcpSocket: + port: 8000 + livenessProbe: + tcpSocket: + port: 8000 + successThreshold: 1 + readinessProbe: + successThreshold: 1 + periodSeconds: 30 + httpGet: + port: 8000 + path: /healthz network: ingress: public: @@ -54,6 +68,20 @@ spec: refresh: 30m expire: 168h sameSite: "strict" + healthChecks: + startupProbe: + tcpSocket: + port: 8000 + livenessProbe: + tcpSocket: + port: 8000 + successThreshold: 1 + readinessProbe: + successThreshold: 1 + periodSeconds: 30 + httpGet: + port: 8000 + path: /healthz resources: limits: memory: "512Mi" @@ -149,6 +177,16 @@ spec: clientId: 
11111111-2222-3333-4444-555555555555 runtime: architecture: arm64 + failurePolicy: + rules: + - action: FailJob + onExitCodes: + operator: In + values: [1,2] + - action: FailJob + onExitCodes: + operator: NotIn + values: [0,1] environmentConfig: - environment: dev variables: @@ -170,6 +208,16 @@ spec: path: /path/to/mount runtime: architecture: amd64 + failurePolicy: + rules: + - action: FailJob + onExitCodes: + operator: In + values: [1,2] + - action: FailJob + onExitCodes: + operator: NotIn + values: [0,1] - name: job2 src: job2/ schedulerPort: 8888 @@ -196,4 +244,4 @@ spec: dnsAlias: - environment: prod component: app2 - alias: my-alias \ No newline at end of file + alias: my-alias diff --git a/pkg/apis/radixvalidators/validate_ra.go b/pkg/apis/radixvalidators/validate_ra.go index 8f1e49c84..8cff539fc 100644 --- a/pkg/apis/radixvalidators/validate_ra.go +++ b/pkg/apis/radixvalidators/validate_ra.go @@ -331,6 +331,10 @@ func validateComponent(app *radixv1.RadixApplication, component radixv1.RadixCom errs = append(errs, fmt.Errorf("invalid network configuration: %w", err)) } + if err := validateHealthChecks(component.HealthChecks); err != nil { + errs = append(errs, fmt.Errorf("invalid health check configuration: %w", err)) + } + for _, environment := range component.EnvironmentConfig { if err := validateComponentEnvironment(app, component, environment); err != nil { errs = append(errs, fmt.Errorf("invalid configuration for environment %s: %w", environment.Environment, err)) @@ -372,6 +376,10 @@ func validateComponentEnvironment(app *radixv1.RadixApplication, component radix errs = append(errs, fmt.Errorf("invalid network configuration: %w", err)) } + if err := validateHealthChecks(environment.HealthChecks); err != nil { + errs = append(errs, fmt.Errorf("invalid health check configuration: %w", err)) + } + return errors.Join(errs...) } @@ -428,6 +436,10 @@ func validateJobComponent(app *radixv1.RadixApplication, job radixv1.RadixJobCom errs = append(errs, err) } + if err := validateFailurePolicy(job.FailurePolicy); err != nil { + errs = append(errs, fmt.Errorf("invalid failurePolicy configuration: %w", err)) + } + for _, environment := range job.EnvironmentConfig { if err := validateJobComponentEnvironment(app, job, environment); err != nil { errs = append(errs, fmt.Errorf("invalid configuration for environment %s: %w", environment.Environment, err)) @@ -437,6 +449,41 @@ func validateJobComponent(app *radixv1.RadixApplication, job radixv1.RadixJobCom return errors.Join(errs...) 
 }
 
+func validateFailurePolicy(failurePolicy *radixv1.RadixJobComponentFailurePolicy) error {
+	if failurePolicy == nil {
+		return nil
+	}
+
+	if len(failurePolicy.Rules) > 0 {
+		var errs []error
+		for _, rule := range failurePolicy.Rules {
+			errs = append(errs, validateFailurePolicyRule(rule))
+		}
+		if err := errors.Join(errs...); err != nil {
+			return fmt.Errorf("invalid rules configuration: %w", err)
+		}
+	}
+
+	return nil
+}
+
+func validateFailurePolicyRule(rule radixv1.RadixJobComponentFailurePolicyRule) error {
+	if err := validateFailurePolicyRuleOnExitCodes(rule.OnExitCodes); err != nil {
+		return fmt.Errorf("invalid onExitCodes configuration: %w", err)
+	}
+
+	return nil
+}
+
+func validateFailurePolicyRuleOnExitCodes(onExitCodes radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes) error {
+	if onExitCodes.Operator == radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn &&
+		slices.Contains(onExitCodes.Values, 0) {
+		return ErrFailurePolicyRuleExitCodeZeroNotAllowedForInOperator
+	}
+
+	return nil
+}
+
 func validateJobComponentEnvironment(app *radixv1.RadixApplication, job radixv1.RadixJobComponent, environment radixv1.RadixJobComponentEnvironmentConfig) error {
 	var errs []error
 
@@ -461,6 +508,10 @@ func validateJobComponentEnvironment(app *radixv1.RadixApplication, job radixv1.
 		errs = append(errs, err)
 	}
 
+	if err := validateFailurePolicy(environment.FailurePolicy); err != nil {
+		errs = append(errs, fmt.Errorf("invalid failurePolicy configuration: %w", err))
+	}
+
 	return errors.Join(errs...)
 }
 
@@ -829,6 +880,68 @@ func validateRadixComponentSecrets(component radixv1.RadixCommonComponent, app *
 	return validateConflictingEnvironmentAndSecretRefsNames(component, envsEnvVarsMap)
 }
 
+func validateHealthChecks(healthChecks *radixv1.RadixHealthChecks) error {
+	if healthChecks == nil {
+		return nil
+	}
+
+	var errs []error
+
+	if err := validateProbe(healthChecks.StartupProbe); err != nil {
+		errs = append(errs, fmt.Errorf("probe StartupProbe is invalid: %w", err))
+	}
+	if err := validateProbe(healthChecks.ReadinessProbe); err != nil {
+		errs = append(errs, fmt.Errorf("probe ReadinessProbe is invalid: %w", err))
+	}
+	if err := validateProbe(healthChecks.LivenessProbe); err != nil {
+		errs = append(errs, fmt.Errorf("probe LivenessProbe is invalid: %w", err))
+	}
+
+	// SuccessThreshold must be 0 (unset) or 1 for the startup probe
+	if healthChecks.StartupProbe != nil && healthChecks.StartupProbe.SuccessThreshold > 1 {
+		errs = append(errs, fmt.Errorf("probe StartupProbe is invalid: %w", ErrSuccessThresholdMustBeOne))
+	}
+
+	// SuccessThreshold must be 0 (unset) or 1 for the liveness probe
+	if healthChecks.LivenessProbe != nil && healthChecks.LivenessProbe.SuccessThreshold > 1 {
+		errs = append(errs, fmt.Errorf("probe LivenessProbe is invalid: %w", ErrSuccessThresholdMustBeOne))
+	}
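+
+	// The readiness probe is deliberately not restricted here: Kubernetes
+	// requires successThreshold to be 1 only for liveness and startup probes.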
+
+	return errors.Join(errs...)
+}
+
+func validateProbe(probe *radixv1.RadixProbe) error {
+	if probe == nil {
+		return nil
+	}
+
+	definedProbes := 0
+	if probe.HTTPGet != nil {
+		definedProbes++
+	}
+
+	if probe.TCPSocket != nil {
+		definedProbes++
+	}
+
+	if probe.Exec != nil {
+		definedProbes++
+	}
+
+	if probe.GRPC != nil {
+		definedProbes++
+	}
+
+	if definedProbes > 1 {
+		return ErrInvalidHealthCheckProbe
+	}
+
+	return nil
+}
+
 func getEnvVarNameMap(componentEnvVarsMap radixv1.EnvVarsMap, envsEnvVarsMap radixv1.EnvVarsMap) map[string]bool {
 	envVarsMap := make(map[string]bool)
 	for name := range componentEnvVarsMap {
diff --git a/pkg/apis/radixvalidators/validate_ra_test.go b/pkg/apis/radixvalidators/validate_ra_test.go
index b96a03899..d65a6e903 100644
--- a/pkg/apis/radixvalidators/validate_ra_test.go
+++ b/pkg/apis/radixvalidators/validate_ra_test.go
@@ -606,6 +606,41 @@ func Test_invalid_ra(t *testing.T) {
 			rr.Spec.Components[0].EnvironmentConfig[0].Authentication.OAuth2.Cookie.Expire = "30m"
 			rr.Spec.Components[0].EnvironmentConfig[0].Authentication.OAuth2.Cookie.Refresh = "1h"
 		}},
+		{"health check probe with multiple actions is invalid", radixvalidators.ErrInvalidHealthCheckProbe, func(rr *radixv1.RadixApplication) {
+			rr.Spec.Components[0].HealthChecks = &radixv1.RadixHealthChecks{
+				LivenessProbe: &radixv1.RadixProbe{
+					RadixProbeHandler: radixv1.RadixProbeHandler{
+						HTTPGet:   &radixv1.RadixProbeHTTPGetAction{Port: 5000, Path: "/healthz"},
+						Exec:      &radixv1.RadixProbeExecAction{Command: []string{"/bin/sh", "-c", "/healthz"}},
+						TCPSocket: &radixv1.RadixProbeTCPSocketAction{Port: 5000},
+					},
+				},
+				ReadinessProbe: &radixv1.RadixProbe{
+					RadixProbeHandler: radixv1.RadixProbeHandler{
+						HTTPGet:   &radixv1.RadixProbeHTTPGetAction{Port: 5000, Path: "/healthz"},
+						Exec:      &radixv1.RadixProbeExecAction{Command: []string{"/bin/sh", "-c", "/healthz"}},
+						TCPSocket: &radixv1.RadixProbeTCPSocketAction{Port: 5000},
+					},
+				},
+				StartupProbe: &radixv1.RadixProbe{
+					RadixProbeHandler: radixv1.RadixProbeHandler{
+						HTTPGet:   &radixv1.RadixProbeHTTPGetAction{Port: 5000, Path: "/healthz"},
+						Exec:      &radixv1.RadixProbeExecAction{Command: []string{"/bin/sh", "-c", "/healthz"}},
+						TCPSocket: &radixv1.RadixProbeTCPSocketAction{Port: 5000},
+					},
+				},
+			}
+		}},
+		{"health check probe with success threshold above one is invalid", radixvalidators.ErrSuccessThresholdMustBeOne, func(rr *radixv1.RadixApplication) {
+			rr.Spec.Components[0].HealthChecks = &radixv1.RadixHealthChecks{
+				LivenessProbe: &radixv1.RadixProbe{
+					RadixProbeHandler: radixv1.RadixProbeHandler{
+						HTTPGet: &radixv1.RadixProbeHTTPGetAction{Port: 5000, Path: "/healthz"},
+					},
+					SuccessThreshold: 5,
+				},
+			}
+		}},
 		{"duplicate name in job/component boundary", radixvalidators.DuplicateComponentOrJobNameErrorWithMessage([]string{validRAFirstComponentName}), func(ra *radixv1.RadixApplication) {
 			job := *ra.Spec.Jobs[0].DeepCopy()
 			job.Name = validRAFirstComponentName
@@ -671,6 +706,24 @@ func Test_invalid_ra(t *testing.T) {
 		{"invalid value network.ingress.public.allow for component environment config", radixvalidators.ErrInvalidIPv4OrCIDR, func(ra *radixv1.RadixApplication) {
 			ra.Spec.Components[0].EnvironmentConfig[0].Network = &radixv1.Network{Ingress: &radixv1.Ingress{Public: &radixv1.IngressPublic{Allow: &[]radixv1.IPOrCIDR{radixv1.IPOrCIDR("any")}}}}
 		}},
+		{"invalid exit code 0 for In operator in failure policy for job", radixvalidators.ErrFailurePolicyRuleExitCodeZeroNotAllowedForInOperator, func(ra *radixv1.RadixApplication) {
+			ra.Spec.Jobs[0].FailurePolicy = &radixv1.RadixJobComponentFailurePolicy{
+				Rules: []radixv1.RadixJobComponentFailurePolicyRule{
+					{
+						Action:      radixv1.RadixJobComponentFailurePolicyActionFailJob,
+						OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{0}},
+					},
+				}}
+		}},
+		{"invalid exit code 0 for In operator in failure policy for job environment config", radixvalidators.ErrFailurePolicyRuleExitCodeZeroNotAllowedForInOperator, func(ra *radixv1.RadixApplication) {
+			ra.Spec.Jobs[0].EnvironmentConfig[0].FailurePolicy = &radixv1.RadixJobComponentFailurePolicy{
+				Rules: []radixv1.RadixJobComponentFailurePolicyRule{
+					{
+						Action:      radixv1.RadixJobComponentFailurePolicyActionFailJob,
+						OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{0}},
+					},
+				}}
+		}},
 	}
 
 	_, client := validRASetup()
diff --git a/pkg/apis/utils/applicationcomponent_builder.go b/pkg/apis/utils/applicationcomponent_builder.go
index bc115de30..dae81bd3b 100644
--- a/pkg/apis/utils/applicationcomponent_builder.go
+++ b/pkg/apis/utils/applicationcomponent_builder.go
@@ -10,6 +10,7 @@ type RadixApplicationComponentBuilder interface {
 	WithAlwaysPullImageOnDeploy(bool) RadixApplicationComponentBuilder
 	WithSourceFolder(string) RadixApplicationComponentBuilder
 	WithDockerfileName(string) RadixApplicationComponentBuilder
+	WithHealthChecks(startupProbe, readinessProbe, livenessProbe *radixv1.RadixProbe) RadixApplicationComponentBuilder
 	WithImage(string) RadixApplicationComponentBuilder
 	WithImageTagName(imageTagName string) RadixApplicationComponentBuilder
 	WithPublic(bool) RadixApplicationComponentBuilder // Deprecated: For backwards compatibility WithPublic is still supported, new code should use WithPublicPort instead
@@ -64,6 +65,7 @@ type radixApplicationComponentBuilder struct {
 	horizontalScaling *radixv1.RadixHorizontalScaling
 	runtime           *radixv1.Runtime
 	network           *radixv1.Network
+	healthChecks      *radixv1.RadixHealthChecks
 }
 
 func (rcb *radixApplicationComponentBuilder) WithName(name string) RadixApplicationComponentBuilder {
@@ -76,6 +78,15 @@ func (rcb *radixApplicationComponentBuilder) WithAlwaysPullImageOnDeploy(val boo
 	return rcb
 }
 
+func (rcb *radixApplicationComponentBuilder) WithHealthChecks(startupProbe, readinessProbe, livenessProbe *radixv1.RadixProbe) RadixApplicationComponentBuilder {
+	rcb.healthChecks = &radixv1.RadixHealthChecks{
+		LivenessProbe:  livenessProbe,
+		ReadinessProbe: readinessProbe,
+		StartupProbe:   startupProbe,
+	}
+	return rcb
+}
+
 func (rcb *radixApplicationComponentBuilder) WithSourceFolder(sourceFolder string) RadixApplicationComponentBuilder {
 	rcb.sourceFolder = sourceFolder
 	return rcb
@@ -230,6 +241,7 @@ func (rcb *radixApplicationComponentBuilder) BuildComponent() radixv1.RadixCompo
 		Name:           rcb.name,
 		SourceFolder:   rcb.sourceFolder,
 		DockerfileName: rcb.dockerfileName,
+		HealthChecks:   rcb.healthChecks,
 		Image:          rcb.image,
 		Ports:          rcb.ports,
 		Secrets:        rcb.secrets,
diff --git a/pkg/apis/utils/applicationjobcomponent_builder.go b/pkg/apis/utils/applicationjobcomponent_builder.go
index 8679d05d8..8a85e8d77 100644
--- a/pkg/apis/utils/applicationjobcomponent_builder.go
+++ b/pkg/apis/utils/applicationjobcomponent_builder.go
@@ -29,6 +29,7 @@ type RadixApplicationJobComponentBuilder interface {
 	WithNotifications(*v1.Notifications) RadixApplicationJobComponentBuilder
 	WithReadOnlyFileSystem(*bool) RadixApplicationJobComponentBuilder
 	WithRuntime(*v1.Runtime) RadixApplicationJobComponentBuilder
+	WithFailurePolicy(*v1.RadixJobComponentFailurePolicy) RadixApplicationJobComponentBuilder
 	BuildJobComponent() v1.RadixJobComponent
 }
 
@@ -57,6 +58,7 @@ type radixApplicationJobComponentBuilder struct {
 	monitoring        *bool
 	imageTagName      string
 	runtime           *v1.Runtime
+	failurePolicy     *v1.RadixJobComponentFailurePolicy
 }
 
 func (rcb *radixApplicationJobComponentBuilder) WithTimeLimitSeconds(timeLimitSeconds *int64) RadixApplicationJobComponentBuilder {
@@ -202,6 +204,11 @@ func (rcb *radixApplicationJobComponentBuilder) WithRuntime(runtime *v1.Runtime)
 	return rcb
 }
 
+func (rcb *radixApplicationJobComponentBuilder) WithFailurePolicy(failurePolicy *v1.RadixJobComponentFailurePolicy) RadixApplicationJobComponentBuilder {
+	rcb.failurePolicy = failurePolicy
+	return rcb
+}
+
 func (rcb *radixApplicationJobComponentBuilder) BuildJobComponent() v1.RadixJobComponent {
 	var environmentConfig = make([]v1.RadixJobComponentEnvironmentConfig, 0)
 	for _, env := range rcb.environmentConfig {
@@ -238,6 +245,7 @@ func (rcb *radixApplicationJobComponentBuilder) BuildJobComponent() v1.RadixJobC
 		ImageTagName:      rcb.imageTagName,
 		VolumeMounts:      rcb.volumes,
 		Runtime:           rcb.runtime,
+		FailurePolicy:     rcb.failurePolicy,
 	}
 }
 
diff --git a/pkg/apis/utils/componentenvironment_builder.go b/pkg/apis/utils/componentenvironment_builder.go
index 73bd4a728..c53c56e55 100644
--- a/pkg/apis/utils/componentenvironment_builder.go
+++ b/pkg/apis/utils/componentenvironment_builder.go
@@ -9,6 +9,7 @@ type RadixEnvironmentConfigBuilder interface {
 	WithEnvironment(string) RadixEnvironmentConfigBuilder
 	WithSourceFolder(string) RadixEnvironmentConfigBuilder
 	WithDockerfileName(string) RadixEnvironmentConfigBuilder
+	WithHealthChecks(startupProbe, readinessProbe, livenessProbe *radixv1.RadixProbe) RadixEnvironmentConfigBuilder
 	WithImage(string) RadixEnvironmentConfigBuilder
 	WithReplicas(*int) RadixEnvironmentConfigBuilder
 	WithEnvironmentVariable(string, string) RadixEnvironmentConfigBuilder
@@ -50,6 +51,7 @@ type radixEnvironmentConfigBuilder struct {
 	readOnlyFileSystem *bool
 	runtime            *radixv1.Runtime
 	network            *radixv1.Network
+	healthChecks       *radixv1.RadixHealthChecks
 }
 
 func (ceb *radixEnvironmentConfigBuilder) WithHorizontalScaling(scaling *radixv1.RadixHorizontalScaling) RadixEnvironmentConfigBuilder {
@@ -65,6 +67,15 @@ func (ceb *radixEnvironmentConfigBuilder) WithResource(request map[string]string
 	return ceb
 }
 
+func (ceb *radixEnvironmentConfigBuilder) WithHealthChecks(startupProbe, readinessProbe, livenessProbe *radixv1.RadixProbe) RadixEnvironmentConfigBuilder {
+	ceb.healthChecks = &radixv1.RadixHealthChecks{
+		LivenessProbe:  livenessProbe,
+		ReadinessProbe: readinessProbe,
+		StartupProbe:   startupProbe,
+	}
+	return ceb
+}
+
 func (ceb *radixEnvironmentConfigBuilder) WithVolumeMounts(volumeMounts []radixv1.RadixVolumeMount) RadixEnvironmentConfigBuilder {
 	ceb.volumeMounts = volumeMounts
 	return ceb
@@ -164,6 +175,7 @@ func (ceb *radixEnvironmentConfigBuilder) BuildEnvironmentConfig() radixv1.Radix
 		Environment:    ceb.environment,
 		SourceFolder:   ceb.sourceFolder,
 		DockerfileName: ceb.dockerfileName,
+		HealthChecks:   ceb.healthChecks,
 		Image:          ceb.image,
 		Variables:      ceb.variables,
 		Replicas:       ceb.replicas,
diff --git a/pkg/apis/utils/deploymentcomponent_builder.go b/pkg/apis/utils/deploymentcomponent_builder.go
index 38b88eb73..984c3ea62 100644
--- a/pkg/apis/utils/deploymentcomponent_builder.go
+++ b/pkg/apis/utils/deploymentcomponent_builder.go
@@ -23,6 +23,8 @@ type DeployComponentBuilder interface {
 	WithResourceRequestsOnly(map[string]string) DeployComponentBuilder
 	WithResource(map[string]string, map[string]string) DeployComponentBuilder
 	WithVolumeMounts(...v1.RadixVolumeMount) DeployComponentBuilder
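+	// WithHealthChecks configures the startup, readiness and liveness probes for the deploy component.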
+	WithHealthChecks(startupProbe, readinessProbe, livenessProbe *v1.RadixProbe) DeployComponentBuilder
 	WithNodeGpu(gpu string) DeployComponentBuilder
 	WithNodeGpuCount(gpuCount string) DeployComponentBuilder
 	WithIngressConfiguration(...string) DeployComponentBuilder
@@ -70,6 +71,7 @@ type deployComponentBuilder struct {
 	identity           *v1.Identity
 	readOnlyFileSystem *bool
 	runtime            *v1.Runtime
+	healthChecks       *v1.RadixHealthChecks
 }
 
 func (dcb *deployComponentBuilder) WithVolumeMounts(volumeMounts ...v1.RadixVolumeMount) DeployComponentBuilder {
@@ -77,6 +79,15 @@ func (dcb *deployComponentBuilder) WithVolumeMounts(volumeMounts ...v1.RadixVolu
 	return dcb
 }
 
+func (dcb *deployComponentBuilder) WithHealthChecks(startupProbe, readinessProbe, livenessProbe *v1.RadixProbe) DeployComponentBuilder {
+	dcb.healthChecks = &v1.RadixHealthChecks{
+		LivenessProbe:  livenessProbe,
+		ReadinessProbe: readinessProbe,
+		StartupProbe:   startupProbe,
+	}
+	return dcb
+}
+
 func (dcb *deployComponentBuilder) WithNodeGpu(gpu string) DeployComponentBuilder {
 	dcb.node.Gpu = gpu
 	return dcb
@@ -257,6 +268,7 @@ func (dcb *deployComponentBuilder) BuildComponent() v1.RadixDeployComponent {
 		DNSExternalAlias:        dcb.externalAppAlias,
 		ExternalDNS:             dcb.externalDNS,
 		Resources:               dcb.resources,
+		HealthChecks:            dcb.healthChecks,
 		HorizontalScaling:       dcb.horizontalScaling,
 		VolumeMounts:            dcb.volumeMounts,
 		AlwaysPullImageOnDeploy: dcb.alwaysPullImageOnDeploy,
diff --git a/pkg/apis/utils/deploymentjobcomponent_builder.go b/pkg/apis/utils/deploymentjobcomponent_builder.go
index a62e2cdfb..221fb05f9 100644
--- a/pkg/apis/utils/deploymentjobcomponent_builder.go
+++ b/pkg/apis/utils/deploymentjobcomponent_builder.go
@@ -29,6 +29,7 @@ type DeployJobComponentBuilder interface {
 	WithNotifications(*v1.Notifications) DeployJobComponentBuilder
 	WithRuntime(*v1.Runtime) DeployJobComponentBuilder
 	WithBatchStatusRules(batchStatusRules ...v1.BatchStatusRule) DeployJobComponentBuilder
+	WithFailurePolicy(*v1.RadixJobComponentFailurePolicy) DeployJobComponentBuilder
 	BuildJobComponent() v1.RadixDeployJobComponent
 }
 
@@ -52,6 +53,7 @@ type deployJobComponentBuilder struct {
 	notifications    *v1.Notifications
 	runtime          *v1.Runtime
 	batchStatusRules []v1.BatchStatusRule
+	failurePolicy    *v1.RadixJobComponentFailurePolicy
 }
 
 func (dcb *deployJobComponentBuilder) WithVolumeMounts(volumeMounts ...v1.RadixVolumeMount) DeployJobComponentBuilder {
@@ -184,6 +186,11 @@ func (dcb *deployJobComponentBuilder) WithBatchStatusRules(batchStatusRules ...v
 	return dcb
 }
 
+func (dcb *deployJobComponentBuilder) WithFailurePolicy(failurePolicy *v1.RadixJobComponentFailurePolicy) DeployJobComponentBuilder {
+	dcb.failurePolicy = failurePolicy
+	return dcb
+}
+
 func (dcb *deployJobComponentBuilder) BuildJobComponent() v1.RadixDeployJobComponent {
 	var payload *v1.RadixJobComponentPayload
 	if dcb.payloadPath != nil {
@@ -210,6 +217,7 @@ func (dcb *deployJobComponentBuilder) BuildJobComponent() v1.RadixDeployJobCompo
 		Notifications:    dcb.notifications,
 		Runtime:          dcb.runtime,
 		BatchStatusRules: dcb.batchStatusRules,
+		FailurePolicy:    dcb.failurePolicy,
 	}
 }
 
diff --git a/pkg/apis/utils/failurepolicy.go b/pkg/apis/utils/failurepolicy.go
new file mode 100644
index 000000000..46e6db1d6
--- /dev/null
+++ b/pkg/apis/utils/failurepolicy.go
@@ -0,0 +1,37 @@
+package utils
+
+import (
+	"slices"
+
+	"github.com/equinor/radix-common/utils/slice"
+	radixv1 "github.com/equinor/radix-operator/pkg/apis/radix/v1"
+	batchv1 "k8s.io/api/batch/v1"
+)
+
+// GetPodFailurePolicy converts a RadixJobComponentFailurePolicy into a native Kubernetes batch PodFailurePolicy
+func GetPodFailurePolicy(failurePolicy *radixv1.RadixJobComponentFailurePolicy) *batchv1.PodFailurePolicy {
+	if failurePolicy == nil {
+		return nil
+	}
+
+	onExitCodesMapper := func(onExitCodes radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes) *batchv1.PodFailurePolicyOnExitCodesRequirement {
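+		// The Kubernetes API requires the values list in onExitCodes to be an
+		// ordered list, so the exit codes are sorted before being handed over.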
"github.com/equinor/radix-operator/pkg/apis/radix/v1" + batchv1 "k8s.io/api/batch/v1" +) + +// GetPodFailurePolicy converts a RadixJobComponentFailurePolicy into a native Kubernetes batch PodFailurePolicy +func GetPodFailurePolicy(failurePolicy *radixv1.RadixJobComponentFailurePolicy) *batchv1.PodFailurePolicy { + if failurePolicy == nil { + return nil + } + + onExitCodesMapper := func(onExitCodes radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes) *batchv1.PodFailurePolicyOnExitCodesRequirement { + slices.Sort(onExitCodes.Values) + return &batchv1.PodFailurePolicyOnExitCodesRequirement{ + Operator: batchv1.PodFailurePolicyOnExitCodesOperator(onExitCodes.Operator), + Values: onExitCodes.Values, + } + } + + ruleMapper := func(rule radixv1.RadixJobComponentFailurePolicyRule) batchv1.PodFailurePolicyRule { + return batchv1.PodFailurePolicyRule{ + Action: batchv1.PodFailurePolicyAction(rule.Action), + OnExitCodes: onExitCodesMapper(rule.OnExitCodes), + } + } + + return &batchv1.PodFailurePolicy{ + Rules: slice.Map(failurePolicy.Rules, ruleMapper), + } +} diff --git a/pkg/apis/utils/failurepolicy_test.go b/pkg/apis/utils/failurepolicy_test.go new file mode 100644 index 000000000..4d8108c79 --- /dev/null +++ b/pkg/apis/utils/failurepolicy_test.go @@ -0,0 +1,71 @@ +package utils_test + +import ( + "testing" + + radixv1 "github.com/equinor/radix-operator/pkg/apis/radix/v1" + "github.com/equinor/radix-operator/pkg/apis/utils" + "github.com/stretchr/testify/assert" + batchv1 "k8s.io/api/batch/v1" +) + +func Test_GetPodFailurePolicy_Action(t *testing.T) { + radixPolicy := radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionCount}, + {Action: radixv1.RadixJobComponentFailurePolicyActionFailJob}, + {Action: radixv1.RadixJobComponentFailurePolicyActionIgnore}, + }, + } + actualPolilcy := utils.GetPodFailurePolicy(&radixPolicy) + expectedPolicy := &batchv1.PodFailurePolicy{ + Rules: []batchv1.PodFailurePolicyRule{ + {Action: batchv1.PodFailurePolicyActionCount, OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{}}, + {Action: batchv1.PodFailurePolicyActionFailJob, OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{}}, + {Action: batchv1.PodFailurePolicyActionIgnore, OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{}}, + }, + } + assert.Equal(t, expectedPolicy, actualPolilcy) +} + +func Test_GetPodFailurePolicy_OnExitCodes_Operator(t *testing.T) { + radixPolicy := radixv1.RadixJobComponentFailurePolicy{ + Rules: []radixv1.RadixJobComponentFailurePolicyRule{ + {Action: radixv1.RadixJobComponentFailurePolicyActionCount, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn}}, + {Action: radixv1.RadixJobComponentFailurePolicyActionCount, OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpNotIn}}, + }, + } + actualPolilcy := utils.GetPodFailurePolicy(&radixPolicy) + expectedPolicy := &batchv1.PodFailurePolicy{ + Rules: []batchv1.PodFailurePolicyRule{ + {Action: batchv1.PodFailurePolicyActionCount, OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{Operator: batchv1.PodFailurePolicyOnExitCodesOpIn}}, + {Action: batchv1.PodFailurePolicyActionCount, OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{Operator: batchv1.PodFailurePolicyOnExitCodesOpNotIn}}, + }, + } + assert.Equal(t, expectedPolicy, 
+}
+
+func Test_GetPodFailurePolicy_OnExitCodes_ValuesSorted(t *testing.T) {
+	radixPolicy := radixv1.RadixJobComponentFailurePolicy{
+		Rules: []radixv1.RadixJobComponentFailurePolicyRule{
+			{
+				Action:      radixv1.RadixJobComponentFailurePolicyActionCount,
+				OnExitCodes: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodes{Operator: radixv1.RadixJobComponentFailurePolicyRuleOnExitCodesOpIn, Values: []int32{4, 2, 3, 1}},
+			},
+		},
+	}
+	actualPolicy := utils.GetPodFailurePolicy(&radixPolicy)
+	expectedPolicy := &batchv1.PodFailurePolicy{
+		Rules: []batchv1.PodFailurePolicyRule{
+			{
+				Action:      batchv1.PodFailurePolicyActionCount,
+				OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{Operator: batchv1.PodFailurePolicyOnExitCodesOpIn, Values: []int32{1, 2, 3, 4}},
+			},
+		},
+	}
+	assert.Equal(t, expectedPolicy, actualPolicy)
+}
+
+func Test_GetPodFailurePolicy_Nil(t *testing.T) {
+	assert.Nil(t, utils.GetPodFailurePolicy(nil))
+}
diff --git a/pkg/apis/utils/jobcomponentenvironment_builder.go b/pkg/apis/utils/jobcomponentenvironment_builder.go
index 63ea397c1..69c6f6762 100644
--- a/pkg/apis/utils/jobcomponentenvironment_builder.go
+++ b/pkg/apis/utils/jobcomponentenvironment_builder.go
@@ -25,6 +25,7 @@ type RadixJobComponentEnvironmentConfigBuilder interface {
 	WithNotifications(*v1.Notifications) RadixJobComponentEnvironmentConfigBuilder
 	WithReadOnlyFileSystem(*bool) RadixJobComponentEnvironmentConfigBuilder
 	WithRuntime(*v1.Runtime) RadixJobComponentEnvironmentConfigBuilder
+	WithFailurePolicy(*v1.RadixJobComponentFailurePolicy) RadixJobComponentEnvironmentConfigBuilder
 	BuildEnvironmentConfig() v1.RadixJobComponentEnvironmentConfig
 }
 
@@ -48,6 +49,7 @@ type radixJobComponentEnvironmentConfigBuilder struct {
 	notifications      *v1.Notifications
 	readOnlyFileSystem *bool
 	runtime            *v1.Runtime
+	failurePolicy      *v1.RadixJobComponentFailurePolicy
 }
 
 func (ceb *radixJobComponentEnvironmentConfigBuilder) WithTimeLimitSeconds(timeLimitSeconds *int64) RadixJobComponentEnvironmentConfigBuilder {
@@ -151,6 +153,11 @@ func (ceb *radixJobComponentEnvironmentConfigBuilder) WithRuntime(runtime *v1.Ru
 	return ceb
 }
 
+func (ceb *radixJobComponentEnvironmentConfigBuilder) WithFailurePolicy(failurePolicy *v1.RadixJobComponentFailurePolicy) RadixJobComponentEnvironmentConfigBuilder {
+	ceb.failurePolicy = failurePolicy
+	return ceb
+}
+
 func (ceb *radixJobComponentEnvironmentConfigBuilder) BuildEnvironmentConfig() v1.RadixJobComponentEnvironmentConfig {
 	return v1.RadixJobComponentEnvironmentConfig{
 		Environment: ceb.environment,
@@ -171,6 +178,7 @@ func (ceb *radixJobComponentEnvironmentConfigBuilder) BuildEnvironmentConfig() v
 		Notifications:      ceb.notifications,
 		ReadOnlyFileSystem: ceb.readOnlyFileSystem,
 		Runtime:            ceb.runtime,
+		FailurePolicy:      ceb.failurePolicy,
 	}
 }
 
diff --git a/radix-operator/batch/internal/syncerfactory.go b/radix-operator/batch/internal/syncerfactory.go
index ba11be8a5..690923487 100644
--- a/radix-operator/batch/internal/syncerfactory.go
+++ b/radix-operator/batch/internal/syncerfactory.go
@@ -16,7 +16,9 @@ type SyncerFactory interface {
 		kubeutil *kube.Kube,
 		radixclient radixclient.Interface,
 		radixBatch *radixv1.RadixBatch,
-		config *config.Config) batch.Syncer
+		config *config.Config,
+		options ...batch.SyncerOption,
+	) batch.Syncer
 }
 
 // SyncerFactoryFunc is an adapter that can be used to convert
@@ -27,6 +29,7 @@ type SyncerFactoryFunc func(
 	kubeclient kubernetes.Interface,
 	kubeutil *kube.Kube,
 	radixclient radixclient.Interface,
 	radixBatch *radixv1.RadixBatch,
 	config *config.Config,
+	options ...batch.SyncerOption,
 ) batch.Syncer
 
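+// Usage sketch: any function with a matching signature can be adapted, and the
+// variadic options are forwarded untouched (batch.NewSyncer is assumed here to
+// have a compatible signature):
+//
+//	factory := SyncerFactoryFunc(batch.NewSyncer)
+//	syncer := factory.CreateSyncer(kubeclient, kubeutil, radixclient, radixBatch, cfg, opts...)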
func (f SyncerFactoryFunc) CreateSyncer( @@ -35,6 +38,7 @@ func (f SyncerFactoryFunc) CreateSyncer( radixclient radixclient.Interface, radixBatch *radixv1.RadixBatch, config *config.Config, + options ...batch.SyncerOption, ) batch.Syncer { - return f(kubeclient, kubeutil, radixclient, radixBatch, config) + return f(kubeclient, kubeutil, radixclient, radixBatch, config, options...) } diff --git a/radix-operator/batch/internal/syncerfactory_mock.go b/radix-operator/batch/internal/syncerfactory_mock.go index 4d3c604dc..49c810b7f 100644 --- a/radix-operator/batch/internal/syncerfactory_mock.go +++ b/radix-operator/batch/internal/syncerfactory_mock.go @@ -40,15 +40,20 @@ func (m *MockSyncerFactory) EXPECT() *MockSyncerFactoryMockRecorder { } // CreateSyncer mocks base method. -func (m *MockSyncerFactory) CreateSyncer(kubeclient kubernetes.Interface, kubeutil *kube.Kube, radixclient versioned.Interface, radixBatch *v1.RadixBatch, config *config.Config) batch.Syncer { +func (m *MockSyncerFactory) CreateSyncer(kubeclient kubernetes.Interface, kubeutil *kube.Kube, radixclient versioned.Interface, radixBatch *v1.RadixBatch, config *config.Config, options ...batch.SyncerOption) batch.Syncer { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateSyncer", kubeclient, kubeutil, radixclient, radixBatch, config) + varargs := []interface{}{kubeclient, kubeutil, radixclient, radixBatch, config} + for _, a := range options { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "CreateSyncer", varargs...) ret0, _ := ret[0].(batch.Syncer) return ret0 } // CreateSyncer indicates an expected call of CreateSyncer. -func (mr *MockSyncerFactoryMockRecorder) CreateSyncer(kubeclient, kubeutil, radixclient, radixBatch, config interface{}) *gomock.Call { +func (mr *MockSyncerFactoryMockRecorder) CreateSyncer(kubeclient, kubeutil, radixclient, radixBatch, config interface{}, options ...interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateSyncer", reflect.TypeOf((*MockSyncerFactory)(nil).CreateSyncer), kubeclient, kubeutil, radixclient, radixBatch, config) + varargs := append([]interface{}{kubeclient, kubeutil, radixclient, radixBatch, config}, options...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateSyncer", reflect.TypeOf((*MockSyncerFactory)(nil).CreateSyncer), varargs...) }