Skip to content

Commit

Permalink
feat: more config ttl for error job pod
Browse files Browse the repository at this point in the history
  • Loading branch information
lavData committed Dec 28, 2023
1 parent fc9b1bd commit 96fcca2
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 58 deletions.
117 changes: 59 additions & 58 deletions charts/airbyte-pod-sweeper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,63 +12,64 @@ Helm chart to deploy airbyte-pod-sweeper

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| containerSecurityContext | object | `{}` | |
| enabled | bool | `true` | |
| extraVolumeMounts | list | `[]` | |
| extraVolumes | list | `[]` | |
| global.database.secretName | string | `""` | |
| global.database.secretValue | string | `""` | |
| global.jobs.kube.annotations | object | `{}` | |
| Key | Type | Default | Description |
|---------------------------------------------------|------|---------|-------------|
| affinity | object | `{}` | |
| containerSecurityContext | object | `{}` | |
| enabled | bool | `true` | |
| extraVolumeMounts | list | `[]` | |
| extraVolumes | list | `[]` | |
| global.database.secretName | string | `""` | |
| global.database.secretValue | string | `""` | |
| global.jobs.kube.annotations | object | `{}` | |
| global.jobs.kube.main_container_image_pull_secret | string | `""` | |
| global.jobs.kube.nodeSelector | object | `{}` | |
| global.jobs.kube.tolerations | list | `[]` | |
| global.jobs.resources.limits | object | `{}` | |
| global.jobs.resources.requests | object | `{}` | |
| global.logs.accessKey.existingSecret | string | `""` | |
| global.logs.accessKey.existingSecretKey | string | `""` | |
| global.logs.accessKey.password | string | `"minio"` | |
| global.logs.externalMinio.enabled | bool | `false` | |
| global.logs.externalMinio.host | string | `"localhost"` | |
| global.logs.externalMinio.port | int | `9000` | |
| global.logs.gcs.bucket | string | `""` | |
| global.logs.gcs.credentials | string | `""` | |
| global.logs.gcs.credentialsJson | string | `""` | |
| global.logs.minio.enabled | bool | `true` | |
| global.logs.s3.bucket | string | `"airbyte-dev-logs"` | |
| global.logs.s3.bucketRegion | string | `""` | |
| global.logs.s3.enabled | bool | `false` | |
| global.logs.secretKey.existingSecret | string | `""` | |
| global.logs.secretKey.existingSecretKey | string | `""` | |
| global.logs.secretKey.password | string | `"minio123"` | |
| global.secretName | string | `""` | |
| global.serviceAccountName | string | `"airbyte-admin"` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"bitnami/kubectl"` | |
| image.tag | string | `"latest"` | |
| livenessProbe.enabled | bool | `true` | |
| livenessProbe.failureThreshold | int | `3` | |
| livenessProbe.initialDelaySeconds | int | `5` | |
| livenessProbe.periodSeconds | int | `30` | |
| livenessProbe.successThreshold | int | `1` | |
| livenessProbe.timeoutSeconds | int | `1` | |
| namespace | string | `""` | |
| nodeSelector | object | `{}` | |
| podAnnotations | object | `{}` | |
| podLabels | object | `{}` | |
| readinessProbe.enabled | bool | `true` | |
| readinessProbe.failureThreshold | int | `3` | |
| readinessProbe.initialDelaySeconds | int | `5` | |
| readinessProbe.periodSeconds | int | `30` | |
| readinessProbe.successThreshold | int | `1` | |
| readinessProbe.timeoutSeconds | int | `1` | |
| replicaCount | int | `1` | |
| resources.limits | object | `{}` | |
| resources.requests | object | `{}` | |
| timeToDeletePods.running | string | `""` | |
| timeToDeletePods.succeeded | int | `120` | |
| timeToDeletePods.unsuccessful | int | `1440` | |
| tolerations | list | `[]` | |
| global.jobs.kube.nodeSelector | object | `{}` | |
| global.jobs.kube.tolerations | list | `[]` | |
| global.jobs.resources.limits | object | `{}` | |
| global.jobs.resources.requests | object | `{}` | |
| global.logs.accessKey.existingSecret | string | `""` | |
| global.logs.accessKey.existingSecretKey | string | `""` | |
| global.logs.accessKey.password | string | `"minio"` | |
| global.logs.externalMinio.enabled | bool | `false` | |
| global.logs.externalMinio.host | string | `"localhost"` | |
| global.logs.externalMinio.port | int | `9000` | |
| global.logs.gcs.bucket | string | `""` | |
| global.logs.gcs.credentials | string | `""` | |
| global.logs.gcs.credentialsJson | string | `""` | |
| global.logs.minio.enabled | bool | `true` | |
| global.logs.s3.bucket | string | `"airbyte-dev-logs"` | |
| global.logs.s3.bucketRegion | string | `""` | |
| global.logs.s3.enabled | bool | `false` | |
| global.logs.secretKey.existingSecret | string | `""` | |
| global.logs.secretKey.existingSecretKey | string | `""` | |
| global.logs.secretKey.password | string | `"minio123"` | |
| global.secretName | string | `""` | |
| global.serviceAccountName | string | `"airbyte-admin"` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"bitnami/kubectl"` | |
| image.tag | string | `"latest"` | |
| livenessProbe.enabled | bool | `true` | |
| livenessProbe.failureThreshold | int | `3` | |
| livenessProbe.initialDelaySeconds | int | `5` | |
| livenessProbe.periodSeconds | int | `30` | |
| livenessProbe.successThreshold | int | `1` | |
| livenessProbe.timeoutSeconds | int | `1` | |
| namespace | string | `""` | |
| nodeSelector | object | `{}` | |
| podAnnotations | object | `{}` | |
| podLabels | object | `{}` | |
| readinessProbe.enabled | bool | `true` | |
| readinessProbe.failureThreshold | int | `3` | |
| readinessProbe.initialDelaySeconds | int | `5` | |
| readinessProbe.periodSeconds | int | `30` | |
| readinessProbe.successThreshold | int | `1` | |
| readinessProbe.timeoutSeconds | int | `1` | |
| replicaCount | int | `1` | |
| resources.limits | object | `{}` | |
| resources.requests | object | `{}` | |
| timeToDeletePods.running | string | `""` | |
| timeToDeletePods.error | string | `""` | |
| timeToDeletePods.succeeded | int | `120` | |
| timeToDeletePods.unsuccessful | int | `1440` | |
| tolerations | list | `[]` | |

11 changes: 11 additions & 0 deletions charts/airbyte-pod-sweeper/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ data:
NON_SUCCESS_DATE=`date -d $NON_SUCCESS_DATE_STR +%s`
echo "Will sweep unsuccessful pods from before ${NON_SUCCESS_DATE_STR}"
fi
if [ -n "${ERROR_TTL_MINUTES}" ]; then
# Separate time window for pods in Error status (kept around to debug)
ERROR_DATE_STR=`date -d "now - ${ERROR_TTL_MINUTES} minutes" --utc -Ins`
ERROR_DATE=`date -d $ERROR_DATE_STR +%s`
echo "Will sweep error pods from before ${ERROR_DATE_STR}"
fi
(
IFS=$'\n'
for POD in `get_job_pods`; do
Expand All @@ -65,6 +72,10 @@ data:
if [ "$POD_DATE" -lt "$NON_SUCCESS_DATE" ]; then
delete_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR"
fi
elif [ -n "${ERROR_TTL_MINUTES}" ] && [ "$POD_STATUS" = "Error" ]; then
if [ "$POD_DATE" -lt "$ERROR_DATE" ]; then
delete_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR"
fi
fi
done
)
Expand Down
2 changes: 2 additions & 0 deletions charts/airbyte-pod-sweeper/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ spec:
value: "{{ .Values.timeToDeletePods.succeeded }}"
- name: UNSUCCESSFUL_TTL_MINUTES
value: "{{ .Values.timeToDeletePods.unsuccessful }}"
- name: ERROR_TTL_MINUTES
value: "{{ .Values.timeToDeletePods.error }}"
{{- if .Values.containerSecurityContext }}
securityContext: {{- toYaml .Values.containerSecurityContext | nindent 10 }}
{{- end }}
Expand Down
1 change: 1 addition & 0 deletions charts/airbyte-pod-sweeper/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ extraVolumes: []
## podSweeper.timeToDeletePods.unsuccessful Time to remove pods that are in neither running nor succeeded status (minutes).
## podSweeper.timeToDeletePods.error Time to remove pods in Error status (minutes).
timeToDeletePods:
running: ""
error: ""
succeeded: 120
unsuccessful: 1440

Expand Down

0 comments on commit 96fcca2

Please sign in to comment.