diff --git a/.github/renovate/groups.json5 b/.github/renovate/groups.json5
index af0f31fd40..c74345fb12 100644
--- a/.github/renovate/groups.json5
+++ b/.github/renovate/groups.json5
@@ -31,17 +31,6 @@
       },
       "separateMinorPatch": true
     },
-    {
-      "description": ["Flux Group"],
-      "groupName": "Flux",
-      "matchPackagePatterns": ["fluxcd"],
-      "matchDatasources": ["docker", "github-tags"],
-      "versioning": "semver",
-      "group": {
-        "commitMessageTopic": "{{{groupName}}} group"
-      },
-      "separateMinorPatch": true
-    },
     {
       "description": ["Intel Device Plugins Group"],
       "groupName": "Intel-Device-Plugins",
diff --git a/.taskfiles/bootstrap/Taskfile.yaml b/.taskfiles/bootstrap/Taskfile.yaml
index d676cfbfb8..51c419007a 100644
--- a/.taskfiles/bootstrap/Taskfile.yaml
+++ b/.taskfiles/bootstrap/Taskfile.yaml
@@ -98,7 +98,6 @@ tasks:
   flux:
     desc: Bootstrap Flux [CLUSTER=main]
     cmds:
-      - kubectl apply --server-side --kustomize {{.CLUSTER_DIR}}/bootstrap/apps
       - for: { var: TEMPLATES }
         cmd: op run --env-file {{.CLUSTER_DIR}}/bootstrap/bootstrap.env --no-masking -- minijinja-cli {{.ITEM}} | kubectl apply --server-side --filename -
       - kubectl apply --server-side --filename {{.CLUSTER_DIR}}/flux/settings/cluster-settings.yaml
diff --git a/kubernetes/main/apps/flux-system/flux/app/helm-values.yaml b/kubernetes/main/apps/flux-system/flux/app/helm-values.yaml
new file mode 100644
index 0000000000..6322529193
--- /dev/null
+++ b/kubernetes/main/apps/flux-system/flux/app/helm-values.yaml
@@ -0,0 +1,73 @@
+---
+crds:
+  annotations:
+    helm.sh/resource-policy: keep
+
+helmController:
+  container:
+    additionalArgs:
+      # Increase the number of workers and limits
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#increase-the-number-of-workers-and-limits
+      - --concurrent=10
+      - --requeue-dependency=5s
+      # Flux near OOM detection for Helm
+      # Ref: https://fluxcd.io/flux/installation/configuration/helm-oom-detection/
+      - --feature-gates=OOMWatch=true
+      - --oom-watch-memory-threshold=95
+      - --oom-watch-interval=500ms
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+imageAutomationController:
+  create: false
+
+imageReflectionController:
+  create: false
+
+kustomizeController:
+  container:
+    additionalArgs:
+      # Increase the number of workers and limits
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#increase-the-number-of-workers-and-limits
+      - --concurrent=10
+      - --requeue-dependency=5s
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+notificationController:
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+sourceController:
+  container:
+    additionalArgs:
+      # Enable Helm repositories caching
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#enable-helm-repositories-caching
+      - --helm-cache-max-size=10
+      - --helm-cache-ttl=60m
+      - --helm-cache-purge-interval=5m
+      # Increase the number of workers and limits
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#increase-the-number-of-workers-and-limits
+      - --concurrent=10
+      - --requeue-dependency=5s
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+policies:
+  create: false
+
+prometheus:
+  podMonitor:
+    create: true
diff --git a/kubernetes/main/apps/flux-system/flux/app/helmrelease.yaml b/kubernetes/main/apps/flux-system/flux/app/helmrelease.yaml
new file mode 100644
index 0000000000..35f5b53e3b
--- /dev/null
+++ b/kubernetes/main/apps/flux-system/flux/app/helmrelease.yaml
@@ -0,0 +1,27 @@
+---
+# yaml-language-server: $schema=https://kube-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: flux
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: flux2
+      version: 2.14.0
+      sourceRef:
+        kind: HelmRepository
+        name: fluxcd-community
+        namespace: flux-system
+  install:
+    remediation:
+      retries: 3
+  upgrade:
+    cleanupOnFail: true
+    remediation:
+      strategy: rollback
+      retries: 3
+  valuesFrom:
+    - kind: ConfigMap
+      name: flux-helm-values
diff --git a/kubernetes/main/apps/flux-system/flux/app/kustomization.yaml b/kubernetes/main/apps/flux-system/flux/app/kustomization.yaml
new file mode 100644
index 0000000000..425e6202b7
--- /dev/null
+++ b/kubernetes/main/apps/flux-system/flux/app/kustomization.yaml
@@ -0,0 +1,13 @@
+---
+# yaml-language-server: $schema=https://json.schemastore.org/kustomization
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - ./helmrelease.yaml
+  - ./prometheusrule.yaml
+configMapGenerator:
+  - name: flux-helm-values
+    files:
+      - values.yaml=./helm-values.yaml
+configurations:
+  - kustomizeconfig.yaml
diff --git a/kubernetes/main/apps/flux-system/flux/app/kustomizeconfig.yaml b/kubernetes/main/apps/flux-system/flux/app/kustomizeconfig.yaml
new file mode 100644
index 0000000000..58f92ba153
--- /dev/null
+++ b/kubernetes/main/apps/flux-system/flux/app/kustomizeconfig.yaml
@@ -0,0 +1,7 @@
+---
+nameReference:
+  - kind: ConfigMap
+    version: v1
+    fieldSpecs:
+      - path: spec/valuesFrom/name
+        kind: HelmRelease
diff --git a/kubernetes/main/apps/flux-system/flux/app/prometheusrule.yaml b/kubernetes/main/apps/flux-system/flux/app/prometheusrule.yaml
new file mode 100644
index 0000000000..4257e56de8
--- /dev/null
+++ b/kubernetes/main/apps/flux-system/flux/app/prometheusrule.yaml
@@ -0,0 +1,32 @@
+---
+# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: flux-rules
+  namespace: flux-system
+spec:
+  groups:
+    - name: flux.rules
+      rules:
+        - alert: FluxComponentAbsent
+          annotations:
+            summary: Flux component has disappeared from Prometheus target discovery.
+          expr: |
+            absent(up{job=~".*flux-system.*"} == 1)
+          for: 15m
+          labels:
+            severity: critical
+        - alert: FluxReconciliationFailure
+          annotations:
+            summary: >-
+              {{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
+              has been failing for more than 15 minutes.
+          expr: |
+            max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
+              +
+            on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
+              by (namespace, name, kind)) * 2 == 1
+          for: 15m
+          labels:
+            severity: critical
diff --git a/kubernetes/main/apps/flux-system/addons/webhooks/kustomization.yaml b/kubernetes/main/apps/flux-system/flux/github/kustomization.yaml
similarity index 91%
rename from kubernetes/main/apps/flux-system/addons/webhooks/kustomization.yaml
rename to kubernetes/main/apps/flux-system/flux/github/kustomization.yaml
index 08c1780f06..b693651a5c 100644
--- a/kubernetes/main/apps/flux-system/addons/webhooks/kustomization.yaml
+++ b/kubernetes/main/apps/flux-system/flux/github/kustomization.yaml
@@ -3,4 +3,4 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
-  - ./github
+  - ./webhooks
diff --git a/kubernetes/main/apps/flux-system/addons/webhooks/github/externalsecret.yaml b/kubernetes/main/apps/flux-system/flux/github/webhooks/externalsecret.yaml
similarity index 100%
rename from kubernetes/main/apps/flux-system/addons/webhooks/github/externalsecret.yaml
rename to kubernetes/main/apps/flux-system/flux/github/webhooks/externalsecret.yaml
diff --git a/kubernetes/main/apps/flux-system/addons/webhooks/github/ingress.yaml b/kubernetes/main/apps/flux-system/flux/github/webhooks/ingress.yaml
similarity index 100%
rename from kubernetes/main/apps/flux-system/addons/webhooks/github/ingress.yaml
rename to kubernetes/main/apps/flux-system/flux/github/webhooks/ingress.yaml
diff --git a/kubernetes/main/apps/flux-system/addons/webhooks/github/kustomization.yaml b/kubernetes/main/apps/flux-system/flux/github/webhooks/kustomization.yaml
similarity index 100%
rename from kubernetes/main/apps/flux-system/addons/webhooks/github/kustomization.yaml
rename to kubernetes/main/apps/flux-system/flux/github/webhooks/kustomization.yaml
diff --git a/kubernetes/main/apps/flux-system/addons/webhooks/github/receiver.yaml b/kubernetes/main/apps/flux-system/flux/github/webhooks/receiver.yaml
similarity index 100%
rename from kubernetes/main/apps/flux-system/addons/webhooks/github/receiver.yaml
rename to kubernetes/main/apps/flux-system/flux/github/webhooks/receiver.yaml
diff --git a/kubernetes/main/apps/flux-system/flux/ks.yaml b/kubernetes/main/apps/flux-system/flux/ks.yaml
new file mode 100644
index 0000000000..628e9f30e3
--- /dev/null
+++ b/kubernetes/main/apps/flux-system/flux/ks.yaml
@@ -0,0 +1,42 @@
+---
+# yaml-language-server: $schema=https://kube-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: &app flux
+  namespace: flux-system
+spec:
+  targetNamespace: flux-system
+  commonMetadata:
+    labels:
+      app.kubernetes.io/name: *app
+  path: ./kubernetes/main/apps/flux-system/flux/app
+  prune: false # never should be deleted
+  sourceRef:
+    kind: GitRepository
+    name: home-kubernetes
+  wait: false
+  interval: 30m
+  timeout: 5m
+---
+# yaml-language-server: $schema=https://kube-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: &app flux-github
+  namespace: flux-system
+spec:
+  targetNamespace: flux-system
+  commonMetadata:
+    labels:
+      app.kubernetes.io/name: *app
+  dependsOn:
+    - name: external-secrets-stores
+  path: ./kubernetes/main/apps/flux-system/flux/github
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: home-kubernetes
+  wait: false
+  interval: 30m
+  timeout: 5m
diff --git a/kubernetes/main/apps/flux-system/kustomization.yaml b/kubernetes/main/apps/flux-system/kustomization.yaml
index 5bc1651dc8..02c8597c20 100644
--- a/kubernetes/main/apps/flux-system/kustomization.yaml
+++ b/kubernetes/main/apps/flux-system/kustomization.yaml
@@ -3,7 +3,9 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+  # Pre Flux-Kustomizations
   - ./namespace.yaml
-  - ./addons/ks.yaml
+  # Flux-Kustomizations
   - ./clickops/ks.yaml
+  - ./flux/ks.yaml
   - ./weave-gitops/ks.yaml
diff --git a/kubernetes/main/bootstrap/apps/helmfile.yaml b/kubernetes/main/bootstrap/apps/helmfile.yaml
index d284bdf7df..283fea540a 100644
--- a/kubernetes/main/bootstrap/apps/helmfile.yaml
+++ b/kubernetes/main/bootstrap/apps/helmfile.yaml
@@ -18,9 +18,6 @@ repositories:
   - name: coredns
     url: https://coredns.github.io/helm
 
-  - name: postfinance
-    url: https://postfinance.github.io/kubelet-csr-approver
-
 releases:
   - name: kube-prometheus-stack-crds
     namespace: observability
@@ -36,7 +33,8 @@ releases:
       - commonLabels:
           helm.toolkit.fluxcd.io/name: *name
           helm.toolkit.fluxcd.io/namespace: *namespace
-    needs: ["observability/kube-prometheus-stack-crds"]
+    needs:
+      - observability/kube-prometheus-stack-crds
 
   - name: &name coredns
     namespace: &namespace kube-system
@@ -47,11 +45,39 @@ releases:
       - customLabels:
           helm.toolkit.fluxcd.io/name: *name
           helm.toolkit.fluxcd.io/namespace: *namespace
-    needs: ["kube-system/cilium"]
+    needs:
+      - kube-system/cilium
 
   - name: spegel
     namespace: kube-system
     chart: oci://ghcr.io/spegel-org/helm-charts/spegel
     version: v0.0.28
     values: ["../../apps/kube-system/spegel/app/helm-values.yaml"]
-    needs: ["kube-system/coredns"]
+    needs:
+      - kube-system/cilium
+
+  - name: &name flux
+    namespace: &namespace flux-system
+    chart: oci://ghcr.io/fluxcd-community/charts/flux2
+    version: 2.14.0
+    values:
+      - ../../apps/flux-system/flux/app/helm-values.yaml
+      - helmController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+      - kustomizeController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+      - notificationController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+      - sourceController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+    needs:
+      - kube-system/coredns
+      - kube-system/spegel
diff --git a/kubernetes/shared/repos/helm/fluxcd-community.yaml b/kubernetes/shared/repos/helm/fluxcd-community.yaml
new file mode 100644
index 0000000000..7a45f4fd6a
--- /dev/null
+++ b/kubernetes/shared/repos/helm/fluxcd-community.yaml
@@ -0,0 +1,11 @@
+---
+# yaml-language-server: $schema=https://kube-schemas.pages.dev/source.toolkit.fluxcd.io/helmrepository_v1.json
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: HelmRepository
+metadata:
+  name: fluxcd-community
+  namespace: flux-system
+spec:
+  type: oci
+  interval: 5m
+  url: oci://ghcr.io/fluxcd-community/charts
diff --git a/kubernetes/shared/repos/helm/kustomization.yaml b/kubernetes/shared/repos/helm/kustomization.yaml
index 531505c0b1..7c23b3a6bf 100644
--- a/kubernetes/shared/repos/helm/kustomization.yaml
+++ b/kubernetes/shared/repos/helm/kustomization.yaml
@@ -12,6 +12,7 @@ resources:
   - ./descheduler.yaml
   - ./external-dns.yaml
   - ./external-secrets.yaml
+  - ./fluxcd-community.yaml
   - ./grafana.yaml
   - ./ingress-nginx.yaml
   - ./intel.yaml
diff --git a/kubernetes/utility/apps/flux-system/flux/app/helm-values.yaml b/kubernetes/utility/apps/flux-system/flux/app/helm-values.yaml
new file mode 100644
index 0000000000..6322529193
--- /dev/null
+++ b/kubernetes/utility/apps/flux-system/flux/app/helm-values.yaml
@@ -0,0 +1,73 @@
+---
+crds:
+  annotations:
+    helm.sh/resource-policy: keep
+
+helmController:
+  container:
+    additionalArgs:
+      # Increase the number of workers and limits
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#increase-the-number-of-workers-and-limits
+      - --concurrent=10
+      - --requeue-dependency=5s
+      # Flux near OOM detection for Helm
+      # Ref: https://fluxcd.io/flux/installation/configuration/helm-oom-detection/
+      - --feature-gates=OOMWatch=true
+      - --oom-watch-memory-threshold=95
+      - --oom-watch-interval=500ms
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+imageAutomationController:
+  create: false
+
+imageReflectionController:
+  create: false
+
+kustomizeController:
+  container:
+    additionalArgs:
+      # Increase the number of workers and limits
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#increase-the-number-of-workers-and-limits
+      - --concurrent=10
+      - --requeue-dependency=5s
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+notificationController:
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+sourceController:
+  container:
+    additionalArgs:
+      # Enable Helm repositories caching
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#enable-helm-repositories-caching
+      - --helm-cache-max-size=10
+      - --helm-cache-ttl=60m
+      - --helm-cache-purge-interval=5m
+      # Increase the number of workers and limits
+      # Ref: https://fluxcd.io/flux/installation/configuration/vertical-scaling/#increase-the-number-of-workers-and-limits
+      - --concurrent=10
+      - --requeue-dependency=5s
+  resources:
+    requests:
+      cpu: 100m
+    limits:
+      memory: 2Gi
+
+policies:
+  create: false
+
+prometheus:
+  podMonitor:
+    create: true
diff --git a/kubernetes/utility/apps/flux-system/flux/app/helmrelease.yaml b/kubernetes/utility/apps/flux-system/flux/app/helmrelease.yaml
new file mode 100644
index 0000000000..35f5b53e3b
--- /dev/null
+++ b/kubernetes/utility/apps/flux-system/flux/app/helmrelease.yaml
@@ -0,0 +1,27 @@
+---
+# yaml-language-server: $schema=https://kube-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: flux
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: flux2
+      version: 2.14.0
+      sourceRef:
+        kind: HelmRepository
+        name: fluxcd-community
+        namespace: flux-system
+  install:
+    remediation:
+      retries: 3
+  upgrade:
+    cleanupOnFail: true
+    remediation:
+      strategy: rollback
+      retries: 3
+  valuesFrom:
+    - kind: ConfigMap
+      name: flux-helm-values
diff --git a/kubernetes/utility/apps/flux-system/flux/app/kustomization.yaml b/kubernetes/utility/apps/flux-system/flux/app/kustomization.yaml
new file mode 100644
index 0000000000..425e6202b7
--- /dev/null
+++ b/kubernetes/utility/apps/flux-system/flux/app/kustomization.yaml
@@ -0,0 +1,13 @@
+---
+# yaml-language-server: $schema=https://json.schemastore.org/kustomization
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - ./helmrelease.yaml
+  - ./prometheusrule.yaml
+configMapGenerator:
+  - name: flux-helm-values
+    files:
+      - values.yaml=./helm-values.yaml
+configurations:
+  - kustomizeconfig.yaml
diff --git a/kubernetes/utility/apps/flux-system/flux/app/kustomizeconfig.yaml b/kubernetes/utility/apps/flux-system/flux/app/kustomizeconfig.yaml
new file mode 100644
index 0000000000..58f92ba153
--- /dev/null
+++ b/kubernetes/utility/apps/flux-system/flux/app/kustomizeconfig.yaml
@@ -0,0 +1,7 @@
+---
+nameReference:
+  - kind: ConfigMap
+    version: v1
+    fieldSpecs:
+      - path: spec/valuesFrom/name
+        kind: HelmRelease
diff --git a/kubernetes/utility/apps/flux-system/flux/app/prometheusrule.yaml b/kubernetes/utility/apps/flux-system/flux/app/prometheusrule.yaml
new file mode 100644
index 0000000000..4257e56de8
--- /dev/null
+++ b/kubernetes/utility/apps/flux-system/flux/app/prometheusrule.yaml
@@ -0,0 +1,32 @@
+---
+# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: flux-rules
+  namespace: flux-system
+spec:
+  groups:
+    - name: flux.rules
+      rules:
+        - alert: FluxComponentAbsent
+          annotations:
+            summary: Flux component has disappeared from Prometheus target discovery.
+          expr: |
+            absent(up{job=~".*flux-system.*"} == 1)
+          for: 15m
+          labels:
+            severity: critical
+        - alert: FluxReconciliationFailure
+          annotations:
+            summary: >-
+              {{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
+              has been failing for more than 15 minutes.
+          expr: |
+            max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
+              +
+            on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
+              by (namespace, name, kind)) * 2 == 1
+          for: 15m
+          labels:
+            severity: critical
diff --git a/kubernetes/main/apps/flux-system/addons/ks.yaml b/kubernetes/utility/apps/flux-system/flux/ks.yaml
similarity index 76%
rename from kubernetes/main/apps/flux-system/addons/ks.yaml
rename to kubernetes/utility/apps/flux-system/flux/ks.yaml
index 8bbc606116..9f7e805070 100644
--- a/kubernetes/main/apps/flux-system/addons/ks.yaml
+++ b/kubernetes/utility/apps/flux-system/flux/ks.yaml
@@ -3,18 +3,18 @@
 apiVersion: kustomize.toolkit.fluxcd.io/v1
 kind: Kustomization
 metadata:
-  name: &app flux-webhooks
+  name: &app flux
   namespace: flux-system
 spec:
   targetNamespace: flux-system
   commonMetadata:
     labels:
       app.kubernetes.io/name: *app
-  path: ./kubernetes/main/apps/flux-system/addons/webhooks
-  prune: true
+  path: ./kubernetes/utility/apps/flux-system/flux/app
+  prune: false # never should be deleted
   sourceRef:
     kind: GitRepository
     name: home-kubernetes
-  wait: true
+  wait: false
   interval: 30m
   timeout: 5m
diff --git a/kubernetes/utility/bootstrap/apps/helmfile.yaml b/kubernetes/utility/bootstrap/apps/helmfile.yaml
index 5642358cd3..ed70874aed 100644
--- a/kubernetes/utility/bootstrap/apps/helmfile.yaml
+++ b/kubernetes/utility/bootstrap/apps/helmfile.yaml
@@ -18,9 +18,6 @@ repositories:
   - name: coredns
     url: https://coredns.github.io/helm
 
-  - name: postfinance
-    url: https://postfinance.github.io/kubelet-csr-approver
-
 releases:
   - name: kube-prometheus-stack-crds
     namespace: observability
@@ -36,7 +33,8 @@ releases:
       - commonLabels:
           helm.toolkit.fluxcd.io/name: *name
           helm.toolkit.fluxcd.io/namespace: *namespace
-    needs: ["observability/kube-prometheus-stack-crds"]
+    needs:
+      - observability/kube-prometheus-stack-crds
 
   - name: &name coredns
     namespace: &namespace kube-system
@@ -47,4 +45,30 @@ releases:
       - customLabels:
           helm.toolkit.fluxcd.io/name: *name
           helm.toolkit.fluxcd.io/namespace: *namespace
-    needs: ["kube-system/cilium"]
+    needs:
+      - kube-system/cilium
+
+  - name: &name flux
+    namespace: &namespace flux-system
+    chart: oci://ghcr.io/fluxcd-community/charts/flux2
+    version: 2.14.0
+    values:
+      - ../../apps/flux-system/flux/app/helm-values.yaml
+      - helmController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+      - kustomizeController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+      - notificationController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+      - sourceController:
+          labels:
+            helm.toolkit.fluxcd.io/name: *name
+            helm.toolkit.fluxcd.io/namespace: *namespace
+    needs:
+      - kube-system/coredns