Skip to content

Commit

Permalink
feat(kps): move to alertmanager spec
Browse files Browse the repository at this point in the history
  • Loading branch information
buroa committed Dec 11, 2024
1 parent 41e9634 commit 2f919df
Show file tree
Hide file tree
Showing 14 changed files with 143 additions and 91 deletions.
2 changes: 1 addition & 1 deletion kubernetes/apps/monitoring/karma/ks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ spec:
wait: true
interval: 30m
retryInterval: 1m
timeout: 15m
timeout: 5m
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
---
# Alertmanager routing tree, inhibition rules and receivers, expressed as a
# prometheus-operator AlertmanagerConfig resource.
#
# matchType values are quoted: a bare `=` is the YAML 1.1 "value" indicator
# and is not resolved to a plain string by every loader.
apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig
metadata:
  name: alertmanager
spec:
  route:
    groupBy: ["alertname", "job"]
    groupInterval: 10m
    groupWait: 1m
    # Default receiver for anything not caught by a child route below.
    receiver: pushover
    repeatInterval: 12h
    routes:
      # InfoInhibitor goes to the receiver that has no integrations configured.
      - receiver: "null"
        matchers:
          - name: alertname
            value: InfoInhibitor
            matchType: "="
      # Watchdog is forwarded to the heartbeat webhook without any group wait
      # and re-sent every 5m.
      - receiver: heartbeat
        groupInterval: 15s
        groupWait: 0s
        repeatInterval: 5m
        matchers:
          - name: alertname
            value: Watchdog
            matchType: "="
      - receiver: pushover
        matchers:
          - name: severity
            value: critical
            matchType: "="

  inhibitRules:
    # While a critical alert is firing, mute the warning-level alert that has
    # the same alertname and namespace. The warning matcher must live under
    # targetMatch: placing both severities under sourceMatch can never match a
    # single alert, which leaves the rule without any effect.
    - equal: ["alertname", "namespace"]
      sourceMatch:
        - name: severity
          value: critical
          matchType: "="
      targetMatch:
        - name: severity
          value: warning
          matchType: "="

  receivers:
    - name: "null"
    - name: heartbeat
      webhookConfigs:
        - urlSecret:
            # Anchor reused by the Pushover credentials further down.
            name: &secret alertmanager-secret
            key: ALERTMANAGER_HEARTBEAT_URL
    - name: pushover
      pushoverConfigs:
        - html: true
          # Per alert: the first non-empty annotation among description,
          # summary and message (or a fallback text), followed by the alert's
          # labels wrapped in <small>.
          message: |-
            {{- range .Alerts }}
            {{- if ne .Annotations.description "" }}
            {{ .Annotations.description }}
            {{- else if ne .Annotations.summary "" }}
            {{ .Annotations.summary }}
            {{- else if ne .Annotations.message "" }}
            {{ .Annotations.message }}
            {{- else }}
            Alert description not available
            {{- end }}
            {{- if gt (len .Labels.SortedPairs) 0 }}
            <small>
            {{- range .Labels.SortedPairs }}
            <b>{{ .Name }}:</b> {{ .Value }}
            {{- end }}
            </small>
            {{- end }}
            {{- end }}
          # Renders 1 while the group is firing and 0 otherwise.
          priority: |-
            {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
          sendResolved: true
          sound: gamelan
          # Folded scalar: the two lines below are joined with a single space.
          title: >-
            [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}]
            {{ .CommonLabels.alertname }}
          ttl: 3600s
          token:
            name: *secret
            key: ALERTMANAGER_PUSHOVER_APP_TOKEN
          userKey:
            name: *secret
            key: ALERTMANAGER_PUSHOVER_USER_KEY
          urlTitle: View in Alertmanager
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@ kind: ExternalSecret
metadata:
name: alertmanager
spec:
refreshInterval: 5m
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: alertmanager-secret
creationPolicy: Owner
template:
templateFrom:
- configMap:
name: alertmanager-config-tpl
items:
- key: alertmanager.yaml
engineVersion: v2
data:
ALERTMANAGER_HEARTBEAT_URL: "{{ .ALERTMANAGER_HEARTBEAT_URL }}"
ALERTMANAGER_PUSHOVER_APP_TOKEN: "{{ .ALERTMANAGER_PUSHOVER_APP_TOKEN }}"
ALERTMANAGER_PUSHOVER_USER_KEY: "{{ .ALERTMANAGER_PUSHOVER_USER_KEY }}"
dataFrom:
- extract:
key: alertmanager
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ spec:
ingressClassName: internal
hosts: ["am.ktwo.io"]
alertmanagerSpec:
useExistingSecret: true
configSecret: alertmanager-secret
alertmanagerConfiguration:
name: alertmanager
global:
resolveTimeout: 5m
externalUrl: https://am.ktwo.io
storage:
volumeClaimTemplate:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./alertmanagerconfig.yaml
- ./externalsecret.yaml
- ./helmrelease.yaml
configMapGenerator:
- name: alertmanager-config-tpl
files:
- ./resources/alertmanager.yaml
generatorOptions:
disableNameSuffixHash: true

This file was deleted.

3 changes: 2 additions & 1 deletion kubernetes/apps/monitoring/kube-prometheus-stack/ks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ spec:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: prometheus-operator-crds
- name: rook-ceph-cluster
path: ./kubernetes/apps/monitoring/kube-prometheus-stack/app
prune: true
Expand Down Expand Up @@ -42,4 +43,4 @@ spec:
wait: true
interval: 30m
retryInterval: 1m
timeout: 15m
timeout: 5m
1 change: 1 addition & 0 deletions kubernetes/apps/monitoring/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ resources:
- ./kromgo/ks.yaml
- ./kube-prometheus-stack/ks.yaml
- ./loki/ks.yaml
- ./prometheus-operator-crds/ks.yaml
- ./promtail/ks.yaml
- ./unpoller/ks.yaml
- ./exporters
1 change: 0 additions & 1 deletion kubernetes/apps/monitoring/loki/app/helmrelease.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ metadata:
name: loki
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: loki
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/apps/monitoring/loki/ks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ spec:
wait: true
interval: 30m
retryInterval: 1m
timeout: 15m
timeout: 5m
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# Flux HelmRelease that installs the prometheus-operator-crds chart.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: prometheus-operator-crds
spec:
  interval: 30m
  chart:
    spec:
      chart: prometheus-operator-crds
      version: 16.0.1
      # Chart is pulled from the HelmRepository object in flux-system.
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  # Install and upgrade share the same remediation retry count.
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# Kustomize entry point for this app directory; it only pulls in the
# HelmRelease manifest that sits next to it.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./helmrelease.yaml
22 changes: 22 additions & 0 deletions kubernetes/apps/monitoring/prometheus-operator-crds/ks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# Flux Kustomization that reconciles the prometheus-operator-crds app
# directory into the monitoring namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # The anchor lets the app label below reuse the object name.
  name: &app prometheus-operator-crds
  namespace: flux-system
spec:
  targetNamespace: monitoring
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  dependsOn:
    - name: rook-ceph-cluster
  path: ./kubernetes/apps/monitoring/prometheus-operator-crds/app
  prune: false  # never should be deleted
  sourceRef:
    kind: GitRepository
    name: k8s-gitops
  wait: true
  interval: 30m
  retryInterval: 1m
  timeout: 5m
4 changes: 2 additions & 2 deletions kubernetes/apps/networking/nginx/ks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ spec:
wait: true
interval: 30m
retryInterval: 1m
timeout: 5m
timeout: 15m
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
Expand All @@ -63,4 +63,4 @@ spec:
wait: true
interval: 30m
retryInterval: 1m
timeout: 5m
timeout: 15m

0 comments on commit 2f919df

Please sign in to comment.