From 157f893e536d324c5f31927868d95a523dbf737b Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 02:31:23 +0000 Subject: [PATCH 1/2] Update kubernetes-mixin digest to 3cb7958 --- jsonnetfile.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonnetfile.json b/jsonnetfile.json index 51e3905..972af35 100644 --- a/jsonnetfile.json +++ b/jsonnetfile.json @@ -7,7 +7,7 @@ "subdir": "" } }, - "version": "50150c585ebee6e4d9cb72218182da8f3c616515" + "version": "3cb7958a56688386e8f6cb0f1258bdb1234797d6" } ], "legacyImports": true, From 7b2822281a067952723463fe746a17ef6b83da78 Mon Sep 17 00:00:00 2001 From: "bonddim-actions[bot]" <169367001+bonddim-actions[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 02:32:48 +0000 Subject: [PATCH 2/2] Update files after Renovate update --- .../k8s-resources-workloads-namespace.json | 8 ++++---- docs/azure/prometheusAlerts.json | 14 +++++++------- docs/azure/prometheusAlerts.yaml | 14 +++++++------- .../k8s-resources-workloads-namespace.json | 8 ++++---- docs/default/prometheusAlerts.json | 14 +++++++------- docs/default/prometheusAlerts.yaml | 14 +++++++------- .../k8s-resources-workloads-namespace.json | 8 ++++---- docs/multicluster/prometheusAlerts.json | 14 +++++++------- docs/multicluster/prometheusAlerts.yaml | 14 +++++++------- jsonnetfile.lock.json | 4 ++-- 10 files changed, 56 insertions(+), 56 deletions(-) diff --git a/docs/azure/dashboards/k8s-resources-workloads-namespace.json b/docs/azure/dashboards/k8s-resources-workloads-namespace.json index a6d9b17..ed72c46 100644 --- a/docs/azure/dashboards/k8s-resources-workloads-namespace.json +++ b/docs/azure/dashboards/k8s-resources-workloads-namespace.json @@ -117,7 +117,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"})", "legendFormat": "quota - requests" }, { @@ -125,7 +125,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"})", "legendFormat": "quota - limits" } ], @@ -412,7 +412,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"})", "legendFormat": "quota - requests" }, { @@ -420,7 +420,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"})", "legendFormat": "quota - limits" } ], diff --git a/docs/azure/prometheusAlerts.json b/docs/azure/prometheusAlerts.json index 5f1396a..5898b41 100644 --- a/docs/azure/prometheusAlerts.json +++ b/docs/azure/prometheusAlerts.json @@ -427,7 +427,7 @@ "rules": [ { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000)", "for": "2m", "labels": { "long": "1h", @@ -442,7 +442,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000)", "for": "15m", "labels": { "long": "6h", @@ -457,7 +457,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000)", "for": "1h", "labels": { "long": "1d", @@ -472,7 +472,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000)", "for": "3h", "labels": { "long": "3d", @@ -492,7 +492,7 @@ "rules": [ { "alert": "KubeClientCertificateExpiration", - "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 604800", + "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 604800", "for": "5m", "labels": { "severity": "warning" @@ -505,7 +505,7 @@ }, { "alert": "KubeClientCertificateExpiration", - "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 86400", + "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 86400", "for": "5m", "labels": { "severity": "critical" @@ -556,7 +556,7 @@ }, { "alert": "KubeAPITerminatedRequests", - "expr": "sum(rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m])) / (sum(rate(apiserver_request_total{job=\"kube-apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m]))) > 0.20", + "expr": "sum by(cluster) (rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m])) / (sum by(cluster) (rate(apiserver_request_total{job=\"kube-apiserver\"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m]))) > 0.20", "for": "5m", "labels": { "severity": "warning" diff --git a/docs/azure/prometheusAlerts.yaml b/docs/azure/prometheusAlerts.yaml index 17b53a5..880d1ef 100644 --- a/docs/azure/prometheusAlerts.yaml +++ b/docs/azure/prometheusAlerts.yaml @@ -320,7 +320,7 @@ groups: rules: - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000) for: 2m labels: long: 1h @@ -332,7 +332,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000) for: 15m labels: long: 6h @@ -344,7 +344,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000) for: 1h labels: long: 1d @@ -356,7 +356,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000) for: 3h labels: long: 3d @@ -370,7 +370,7 @@ groups: rules: - alert: "KubeClientCertificateExpiration" expr: |- - apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 + apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 for: 5m labels: severity: warning @@ -380,7 +380,7 @@ groups: summary: Client certificate is about to expire. - alert: "KubeClientCertificateExpiration" expr: |- - apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 + apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 for: 5m labels: severity: critical @@ -419,7 +419,7 @@ groups: summary: Target disappeared from Prometheus target discovery. - alert: "KubeAPITerminatedRequests" expr: |- - sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / (sum(rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m]))) > 0.20 + sum by(cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / (sum by(cluster) (rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m]))) > 0.20 for: 5m labels: severity: warning diff --git a/docs/default/dashboards/k8s-resources-workloads-namespace.json b/docs/default/dashboards/k8s-resources-workloads-namespace.json index 932018b..2c09343 100644 --- a/docs/default/dashboards/k8s-resources-workloads-namespace.json +++ b/docs/default/dashboards/k8s-resources-workloads-namespace.json @@ -117,7 +117,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"})", "legendFormat": "quota - requests" }, { @@ -125,7 +125,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"})", "legendFormat": "quota - limits" } ], @@ -412,7 +412,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"})", "legendFormat": "quota - requests" }, { @@ -420,7 +420,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"})", "legendFormat": "quota - limits" } ], diff --git a/docs/default/prometheusAlerts.json b/docs/default/prometheusAlerts.json index fc40b4e..ebec107 100644 --- a/docs/default/prometheusAlerts.json +++ b/docs/default/prometheusAlerts.json @@ -427,7 +427,7 @@ "rules": [ { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000)", "for": "2m", "labels": { "long": "1h", @@ -442,7 +442,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000)", "for": "15m", "labels": { "long": "6h", @@ -457,7 +457,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000)", "for": "1h", "labels": { "long": "1d", @@ -472,7 +472,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000)", "for": "3h", "labels": { "long": "3d", @@ -492,7 +492,7 @@ "rules": [ { "alert": "KubeClientCertificateExpiration", - "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 604800", + "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 604800", "for": "5m", "labels": { "severity": "warning" @@ -505,7 +505,7 @@ }, { "alert": "KubeClientCertificateExpiration", - "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 86400", + "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 86400", "for": "5m", "labels": { "severity": "critical" @@ -556,7 +556,7 @@ }, { "alert": "KubeAPITerminatedRequests", - "expr": "sum(rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m])) / (sum(rate(apiserver_request_total{job=\"kube-apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m]))) > 0.20", + "expr": "sum by(cluster) (rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m])) / (sum by(cluster) (rate(apiserver_request_total{job=\"kube-apiserver\"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m]))) > 0.20", "for": "5m", "labels": { "severity": "warning" diff --git a/docs/default/prometheusAlerts.yaml b/docs/default/prometheusAlerts.yaml index b24308d..1881411 100644 --- a/docs/default/prometheusAlerts.yaml +++ b/docs/default/prometheusAlerts.yaml @@ -320,7 +320,7 @@ groups: rules: - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000) for: 2m labels: long: 1h @@ -332,7 +332,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000) for: 15m labels: long: 6h @@ -344,7 +344,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000) for: 1h labels: long: 1d @@ -356,7 +356,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000) for: 3h labels: long: 3d @@ -370,7 +370,7 @@ groups: rules: - alert: "KubeClientCertificateExpiration" expr: |- - apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 + apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 for: 5m labels: severity: warning @@ -380,7 +380,7 @@ groups: summary: Client certificate is about to expire. - alert: "KubeClientCertificateExpiration" expr: |- - apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 + apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 for: 5m labels: severity: critical @@ -419,7 +419,7 @@ groups: summary: Target disappeared from Prometheus target discovery. - alert: "KubeAPITerminatedRequests" expr: |- - sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / (sum(rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m]))) > 0.20 + sum by(cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / (sum by(cluster) (rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m]))) > 0.20 for: 5m labels: severity: warning diff --git a/docs/multicluster/dashboards/k8s-resources-workloads-namespace.json b/docs/multicluster/dashboards/k8s-resources-workloads-namespace.json index a6d9b17..ed72c46 100644 --- a/docs/multicluster/dashboards/k8s-resources-workloads-namespace.json +++ b/docs/multicluster/dashboards/k8s-resources-workloads-namespace.json @@ -117,7 +117,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"})", "legendFormat": "quota - requests" }, { @@ -125,7 +125,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"})", "legendFormat": "quota - limits" } ], @@ -412,7 +412,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"})", "legendFormat": "quota - requests" }, { @@ -420,7 +420,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"})", "legendFormat": "quota - limits" } ], diff --git a/docs/multicluster/prometheusAlerts.json b/docs/multicluster/prometheusAlerts.json index 4f16fef..a280775 100644 --- a/docs/multicluster/prometheusAlerts.json +++ b/docs/multicluster/prometheusAlerts.json @@ -427,7 +427,7 @@ "rules": [ { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000)", "for": "2m", "labels": { "long": "1h", @@ -442,7 +442,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000)", "for": "15m", "labels": { "long": "6h", @@ -457,7 +457,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000)", "for": "1h", "labels": { "long": "1d", @@ -472,7 +472,7 @@ }, { "alert": "KubeAPIErrorBudgetBurn", - "expr": "sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)", + "expr": "sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000)", "for": "3h", "labels": { "long": "3d", @@ -492,7 +492,7 @@ "rules": [ { "alert": "KubeClientCertificateExpiration", - "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 604800", + "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 604800", "for": "5m", "labels": { "severity": "warning" @@ -505,7 +505,7 @@ }, { "alert": "KubeClientCertificateExpiration", - "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 86400", + "expr": "apiserver_client_certificate_expiration_seconds_count{job=\"kube-apiserver\"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"kube-apiserver\"}[5m]))) < 86400", "for": "5m", "labels": { "severity": "critical" @@ -556,7 +556,7 @@ }, { "alert": "KubeAPITerminatedRequests", - "expr": "sum(rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m])) / (sum(rate(apiserver_request_total{job=\"kube-apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m]))) > 0.20", + "expr": "sum by(cluster) (rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m])) / (sum by(cluster) (rate(apiserver_request_total{job=\"kube-apiserver\"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job=\"kube-apiserver\"}[10m]))) > 0.20", "for": "5m", "labels": { "severity": "warning" diff --git a/docs/multicluster/prometheusAlerts.yaml b/docs/multicluster/prometheusAlerts.yaml index f66688d..e5c0779 100644 --- a/docs/multicluster/prometheusAlerts.yaml +++ b/docs/multicluster/prometheusAlerts.yaml @@ -320,7 +320,7 @@ groups: rules: - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000) for: 2m labels: long: 1h @@ -332,7 +332,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000) for: 15m labels: long: 6h @@ -344,7 +344,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000) for: 1h labels: long: 1d @@ -356,7 +356,7 @@ groups: summary: The API server is burning too much error budget. - alert: "KubeAPIErrorBudgetBurn" expr: |- - sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) and on(cluster) sum by(cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000) for: 3h labels: long: 3d @@ -370,7 +370,7 @@ groups: rules: - alert: "KubeClientCertificateExpiration" expr: |- - apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 + apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 for: 5m labels: severity: warning @@ -380,7 +380,7 @@ groups: summary: Client certificate is about to expire. - alert: "KubeClientCertificateExpiration" expr: |- - apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 + apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 for: 5m labels: severity: critical @@ -419,7 +419,7 @@ groups: summary: Target disappeared from Prometheus target discovery. - alert: "KubeAPITerminatedRequests" expr: |- - sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / (sum(rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m]))) > 0.20 + sum by(cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / (sum by(cluster) (rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m]))) > 0.20 for: 5m labels: severity: warning diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 936b19e..fbd8f41 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -48,8 +48,8 @@ "subdir": "" } }, - "version": "50150c585ebee6e4d9cb72218182da8f3c616515", - "sum": "0g1pn3gGq2yZyeUTx+zniK/D7jMKbAnqJ83Lke+uJ6o=" + "version": "3cb7958a56688386e8f6cb0f1258bdb1234797d6", + "sum": "f+GOrDpxTRmyYkaZKy6CCwqGoCs9MMCmEGT1cTJ0m6k=" } ], "legacyImports": false