From 4f0eece0853e915225731a47cc0d4077ee09366c Mon Sep 17 00:00:00 2001 From: shehbaz-pathan <46107507+shehbaz-pathan@users.noreply.github.com> Date: Wed, 22 May 2024 18:03:57 +0530 Subject: [PATCH] nopo11y dashboard uid and alert changes (#5) * Added _helpers.tpl to generate grafana dashboard uid up to 40 characters * Removed duplicate alerts from the chart * Updated grafana dashboard uid * Updated alerts with the new dashboard uid * Updated helm chart version to 1.0.1 * Updated helm chart version to 1.0.1 --- charts/nopo11y/Chart.yaml | 4 +- charts/nopo11y/templates/_helpers.tpl | 3 + charts/nopo11y/templates/defaultAlerts.yaml | 68 ++----------------- .../nopo11y/templates/defaultDashboard.yaml | 2 +- 4 files changed, 10 insertions(+), 67 deletions(-) create mode 100644 charts/nopo11y/templates/_helpers.tpl diff --git a/charts/nopo11y/Chart.yaml b/charts/nopo11y/Chart.yaml index 48836cd..f2daa66 100644 --- a/charts/nopo11y/Chart.yaml +++ b/charts/nopo11y/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0.0 +version: 1.0.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "1.0.0" +appVersion: "1.0.1" diff --git a/charts/nopo11y/templates/_helpers.tpl b/charts/nopo11y/templates/_helpers.tpl new file mode 100644 index 0000000..f380fe7 --- /dev/null +++ b/charts/nopo11y/templates/_helpers.tpl @@ -0,0 +1,3 @@ +{{- define "dashboard-uid" -}} +{{- printf "%s-%s" .Release.Name .Release.Namespace | trunc 40 -}} +{{- end -}} \ No newline at end of file diff --git a/charts/nopo11y/templates/defaultAlerts.yaml b/charts/nopo11y/templates/defaultAlerts.yaml index fa04a87..74ca8ac 100644 --- a/charts/nopo11y/templates/defaultAlerts.yaml +++ b/charts/nopo11y/templates/defaultAlerts.yaml @@ -10,66 +10,6 @@ spec: groups: - name: {{ .Values.appLabel }}-default-alert-rules rules: - - alert: {{ .Values.appLabel }}HighCpuUtilization - {{- if .Values.includeReleaseNameInMetricsLabels }} - expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 80 - {{- else }} - expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 80 - {{- end }} - for: 10m - annotations: - description: CPU utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 80% from last 5 minutes . - summary: CPU Utilization went over 80% for pod {{ "{{" }} $labels.pod {{ "}}" }}. 
- {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview - {{- end }} - labels: - severity: warning - - alert: {{ .Values.appLabel }}HighCpuUtilization - {{- if .Values.includeReleaseNameInMetricsLabels }} - expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 90 - {{- else }} - expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 90 - {{- end }} - for: 10m - annotations: - description: CPU utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 90% from last 5 minutes. - summary: CPU Utilization went over 90% for pod {{ "{{" }} $labels.pod {{ "}}" }}. - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview - {{- end }} - labels: - severity: critical - - alert: {{ .Values.appLabel }}HighMemoryUtilization - {{- if .Values.includeReleaseNameInMetricsLabels }} - expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 80 - {{- else }} - expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 80 - {{- end }} - for: 10m - annotations: - description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 80% from last 5 minutes. 
- summary: Memory Utilization went over 80% for pod {{ "{{" }} $labels.pod {{ "}}" }}. - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview - {{- end }} - labels: - severity: warning - - alert: {{ .Values.appLabel }}HighMemoryUtilization - {{- if .Values.includeReleaseNameInMetricsLabels }} - expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 90 - {{- else }} - expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 90 - {{- end }} - for: 10m - annotations: - description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 90% from last 5 minutes. - summary: Memory Utilization went over 90% for pod {{ "{{" }} $labels.pod {{ "}}" }}. - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview - {{- end }} - labels: - severity: critical {{- if .Values.istioMetrics.enabled }} - alert: {{ .Values.appLabel }}High5xxErrorRate {{- if .Values.includeReleaseNameInMetricsLabels }} @@ -81,7 +21,7 @@ spec: description: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 5xx errors rate from last 5 minutes. summary: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 5xx error rate. 
{{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview + dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }} {{- end }} labels: severity: critical @@ -94,7 +34,7 @@ spec: for: 5m annotations: {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview + dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }} {{- end }} description: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 4xx errors rate from last 5 minutes. summary: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 4xx error rate. @@ -108,7 +48,7 @@ spec: description: {{ .Values.appLabel }} service is experiencing high 5xx errors rate from last 5 minutes. summary: {{ .Values.appLabel }} is experiencing high 5xx error rate. {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{ .Values.appLabel }}-overview + dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }} {{- end }} labels: severity: critical @@ -119,7 +59,7 @@ spec: description: {{ .Values.appLabel }} service is experiencing high 4xx errors rate from last 5 minutes. summary: {{ .Values.appLabel }} service is experiencing high 4xx error rate. {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{ .Values.appLabel }}-overview + dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . 
}} {{- end }} labels: severity: warning diff --git a/charts/nopo11y/templates/defaultDashboard.yaml b/charts/nopo11y/templates/defaultDashboard.yaml index b8225de..fc6cfba 100644 --- a/charts/nopo11y/templates/defaultDashboard.yaml +++ b/charts/nopo11y/templates/defaultDashboard.yaml @@ -1540,7 +1540,7 @@ data: "timepicker": {}, "timezone": "", "title": "{{- if .Values.includeReleaseNameInMetricsLabels }}{{ title .Release.Name }}-{{ title .Values.appLabel }}{{- else }}{{ title .Values.appLabel }}{{- end }} Overview - Dashboard", - "uid": "{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview", + "uid": "{{ include "dashboard-uid" . }}", "version": 10, "weekStart": "" }