Skip to content

Commit

Permalink
nopo11y dashboard uid and alert changes (#5)
Browse files Browse the repository at this point in the history
* Added _helpers.tpl to generate a Grafana dashboard uid of up to 40 characters
* Removed duplicate alerts from the chart
* Updated grafana dashboard uid
* Updated alerts with the new dashboard uid
* Updated helm chart version to 1.0.1
* Updated helm chart version to 1.0.1
  • Loading branch information
shehbaz-pathan authored May 22, 2024
1 parent 911f4d9 commit 4f0eece
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 67 deletions.
4 changes: 2 additions & 2 deletions charts/nopo11y/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0.0
version: 1.0.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.0.0"
appVersion: "1.0.1"
3 changes: 3 additions & 0 deletions charts/nopo11y/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{{/*
dashboard-uid: renders "<release name>-<release namespace>", cut to at most
40 characters. The chart uses it as the Grafana dashboard uid and in the
dashboard links of the default alerts, so both always resolve to the same value.
Call it with a context that exposes .Release, e.g. include "dashboard-uid" .
*/}}
{{- define "dashboard-uid" -}}
{{- $fullName := printf "%s-%s" .Release.Name .Release.Namespace -}}
{{- trunc 40 $fullName -}}
{{- end -}}
68 changes: 4 additions & 64 deletions charts/nopo11y/templates/defaultAlerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,66 +10,6 @@ spec:
groups:
- name: {{ .Values.appLabel }}-default-alert-rules
rules:
- alert: {{ .Values.appLabel }}HighCpuUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 80
{{- else }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 80
{{- end }}
for: 10m
annotations:
description: CPU utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 80% from last 5 minutes .
summary: CPU Utilization went over 80% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: warning
- alert: {{ .Values.appLabel }}HighCpuUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 90
{{- else }}
expr: 100 * max(rate(container_cpu_usage_seconds_total{pod=~"{{ .Values.deploymentName }}.*"}[5m])/ on (container, pod) kube_pod_container_resource_requests{resource="cpu"}) by (pod) > 90
{{- end }}
for: 10m
annotations:
description: CPU utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 90% from last 5 minutes.
summary: CPU Utilization went over 90% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: critical
- alert: {{ .Values.appLabel }}HighMemoryUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 80
{{- else }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 80
{{- end }}
for: 10m
annotations:
description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 80% from last 5 minutes.
summary: Memory Utilization went over 80% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: warning
- alert: {{ .Values.appLabel }}HighMemoryUtilization
{{- if .Values.includeReleaseNameInMetricsLabels }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Release.Name }}-{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 90
{{- else }}
expr: 100 * max( container_memory_working_set_bytes{pod=~"{{ .Values.deploymentName }}.*"} / on (container, pod) kube_pod_container_resource_limits{resource="memory"}) by (pod) > 90
{{- end }}
for: 10m
annotations:
description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is above 90% from last 5 minutes.
summary: Memory Utilization went over 90% for pod {{ "{{" }} $labels.pod {{ "}}" }}.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
{{- end }}
labels:
severity: critical
{{- if .Values.istioMetrics.enabled }}
- alert: {{ .Values.appLabel }}High5xxErrorRate
{{- if .Values.includeReleaseNameInMetricsLabels }}
Expand All @@ -81,7 +21,7 @@ spec:
description: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 5xx errors rate from last 5 minutes.
summary: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 5xx error rate.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }}
{{- end }}
labels:
severity: critical
Expand All @@ -94,7 +34,7 @@ spec:
for: 5m
annotations:
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview
dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }}
{{- end }}
description: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 4xx errors rate from last 5 minutes.
summary: {{- if .Values.includeReleaseNameInMetricsLabels }} {{ .Release.Name }}-{{ .Values.appLabel }}{{- else }} {{ .Values.appLabel }}{{- end }} service is experiencing high 4xx error rate.
Expand All @@ -108,7 +48,7 @@ spec:
description: {{ .Values.appLabel }} service is experiencing high 5xx errors rate from last 5 minutes.
summary: {{ .Values.appLabel }} is experiencing high 5xx error rate.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{ .Values.appLabel }}-overview
dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }}
{{- end }}
labels:
severity: critical
Expand All @@ -119,7 +59,7 @@ spec:
description: {{ .Values.appLabel }} service is experiencing high 4xx errors rate from last 5 minutes.
summary: {{ .Values.appLabel }} service is experiencing high 4xx error rate.
{{- if .Values.grafanaURL }}
dashboard: {{ .Values.grafanaURL }}/d/{{ .Values.appLabel }}-overview
dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" . }}
{{- end }}
labels:
severity: warning
Expand Down
2 changes: 1 addition & 1 deletion charts/nopo11y/templates/defaultDashboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1540,7 +1540,7 @@ data:
"timepicker": {},
"timezone": "",
"title": "{{- if .Values.includeReleaseNameInMetricsLabels }}{{ title .Release.Name }}-{{ title .Values.appLabel }}{{- else }}{{ title .Values.appLabel }}{{- end }} Overview - Dashboard",
"uid": "{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ .Values.appLabel }}{{- else }}{{ .Values.appLabel }}{{- end }}-overview",
{{- /* Same helper as the alert dashboard links, so both resolve to one uid. */}}
"uid": "{{ include "dashboard-uid" . }}",
"version": 10,
"weekStart": ""
}
Expand Down

0 comments on commit 4f0eece

Please sign in to comment.