Skip to content

Commit

Permalink
Minor changes in nopo11y-stack (#3)
Browse files Browse the repository at this point in the history
* Updated SLO dashboard

* Removed unneeded metrics from the alerts

* Updated chart version to 1.1.1

* Updated nopo11y health check values
  • Loading branch information
shehbaz-pathan authored May 17, 2024
1 parent f823951 commit f20d4c9
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 26 deletions.
2 changes: 1 addition & 1 deletion charts/nopo11y-stack/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ dependencies:
description: A Helm chart for observability stack
name: nopo11y-stack
type: application
version: 1.1.0
version: 1.1.1
14 changes: 7 additions & 7 deletions charts/nopo11y-stack/dashboards/slo-details.json
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "name"
Expand Down Expand Up @@ -587,7 +587,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "name"
Expand Down Expand Up @@ -658,7 +658,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "value_and_name"
Expand Down Expand Up @@ -729,7 +729,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "value_and_name"
Expand Down Expand Up @@ -800,7 +800,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "value_and_name"
Expand Down Expand Up @@ -1096,7 +1096,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "value_and_name"
Expand Down Expand Up @@ -1181,7 +1181,7 @@
"lastNotNull"
],
"fields": "",
"values": false
"values": true
},
"text": {},
"textMode": "value_and_name"
Expand Down
20 changes: 2 additions & 18 deletions charts/nopo11y-stack/templates/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,10 @@ spec:
- alert: HighCpuUtilization
expr: |
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
* on(pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
rate(container_cpu_usage_seconds_total{container!=""}[5m])
) by (pod)
/sum(
kube_pod_container_resource_limits{resource="cpu"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
) by (pod) * 100 > 75
for: 5m
annotations:
Expand All @@ -30,14 +26,10 @@ spec:
- alert: HighCpuUtilization
expr: |
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
* on(pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
rate(container_cpu_usage_seconds_total{container!=""}[5m])
) by (pod)
/sum(
kube_pod_container_resource_limits{resource="cpu"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
) by (pod) * 100 > 90
for: 5m
annotations:
Expand All @@ -49,13 +41,9 @@ spec:
expr: |
sum(
container_memory_working_set_bytes{container!="", image!=""}
* on(pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
) by (pod)
/sum(
kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
) by (pod) * 100 > 75
for: 5m
annotations:
Expand All @@ -66,13 +54,9 @@ spec:
- alert: HighMemoryUtilization
expr: sum(
container_memory_working_set_bytes{container!="", image!=""}
* on(pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
) by (pod)
/sum(
kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel
) by (pod) * 100 > 90
annotations:
description: Memory utilization of pod {{ "{{" }} $labels.pod {{ "}}" }} is went above 90%.
Expand Down
1 change: 1 addition & 0 deletions charts/nopo11y-stack/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16028,6 +16028,7 @@ nopo11y_health_check:
memory: 100Mi
## You can provide the below environment variables to the health check
## NAMESPACE - defaults to the `default` namespace
## HEALTHY_PODS_PERCENTAGE - default is 30%
## HEALTHY_POD_CPU_UTILIZATION_THRESHOLD - default is 80%
## HEALTHY_POD_MEMORY_UTILIZATION_THRESHOLD - default is 80%
## HEALTHY_PVC_FREE_SPACE - default is 200mb
Expand Down

0 comments on commit f20d4c9

Please sign in to comment.