From dcbb3cd9cef50e41ae9cf8262046ecdd11357370 Mon Sep 17 00:00:00 2001 From: itaynvn Date: Thu, 9 Nov 2023 14:05:06 +0200 Subject: [PATCH 1/6] init --- api/v1/monitoring.go | 2 +- pkg/monitoring/tmpl/kube-state-metrics/clusterrole.tpl | 8 ++++++++ pkg/monitoring/tmpl/kube-state-metrics/dep.tpl | 4 ++-- pkg/monitoring/tmpl/kube-state-metrics/servicemonitor.tpl | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/api/v1/monitoring.go b/api/v1/monitoring.go index a65301f7..167878be 100644 --- a/api/v1/monitoring.go +++ b/api/v1/monitoring.go @@ -179,7 +179,7 @@ var infraMonitoringDefault = CnvrgInfraMonitoring{ }, KubeStateMetrics: KubeStateMetrics{ Enabled: false, - Image: "kube-state-metrics:v1.9.7", + Image: "kube-state-metrics:v2.8.1", }, NodeExporter: NodeExporter{ Enabled: false, diff --git a/pkg/monitoring/tmpl/kube-state-metrics/clusterrole.tpl b/pkg/monitoring/tmpl/kube-state-metrics/clusterrole.tpl index eb4d5932..1071f747 100644 --- a/pkg/monitoring/tmpl/kube-state-metrics/clusterrole.tpl +++ b/pkg/monitoring/tmpl/kube-state-metrics/clusterrole.tpl @@ -112,6 +112,14 @@ rules: - networking.k8s.io resources: - networkpolicies + - ingresses + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases verbs: - list - watch diff --git a/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl b/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl index b127af5b..ff61d6d0 100644 --- a/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl +++ b/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl @@ -7,7 +7,7 @@ metadata: {{- end }} labels: app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: v1.9.7 + app.kubernetes.io/version: v2.8.1 {{- range $k, $v := .Spec.Labels }} {{$k}}: "{{$v}}" {{- end }} @@ -26,7 +26,7 @@ spec: {{- end }} labels: app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: v1.9.7 + app.kubernetes.io/version: v2.8.1 {{- range $k, $v := .Spec.Labels }} {{$k}}: "{{$v}}" {{- end }} diff --git 
a/pkg/monitoring/tmpl/kube-state-metrics/servicemonitor.tpl b/pkg/monitoring/tmpl/kube-state-metrics/servicemonitor.tpl index e995faa4..1d47448b 100644 --- a/pkg/monitoring/tmpl/kube-state-metrics/servicemonitor.tpl +++ b/pkg/monitoring/tmpl/kube-state-metrics/servicemonitor.tpl @@ -7,7 +7,7 @@ metadata: {{- end }} labels: app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.7 + app.kubernetes.io/version: 2.8.1 cnvrg-infra-prometheus: {{ .Name }}-{{ ns .}} {{- range $k, $v := .Spec.Labels }} {{$k}}: "{{$v}}" From 94157fe39ce8679313ecbeb50ae805323974f37b Mon Sep 17 00:00:00 2001 From: mahmod jabareen Date: Fri, 15 Dec 2023 12:08:19 +0200 Subject: [PATCH 2/6] replacing queries to work with kube-state-metrics v2.8.1 and v1.9.7 --- .../dashboards-data/grafana-fluentbit.json | 10 +++---- .../dashboards-data/grafana-idle-metrics.json | 2 +- .../grafana-k8s-resources-cluster.json | 26 +++++++++---------- .../grafana-k8s-resources-namespace.json | 24 ++++++++--------- .../grafana-k8s-resources-node.json | 16 ++++++------ .../grafana-k8s-resources-pod.json | 24 ++++++++--------- .../grafana-k8s-resources-workload.json | 16 ++++++------ ...ana-k8s-resources-workloads-namespace.json | 16 ++++++------ .../tmpl/prometheus/instance/ccp/rules.tpl | 8 +++--- .../tmpl/prometheus/instance/infra/rules.tpl | 8 +++--- 10 files changed, 75 insertions(+), 75 deletions(-) diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-fluentbit.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-fluentbit.json index 1dda43ee..fd67a4e8 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-fluentbit.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-fluentbit.json @@ -450,7 +450,7 @@ "tableColumn": "", "targets": [ { - "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)", + "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum) / 
sum(kube_node_status_allocatable{resource=\"memory\"})", "instant": false, "refId": "A" } @@ -541,7 +541,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\"}) / sum(kube_node_status_allocatable{resource=\"cpu\"})", "instant": false, "refId": "A" } @@ -632,7 +632,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\"}) / sum(kube_node_status_allocatable{resource=\"memory\"})", "instant": false, "refId": "A" } @@ -723,7 +723,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\"}) / sum(kube_node_status_allocatable{resource=\"cpu\"})", "instant": false, "refId": "A" } @@ -814,7 +814,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\"}) / sum(kube_node_status_allocatable{resource=\"memory\"})", "instant": false, "refId": "A" } diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-idle-metrics.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-idle-metrics.json index 1d20dbef..d7f7ad16 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-idle-metrics.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-idle-metrics.json @@ -264,7 +264,7 @@ "pluginVersion": "6.7.4", "targets": [ { - "expr": "(avg_over_time(container_memory_working_set_bytes{container=\"main\", name=~\".*$pod.*\"}[$idle_timeout]) / on (instanse) 
kube_pod_container_resource_limits_memory_bytes{pod=~\".*$pod.*\"}) * 100", + "expr": "(avg_over_time(container_memory_working_set_bytes{container=\"main\", name=~\".*$pod.*\"}[$idle_timeout]) / on (instanse) kube_pod_container_resource_limits{resource=\"memory\", pod=~\".*$pod.*\"}) * 100", "interval": "", "legendFormat": "", "refId": "A" diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json index 38fc9848..165ae6dd 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json @@ -132,7 +132,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -212,7 +212,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\", cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -292,7 +292,7 @@ "steppedLine": false, "targets": [ { - "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", 
"instant": true, "intervalFactor": 2, @@ -372,7 +372,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -452,7 +452,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -820,7 +820,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -829,7 +829,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -838,7 +838,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\", 
cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -847,7 +847,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1219,7 +1219,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1228,7 +1228,7 @@ "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1237,7 +1237,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1246,7 +1246,7 @@ "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": 
"sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json index 411e3a62..a00f9559 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json @@ -51,7 +51,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -131,7 +131,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -211,7 +211,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", 
image!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{resource=\"memory\", namespace=\"$namespace\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -291,7 +291,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{resource=\"memory\",namespace=\"$namespace\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -643,7 +643,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -652,7 +652,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -661,7 +661,7 @@ "step": 10 }, { - "expr": 
"sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -670,7 +670,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1077,7 +1077,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1086,7 +1086,7 @@ "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{resource=\"memory\",namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1095,7 +1095,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": 
"sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -1104,7 +1104,7 @@ "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{resource=\"memory\",namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-node.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-node.json index 8e30d4c4..8d3834ab 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-node.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-node.json @@ -275,7 +275,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -284,7 +284,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, 
"intervalFactor": 2, @@ -293,7 +293,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -302,7 +302,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -673,7 +673,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -682,7 +682,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{resource=\"memory\",node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -691,7 +691,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": 
"sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -700,7 +700,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{resource=\"memory\",node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-pod.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-pod.json index 2c165393..e6afe685 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-pod.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-pod.json @@ -76,7 +76,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(\n kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "requests", @@ -84,7 +84,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(\n kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "limits", @@ -410,7 +410,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by 
(container)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -419,7 +419,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -428,7 +428,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -437,7 +437,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -560,7 
+560,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(\n kube_pod_container_resource_requests{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "requests", @@ -568,7 +568,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(\n kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "limits", @@ -844,7 +844,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -853,7 +853,7 @@ "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -862,7 +862,7 @@ "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", 
+ "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -871,7 +871,7 @@ "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workload.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workload.json index b9f36c7b..038845e4 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workload.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workload.json @@ -275,7 +275,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", 
"format": "table", "instant": true, "intervalFactor": 2, @@ -284,7 +284,7 @@ "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -293,7 +293,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n * 
on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -302,7 +302,7 @@ "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -622,7 +622,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", 
namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -631,7 +631,7 @@ "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -640,7 +640,7 @@ "step": 10 }, { - "expr": "sum(\n 
kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -649,7 +649,7 @@ "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workloads-namespace.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workloads-namespace.json index bea390c4..0b4c9995 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workloads-namespace.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-workloads-namespace.json @@ -354,7 +354,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -363,7 +363,7 @@ "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -372,7 +372,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -381,7 +381,7 @@ "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by 
(workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -780,7 +780,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -789,7 +789,7 @@ "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n 
* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -798,7 +798,7 @@ "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, 
"intervalFactor": 2, @@ -807,7 +807,7 @@ "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{resource=\"memory\",cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl b/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl index 0fb046e1..6e3c2a58 100644 --- a/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl +++ b/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl @@ -608,24 +608,24 @@ spec: sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} + kube_pod_container_resource_requests{resource="memory", job="kube-state-metrics"} ) * 
on(namespace, pod) group_left() max by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) - record: namespace:kube_pod_container_resource_requests_memory_bytes:sum + record: sum(kube_pod_container_resource_requests{resource="memory"}) by (namespace) - expr: | sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} ) * on(namespace, pod) group_left() max by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) - record: namespace:kube_pod_container_resource_requests_cpu_cores:sum + record: sum(kube_pod_container_resource_requests{resource="cpu"}) by (namespace) - expr: | max by (cluster, namespace, workload, pod) ( label_replace( diff --git a/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl b/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl index 954839bc..776c7623 100644 --- a/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl +++ b/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl @@ -608,24 +608,24 @@ spec: sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} + kube_pod_container_resource_requests{resource="memory", job="kube-state-metrics"} ) * on(namespace, pod) group_left() max by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) - record: namespace:kube_pod_container_resource_requests_memory_bytes:sum + record: sum(kube_pod_container_resource_requests{resource="memory"}) by (namespace) - expr: | sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} ) * on(namespace, pod) group_left() max 
by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) - record: namespace:kube_pod_container_resource_requests_cpu_cores:sum + record: sum(kube_pod_container_resource_requests{resource="cpu"}) by (namespace) - expr: | max by (cluster, namespace, workload, pod) ( label_replace( From 81d293166043520931946a7985af97da8ce9ff01 Mon Sep 17 00:00:00 2001 From: mahmod jabareen Date: Thu, 21 Dec 2023 15:31:19 +0200 Subject: [PATCH 3/6] fixing some queries --- .../dashboards-data/grafana-k8s-resources-cluster.json | 4 ++-- .../dashboards-data/grafana-k8s-resources-namespace.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json index 165ae6dd..e9112c38 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-cluster.json @@ -132,7 +132,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\"cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\", cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -829,7 +829,7 @@ "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{resource=\"cpu\"{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\"}) by (namespace)", "format":
"table", "instant": true, "intervalFactor": 2, diff --git a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json index a00f9559..5ac67967 100644 --- a/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json +++ b/pkg/monitoring/tmpl/grafana/dashboards-data/grafana-k8s-resources-namespace.json @@ -51,7 +51,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{resource=\"cpu\"cluster=\"$cluster\", namespace=\"$namespace\"})", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{resource=\"cpu\",cluster=\"$cluster\", namespace=\"$namespace\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -1086,7 +1086,7 @@ "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{resource=\"memory\"namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{resource=\"memory\",namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, From a12f0aa0ac04d0048aa22baec531187dcdb6f250 Mon Sep 17 00:00:00 2001 From: mahmod jabareen Date: Tue, 26 Dec 2023 13:21:29 +0200 Subject: [PATCH 4/6] fixing rules --- pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl | 4 ++-- pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl b/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl index 6e3c2a58..85235405 100644 --- a/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl +++ b/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl @@ -614,7 +614,7 @@ spec: ) ) ) - record: sum(kube_pod_container_resource_requests{resource="memory"}) by (namespace) + record: namespace:kube_pod_container_resource_requests_memory_bytes:sum - expr: | sum by (namespace) ( sum by (namespace, pod) ( @@ -625,7 +625,7 @@ spec: ) ) ) - record: sum(kube_pod_container_resource_requests{resource="cpu"}) by (namespace) + record: namespace:kube_pod_container_resource_requests_cpu_cores:sum - expr: | max by (cluster, namespace, workload, pod) ( label_replace( diff --git a/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl b/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl index 776c7623..9b53c03b 100644 --- a/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl +++ b/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl @@ -614,7 +614,7 @@ spec: ) ) ) - record: sum(kube_pod_container_resource_requests{resource="memory"}) by (namespace) + record: namespace:kube_pod_container_resource_requests_memory_bytes:sum - expr: | sum by (namespace) ( sum by (namespace, pod) ( @@ -625,7 +625,7 @@ spec: ) ) ) - record: sum(kube_pod_container_resource_requests{resource="cpu"}) by (namespace) + record: namespace:kube_pod_container_resource_requests_cpu_cores:sum - expr: | max by (cluster, namespace, workload, pod) ( label_replace( From f9522a3d40806fb78ee7d5a24b6cfd9833ab27bb Mon Sep 17 00:00:00 2001 From: mahmod jabareen Date: Thu, 4 Jan 2024 14:48:48 +0200 Subject: [PATCH 5/6] return the main rules for testing --- pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl | 4 ++-- pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl 
b/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl index 85235405..0fb046e1 100644 --- a/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl +++ b/pkg/monitoring/tmpl/prometheus/instance/ccp/rules.tpl @@ -608,7 +608,7 @@ spec: sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests{resource="memory", job="kube-state-metrics"} + kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} ) * on(namespace, pod) group_left() max by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) @@ -619,7 +619,7 @@ spec: sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} ) * on(namespace, pod) group_left() max by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) diff --git a/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl b/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl index 9b53c03b..954839bc 100644 --- a/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl +++ b/pkg/monitoring/tmpl/prometheus/instance/infra/rules.tpl @@ -608,7 +608,7 @@ spec: sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests{resource="memory", job="kube-state-metrics"} + kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} ) * on(namespace, pod) group_left() max by (namespace, pod) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) @@ -619,7 +619,7 @@ spec: sum by (namespace) ( sum by (namespace, pod) ( max by (namespace, pod, container) ( - kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} ) * on(namespace, pod) group_left() max by (namespace, pod) ( 
kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) From e4628869cf2c2560f8d7ffff6d5852ca5d04e5fd Mon Sep 17 00:00:00 2001 From: maayan-bouzaglou <104310823+maayan-bouzaglou@users.noreply.github.com> Date: Tue, 30 Jan 2024 10:08:18 +0200 Subject: [PATCH 6/6] DEV-21955 Update dep.tpl --- pkg/monitoring/tmpl/kube-state-metrics/dep.tpl | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl b/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl index ff61d6d0..463d6f6b 100644 --- a/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl +++ b/pkg/monitoring/tmpl/kube-state-metrics/dep.tpl @@ -45,6 +45,7 @@ spec: - --port=8081 - --telemetry-host=127.0.0.1 - --telemetry-port=8082 + - --metric-labels-allowlist=nodes=[*] image: {{ image .Spec.ImageHub .Spec.Monitoring.KubeStateMetrics.Image }} name: kube-state-metrics resources: