From 4b0f169084464420e3bb058c284206b7b380f60b Mon Sep 17 00:00:00 2001 From: Markus Lippert Date: Fri, 22 Sep 2023 10:10:34 +0200 Subject: [PATCH] [kube-prometheus-stack] fix: use correct job name for dashboards to work, make deployment more intuitively (#3672) * fix: use correct job name for dashboards to work, make deployment more intuitively Signed-off-by: Markus Lippert * merge main Signed-off-by: Markus Lippert * bump version Signed-off-by: Markus Lippert --------- Signed-off-by: Markus Lippert --- charts/kube-prometheus-stack/Chart.yaml | 2 +- .../rules-1.14/windows.node.rules.yaml | 52 +++++++++---------- .../rules-1.14/windows.pod.rules.yaml | 12 ++--- charts/kube-prometheus-stack/values.yaml | 26 ++++++++-- 4 files changed, 56 insertions(+), 36 deletions(-) diff --git a/charts/kube-prometheus-stack/Chart.yaml b/charts/kube-prometheus-stack/Chart.yaml index 0d798928b608..2e5047d0b6c6 100644 --- a/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kube-prometheus-stack/Chart.yaml @@ -21,7 +21,7 @@ name: kube-prometheus-stack sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus -version: 51.1.0 +version: 51.1.1 appVersion: v0.68.0 kubeVersion: ">=1.19.0-0" home: https://github.com/prometheus-operator/kube-prometheus diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml index 5a240947a3ba..42bcb1c8d883 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml @@ -26,41 +26,41 @@ spec: rules: - expr: |- count ( - windows_system_system_up_time{job="{{ .Values.windowsMonitoring.job }}"} + windows_system_system_up_time{job="windows-exporter"} ) record: node:windows_node:sum - expr: |- count by (instance) (sum by (instance, core) ( - windows_cpu_time_total{job="{{ .Values.windowsMonitoring.job }}"} + windows_cpu_time_total{job="windows-exporter"} )) record: node:windows_node_num_cpu:sum - - expr: 1 - avg(rate(windows_cpu_time_total{job="{{ .Values.windowsMonitoring.job }}",mode="idle"}[1m])) + - expr: 1 - avg(rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m])) record: :windows_node_cpu_utilisation:avg1m - expr: |- 1 - avg by (instance) ( - rate(windows_cpu_time_total{job="{{ .Values.windowsMonitoring.job }}",mode="idle"}[1m]) + rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m]) ) record: node:windows_node_cpu_utilisation:avg1m - expr: |- 1 - - sum(windows_memory_available_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + sum(windows_memory_available_bytes{job="windows-exporter"}) / - sum(windows_os_visible_memory_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + sum(windows_os_visible_memory_bytes{job="windows-exporter"}) record: ':windows_node_memory_utilisation:' - - expr: sum(windows_memory_available_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_cache_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + - expr: sum(windows_memory_available_bytes{job="windows-exporter"} + windows_memory_cache_bytes{job="windows-exporter"}) record: :windows_node_memory_MemFreeCached_bytes:sum - - expr: (windows_memory_cache_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_modified_page_list_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_standby_cache_core_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_standby_cache_normal_priority_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_standby_cache_reserve_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + - expr: (windows_memory_cache_bytes{job="windows-exporter"} + windows_memory_modified_page_list_bytes{job="windows-exporter"} + windows_memory_standby_cache_core_bytes{job="windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="windows-exporter"}) record: node:windows_node_memory_totalCached_bytes:sum - - expr: sum(windows_os_visible_memory_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + - expr: sum(windows_os_visible_memory_bytes{job="windows-exporter"}) record: :windows_node_memory_MemTotal_bytes:sum - expr: |- sum by (instance) ( - (windows_memory_available_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + (windows_memory_available_bytes{job="windows-exporter"}) ) record: node:windows_node_memory_bytes_available:sum - expr: |- sum by (instance) ( - windows_os_visible_memory_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_os_visible_memory_bytes{job="windows-exporter"} ) record: node:windows_node_memory_bytes_total:sum - expr: |- @@ -70,43 +70,43 @@ spec: record: node:windows_node_memory_utilisation:ratio - expr: 1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum) record: 'node:windows_node_memory_utilisation:' - - expr: irate(windows_memory_swap_page_operations_total{job="{{ .Values.windowsMonitoring.job }}"}[5m]) + - expr: irate(windows_memory_swap_page_operations_total{job="windows-exporter"}[5m]) record: node:windows_node_memory_swap_io_pages:irate - expr: |- - avg(irate(windows_logical_disk_read_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) + - irate(windows_logical_disk_write_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) + avg(irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) + + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m]) ) record: :windows_node_disk_utilisation:avg_irate - expr: |- avg by (instance) ( - (irate(windows_logical_disk_read_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) + - irate(windows_logical_disk_write_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) + (irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) + + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m])) ) record: node:windows_node_disk_utilisation:avg_irate - expr: |- max by (instance,volume)( - (windows_logical_disk_size_bytes{job="{{ .Values.windowsMonitoring.job }}"} - - windows_logical_disk_free_bytes{job="{{ .Values.windowsMonitoring.job }}"}) - / windows_logical_disk_size_bytes{job="{{ .Values.windowsMonitoring.job }}"} + (windows_logical_disk_size_bytes{job="windows-exporter"} + - windows_logical_disk_free_bytes{job="windows-exporter"}) + / windows_logical_disk_size_bytes{job="windows-exporter"} ) record: 'node:windows_node_filesystem_usage:' - - expr: max by (instance, volume) (windows_logical_disk_free_bytes{job="{{ .Values.windowsMonitoring.job }}"} / windows_logical_disk_size_bytes{job="{{ .Values.windowsMonitoring.job }}"}) + - expr: max by (instance, volume) (windows_logical_disk_free_bytes{job="windows-exporter"} / windows_logical_disk_size_bytes{job="windows-exporter"}) record: 'node:windows_node_filesystem_avail:' - - expr: sum(irate(windows_net_bytes_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) + - expr: sum(irate(windows_net_bytes_total{job="windows-exporter"}[1m])) record: :windows_node_net_utilisation:sum_irate - expr: |- sum by (instance) ( - (irate(windows_net_bytes_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) + (irate(windows_net_bytes_total{job="windows-exporter"}[1m])) ) record: node:windows_node_net_utilisation:sum_irate - expr: |- - sum(irate(windows_net_packets_received_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) + - sum(irate(windows_net_packets_outbound_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) + sum(irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m])) + + sum(irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m])) record: :windows_node_net_saturation:sum_irate - expr: |- sum by (instance) ( - (irate(windows_net_packets_received_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) + - irate(windows_net_packets_outbound_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) + (irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m]) + + irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m])) ) record: node:windows_node_net_saturation:sum_irate {{- end }} diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml index e953a493fb49..5ce2f0371f59 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml @@ -24,17 +24,17 @@ spec: groups: - name: windows.pod.rules rules: - - expr: windows_container_available{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) + - expr: windows_container_available{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) record: windows_pod_container_available - - expr: windows_container_cpu_usage_seconds_total{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) + - expr: windows_container_cpu_usage_seconds_total{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) record: windows_container_total_runtime - - expr: windows_container_memory_usage_commit_bytes{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) + - expr: windows_container_memory_usage_commit_bytes{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) record: windows_container_memory_usage - - expr: windows_container_memory_usage_private_working_set_bytes{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) + - expr: windows_container_memory_usage_private_working_set_bytes{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) record: windows_container_private_working_set_usage - - expr: windows_container_network_receive_bytes_total{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) + - expr: windows_container_network_receive_bytes_total{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) record: windows_container_network_received_bytes_total - - expr: windows_container_network_transmit_bytes_total{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) + - expr: windows_container_network_transmit_bytes_total{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace) record: windows_container_network_transmitted_bytes_total - expr: |- max by (namespace, pod, container) ( diff --git a/charts/kube-prometheus-stack/values.yaml b/charts/kube-prometheus-stack/values.yaml index 697ef599f8d1..ea152cf71594 100644 --- a/charts/kube-prometheus-stack/values.yaml +++ b/charts/kube-prometheus-stack/values.yaml @@ -201,10 +201,30 @@ global: # - "image-pull-secret" windowsMonitoring: - ## Deploys the windows-exporter and Windows-specific dashboards and rules + ## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter') enabled: false - ## Job must match jobLabel in the PodMonitor/ServiceMonitor and is used for the rules - job: prometheus-windows-exporter + +## Configuration for prometheus-windows-exporter +## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-windows-exporter +## +prometheus-windows-exporter: + ## Enable ServiceMonitor and set Kubernetes label to use as a job label + ## + prometheus: + monitor: + enabled: true + jobLabel: jobLabel + + ## Set job label to 'windows-exporter' as required by the default Prometheus rules and Grafana dashboards + ## + podLabels: + jobLabel: windows-exporter + + ## Enable memory and container metrics as required by the default Prometheus rules and Grafana dashboards + ## + config: |- + collectors: + enabled: '[defaults],memory,container' ## Configuration for alertmanager ## ref: https://prometheus.io/docs/alerting/alertmanager/