Skip to content

Commit

Permalink
[kube-prometheus-stack] fix: use correct job name for dashboards to w…
Browse files Browse the repository at this point in the history
…ork, make deployment more intuitive (#3672)

* fix: use correct job name for dashboards to work, make deployment more intuitive

Signed-off-by: Markus Lippert <[email protected]>

* merge main

Signed-off-by: Markus Lippert <[email protected]>

* bump version

Signed-off-by: Markus Lippert <[email protected]>

---------

Signed-off-by: Markus Lippert <[email protected]>
  • Loading branch information
lippertmarkus authored Sep 22, 2023
1 parent fe84f27 commit 9c8e405
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 36 deletions.
2 changes: 1 addition & 1 deletion charts/kube-prometheus-stack/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ name: kube-prometheus-stack
sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
version: 51.1.0
version: 51.1.1
appVersion: v0.68.0
kubeVersion: ">=1.19.0-0"
home: https://github.com/prometheus-operator/kube-prometheus
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,41 @@ spec:
rules:
- expr: |-
count (
windows_system_system_up_time{job="{{ .Values.windowsMonitoring.job }}"}
windows_system_system_up_time{job="windows-exporter"}
)
record: node:windows_node:sum
- expr: |-
count by (instance) (sum by (instance, core) (
windows_cpu_time_total{job="{{ .Values.windowsMonitoring.job }}"}
windows_cpu_time_total{job="windows-exporter"}
))
record: node:windows_node_num_cpu:sum
- expr: 1 - avg(rate(windows_cpu_time_total{job="{{ .Values.windowsMonitoring.job }}",mode="idle"}[1m]))
- expr: 1 - avg(rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m]))
record: :windows_node_cpu_utilisation:avg1m
- expr: |-
1 - avg by (instance) (
rate(windows_cpu_time_total{job="{{ .Values.windowsMonitoring.job }}",mode="idle"}[1m])
rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m])
)
record: node:windows_node_cpu_utilisation:avg1m
- expr: |-
1 -
sum(windows_memory_available_bytes{job="{{ .Values.windowsMonitoring.job }}"})
sum(windows_memory_available_bytes{job="windows-exporter"})
/
sum(windows_os_visible_memory_bytes{job="{{ .Values.windowsMonitoring.job }}"})
sum(windows_os_visible_memory_bytes{job="windows-exporter"})
record: ':windows_node_memory_utilisation:'
- expr: sum(windows_memory_available_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_cache_bytes{job="{{ .Values.windowsMonitoring.job }}"})
- expr: sum(windows_memory_available_bytes{job="windows-exporter"} + windows_memory_cache_bytes{job="windows-exporter"})
record: :windows_node_memory_MemFreeCached_bytes:sum
- expr: (windows_memory_cache_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_modified_page_list_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_standby_cache_core_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_standby_cache_normal_priority_bytes{job="{{ .Values.windowsMonitoring.job }}"} + windows_memory_standby_cache_reserve_bytes{job="{{ .Values.windowsMonitoring.job }}"})
- expr: (windows_memory_cache_bytes{job="windows-exporter"} + windows_memory_modified_page_list_bytes{job="windows-exporter"} + windows_memory_standby_cache_core_bytes{job="windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="windows-exporter"})
record: node:windows_node_memory_totalCached_bytes:sum
- expr: sum(windows_os_visible_memory_bytes{job="{{ .Values.windowsMonitoring.job }}"})
- expr: sum(windows_os_visible_memory_bytes{job="windows-exporter"})
record: :windows_node_memory_MemTotal_bytes:sum
- expr: |-
sum by (instance) (
(windows_memory_available_bytes{job="{{ .Values.windowsMonitoring.job }}"})
(windows_memory_available_bytes{job="windows-exporter"})
)
record: node:windows_node_memory_bytes_available:sum
- expr: |-
sum by (instance) (
windows_os_visible_memory_bytes{job="{{ .Values.windowsMonitoring.job }}"}
windows_os_visible_memory_bytes{job="windows-exporter"}
)
record: node:windows_node_memory_bytes_total:sum
- expr: |-
Expand All @@ -70,43 +70,43 @@ spec:
record: node:windows_node_memory_utilisation:ratio
- expr: 1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)
record: 'node:windows_node_memory_utilisation:'
- expr: irate(windows_memory_swap_page_operations_total{job="{{ .Values.windowsMonitoring.job }}"}[5m])
- expr: irate(windows_memory_swap_page_operations_total{job="windows-exporter"}[5m])
record: node:windows_node_memory_swap_io_pages:irate
- expr: |-
avg(irate(windows_logical_disk_read_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) +
irate(windows_logical_disk_write_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])
avg(irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) +
irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m])
)
record: :windows_node_disk_utilisation:avg_irate
- expr: |-
avg by (instance) (
(irate(windows_logical_disk_read_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) +
irate(windows_logical_disk_write_seconds_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]))
(irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) +
irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m]))
)
record: node:windows_node_disk_utilisation:avg_irate
- expr: |-
max by (instance,volume)(
(windows_logical_disk_size_bytes{job="{{ .Values.windowsMonitoring.job }}"}
- windows_logical_disk_free_bytes{job="{{ .Values.windowsMonitoring.job }}"})
/ windows_logical_disk_size_bytes{job="{{ .Values.windowsMonitoring.job }}"}
(windows_logical_disk_size_bytes{job="windows-exporter"}
- windows_logical_disk_free_bytes{job="windows-exporter"})
/ windows_logical_disk_size_bytes{job="windows-exporter"}
)
record: 'node:windows_node_filesystem_usage:'
- expr: max by (instance, volume) (windows_logical_disk_free_bytes{job="{{ .Values.windowsMonitoring.job }}"} / windows_logical_disk_size_bytes{job="{{ .Values.windowsMonitoring.job }}"})
- expr: max by (instance, volume) (windows_logical_disk_free_bytes{job="windows-exporter"} / windows_logical_disk_size_bytes{job="windows-exporter"})
record: 'node:windows_node_filesystem_avail:'
- expr: sum(irate(windows_net_bytes_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]))
- expr: sum(irate(windows_net_bytes_total{job="windows-exporter"}[1m]))
record: :windows_node_net_utilisation:sum_irate
- expr: |-
sum by (instance) (
(irate(windows_net_bytes_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]))
(irate(windows_net_bytes_total{job="windows-exporter"}[1m]))
)
record: node:windows_node_net_utilisation:sum_irate
- expr: |-
sum(irate(windows_net_packets_received_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m])) +
sum(irate(windows_net_packets_outbound_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]))
sum(irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m])) +
sum(irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m]))
record: :windows_node_net_saturation:sum_irate
- expr: |-
sum by (instance) (
(irate(windows_net_packets_received_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]) +
irate(windows_net_packets_outbound_discarded_total{job="{{ .Values.windowsMonitoring.job }}"}[1m]))
(irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m]) +
irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m]))
)
record: node:windows_node_net_saturation:sum_irate
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ spec:
groups:
- name: windows.pod.rules
rules:
- expr: windows_container_available{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
- expr: windows_container_available{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
record: windows_pod_container_available
- expr: windows_container_cpu_usage_seconds_total{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
- expr: windows_container_cpu_usage_seconds_total{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
record: windows_container_total_runtime
- expr: windows_container_memory_usage_commit_bytes{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
- expr: windows_container_memory_usage_commit_bytes{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
record: windows_container_memory_usage
- expr: windows_container_memory_usage_private_working_set_bytes{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
- expr: windows_container_memory_usage_private_working_set_bytes{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
record: windows_container_private_working_set_usage
- expr: windows_container_network_receive_bytes_total{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
- expr: windows_container_network_receive_bytes_total{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
record: windows_container_network_received_bytes_total
- expr: windows_container_network_transmit_bytes_total{job="{{ .Values.windowsMonitoring.job }}"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
- expr: windows_container_network_transmit_bytes_total{job="windows-exporter"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics"}) by(container, container_id, pod, namespace)
record: windows_container_network_transmitted_bytes_total
- expr: |-
max by (namespace, pod, container) (
Expand Down
26 changes: 23 additions & 3 deletions charts/kube-prometheus-stack/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,30 @@ global:
# - "image-pull-secret"

windowsMonitoring:
## Deploys the windows-exporter and Windows-specific dashboards and rules
## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter')
enabled: false
## Job must match jobLabel in the PodMonitor/ServiceMonitor and is used for the rules
job: prometheus-windows-exporter

## Configuration for prometheus-windows-exporter
## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-windows-exporter
##
prometheus-windows-exporter:
## Enable ServiceMonitor and set Kubernetes label to use as a job label
##
prometheus:
monitor:
enabled: true
jobLabel: jobLabel

## Set job label to 'windows-exporter' as required by the default Prometheus rules and Grafana dashboards
##
podLabels:
jobLabel: windows-exporter

## Enable memory and container metrics as required by the default Prometheus rules and Grafana dashboards
##
config: |-
collectors:
enabled: '[defaults],memory,container'
## Configuration for alertmanager
## ref: https://prometheus.io/docs/alerting/alertmanager/
Expand Down

0 comments on commit 9c8e405

Please sign in to comment.