Skip to content

Commit

Permalink
assets,site/content: daily assets regeneration
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed Dec 12, 2024
1 parent 82075a7 commit 2355980
Show file tree
Hide file tree
Showing 19 changed files with 262 additions and 262 deletions.
6 changes: 3 additions & 3 deletions assets/jvm/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ groups:
description: JVM heap memory usage is at {{ printf "%.0f" $value }}% over the
last 5 minutes on {{$labels.instance}}, which is above the threshold of 80%.
summary: JVM heap memory filling up.
expr: ((sum without (id) (jvm_memory_used_bytes{area="heap", job!=""}))/(sum without
(id) (jvm_memory_max_bytes{area="heap", job!=""} != -1))) * 100 > 80
expr: ((sum without (id) (jvm_memory_used_bytes{area="heap", }))/(sum without
(id) (jvm_memory_max_bytes{area="heap", } != -1))) * 100 > 80
for: 5m
keep_firing_for: 5m
labels:
Expand All @@ -18,7 +18,7 @@ groups:
are in a cyclic dependency with each other. The restart is required to resolve
the deadlock.'
summary: JVM deadlock detected.
expr: (jvm_threads_deadlocked{job!=""}) > 0
expr: (jvm_threads_deadlocked{}) > 0
for: 2m
keep_firing_for: 5m
labels:
Expand Down
78 changes: 39 additions & 39 deletions assets/jvm/dashboards/jvm-dashboard.json

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions assets/kafka/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ groups:
description: 'Kafka lag keeps increasing over the last 15 minutes for consumer
group: {{$labels.consumergroup}}, topic: {{$labels.topic}}.'
summary: Kafka lag keeps increasing.
expr: sum by (job,kafka_cluster, topic, consumergroup) (delta(kafka_consumergroup_uncommitted_offsets{job="integrations/kafka",topic!="__consumer_offsets",consumergroup!=""}[5m]))
expr: sum by (job,kafka_cluster, topic, consumergroup) (delta(kafka_consumergroup_uncommitted_offsets{topic!="__consumer_offsets",consumergroup!="",job="integrations/kafka"}[5m]))
> 0
for: 15m
keep_firing_for: 10m
Expand All @@ -17,7 +17,7 @@ groups:
description: 'Total kafka lag across all partitions is too high ({{ printf "%.0f"
$value }}) for consumer group: {{$labels.consumergroup}}, topic: {{$labels.topic}}.'
summary: Kafka lag is too high.
expr: sum by (job,kafka_cluster, topic, consumergroup) (kafka_consumergroup_uncommitted_offsets{job="integrations/kafka",topic!="__consumer_offsets",consumergroup!=""})
expr: sum by (job,kafka_cluster, topic, consumergroup) (kafka_consumergroup_uncommitted_offsets{topic!="__consumer_offsets",consumergroup!="",job="integrations/kafka"})
> 100
for: 15m
keep_firing_for: 5m
Expand Down Expand Up @@ -118,8 +118,8 @@ groups:
description: Kafka broker {{ $labels.instance }} in cluster {{ $labels.kafka_cluster
}} has disconnected from Zookeeper.
summary: Kafka Zookeeper sync disconnected.
expr: avg by(job,kafka_cluster,instance) (rate(kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{job="integrations/kafka",
quantile="0.95"}[5m])) < 0
expr: avg by(job,kafka_cluster,instance) (rate(kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{quantile="0.95",job="integrations/kafka"}[5m]))
< 0
for: 5m
labels:
severity: critical
Expand Down
40 changes: 20 additions & 20 deletions assets/kafka/dashboards/kafka-overview-dashboard.json

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions assets/kafka/dashboards/kafka-topic-dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max by (job,kafka_cluster,topic,partition) (\n kafka_log_log_logstartoffset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}\n)",
"expr": "max by (job,kafka_cluster,topic,partition) (\n kafka_log_log_logstartoffset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand All @@ -106,7 +106,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max by (job,kafka_cluster,topic,partition) (\n kafka_log_log_logendoffset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}\n)",
"expr": "max by (job,kafka_cluster,topic,partition) (\n kafka_log_log_logendoffset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand All @@ -117,7 +117,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (job,kafka_cluster,topic,partition) (\n rate(kafka_topic_partition_current_offset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"expr": "sum by (job,kafka_cluster,topic,partition) (\n rate(kafka_topic_partition_current_offset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand All @@ -128,7 +128,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max by (job,kafka_cluster,topic,partition) (\n kafka_log_log_size{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}\n)",
"expr": "max by (job,kafka_cluster,topic,partition) (\n kafka_log_log_size{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand Down Expand Up @@ -223,7 +223,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (job,kafka_cluster,topic) (\n rate(kafka_topic_partition_current_offset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"expr": "sum by (job,kafka_cluster,topic) (\n rate(kafka_topic_partition_current_offset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand Down Expand Up @@ -289,7 +289,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (job,kafka_cluster,topic) (\n rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"expr": "sum by (job,kafka_cluster,topic) (\n rate(kafka_server_brokertopicmetrics_bytesinpersec{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand Down Expand Up @@ -355,7 +355,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (job,kafka_cluster,topic) (\n rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"expr": "sum by (job,kafka_cluster,topic) (\n rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}[$__rate_interval])\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ topic }}",
Expand Down Expand Up @@ -435,7 +435,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n rate(kafka_consumergroup_current_offset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}[$__rate_interval])\n)",
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n rate(kafka_consumergroup_current_offset{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}[$__rate_interval])\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ consumergroup }} ({{ topic }})",
Expand All @@ -446,7 +446,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumergroup_uncommitted_offsets{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"expr": "sum by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumergroup_uncommitted_offsets{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ consumergroup }} ({{ topic }})",
Expand All @@ -457,7 +457,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumer_lag_millis{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumer_lag_millis{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ consumergroup }} ({{ topic }})",
Expand Down Expand Up @@ -552,7 +552,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n rate(kafka_consumergroup_current_offset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}[$__rate_interval])\n)",
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n rate(kafka_consumergroup_current_offset{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}[$__rate_interval])\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ consumergroup }} ({{ topic }})",
Expand Down Expand Up @@ -616,7 +616,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumergroup_uncommitted_offsets{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"expr": "sum by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumergroup_uncommitted_offsets{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ consumergroup }} ({{ topic }})",
Expand Down Expand Up @@ -680,7 +680,7 @@
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumer_lag_millis{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"expr": "avg by (job,kafka_cluster,consumergroup,topic) (\n kafka_consumer_lag_millis{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\",consumergroup=~\"$consumergroup\"}\n)",
"format": "time_series",
"instant": false,
"legendFormat": "{{ consumergroup }} ({{ topic }})",
Expand Down Expand Up @@ -714,7 +714,7 @@
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values(kafka_log_log_logstartoffset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\"}, job)",
"query": "label_values(kafka_log_log_logstartoffset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
Expand All @@ -729,7 +729,7 @@
"label": "Kafka_cluster",
"multi": true,
"name": "kafka_cluster",
"query": "label_values(kafka_log_log_logstartoffset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\"}, kafka_cluster)",
"query": "label_values(kafka_log_log_logstartoffset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\"}, kafka_cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
Expand All @@ -744,7 +744,7 @@
"label": "Topic",
"multi": true,
"name": "topic",
"query": "label_values(kafka_log_log_logstartoffset{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\"}, topic)",
"query": "label_values(kafka_log_log_logstartoffset{topic!=\"__consumer_offsets\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\"}, topic)",
"refresh": 2,
"sort": 1,
"type": "query"
Expand All @@ -759,7 +759,7 @@
"label": "Consumergroup",
"multi": true,
"name": "consumergroup",
"query": "label_values(kafka_consumergroup_uncommitted_offsets{job=\"integrations/kafka\",topic!=\"__consumer_offsets\",consumergroup!=\"\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}, consumergroup)",
"query": "label_values(kafka_consumergroup_uncommitted_offsets{topic!=\"__consumer_offsets\",consumergroup!=\"\",job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\",topic=~\"$topic\"}, consumergroup)",
"refresh": 2,
"sort": 1,
"type": "query"
Expand Down
8 changes: 4 additions & 4 deletions assets/openstack/dashboards/overview
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,8 @@
},
"fieldConfig": {
"defaults": {
"max": "150",
"min": "0",
"max": 150,
"min": 0,
"thresholds": {
"steps": [
{
Expand Down Expand Up @@ -617,8 +617,8 @@
},
"fieldConfig": {
"defaults": {
"max": "150",
"min": "0",
"max": 150,
"min": 0,
"thresholds": {
"steps": [
{
Expand Down
22 changes: 11 additions & 11 deletions assets/windows-active-directory/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ groups:
CPU usage on host {{ $labels.instance }} is above 90%. The current value is {{ $value | printf "%.2f" }}%.
summary: High CPU usage on Windows host.
expr: |
100 - (avg without (mode, core) (rate(windows_cpu_time_total{job=~".*windows.*", mode="idle"}[2m])) * 100) > 90
100 - (avg without (mode, core) (rate(windows_cpu_time_total{mode="idle", }[2m])) * 100) > 90
for: 15m
keep_firing_for: 5m
labels:
Expand All @@ -18,9 +18,9 @@ groups:
Memory usage on host {{ $labels.instance }} is above 90%. The current value is {{ $value | printf "%.2f" }}%.
summary: High memory usage on Windows host.
expr: |
100 - ((windows_os_physical_memory_free_bytes{job=~".*windows.*"}
100 - ((windows_os_physical_memory_free_bytes{}
/
windows_cs_physical_memory_bytes{job=~".*windows.*"}) * 100) > 90
windows_cs_physical_memory_bytes{}) * 100) > 90
for: 15m
keep_firing_for: 5m
labels:
Expand All @@ -31,7 +31,7 @@ groups:
Volume {{ $labels.volume }} is almost full on host {{ $labels.instance }}, more than 90% of space is used. The current volume utilization is {{ $value | printf "%.2f" }}%.
summary: Disk is almost full on Windows host.
expr: |
100 - ((windows_logical_disk_free_bytes{job=~".*windows.*"} ) / (windows_logical_disk_size_bytes{job=~".*windows.*"})) * 100 > 90
100 - ((windows_logical_disk_free_bytes{} ) / (windows_logical_disk_size_bytes{})) * 100 > 90
for: 15m
keep_firing_for: 5m
labels:
Expand All @@ -42,7 +42,7 @@ groups:
Windows service {{ $labels.name }} is not in healthy state, currently in '{{ $labels.status }}'.
summary: Windows service is not healthy.
expr: |
windows_service_status{job=~".*windows.*", status!~"starting|stopping|ok"} > 0
windows_service_status{status!~"starting|stopping|ok", } > 0
for: 5m
labels:
severity: critical
Expand All @@ -52,7 +52,7 @@ groups:
Windows disk {{ $labels.name }} is not in healthy state, currently in '{{ $labels.status }}' status.
summary: Windows physical disk is not healthy.
expr: |
windows_disk_drive_status{job=~".*windows.*", status="OK"} != 1
windows_disk_drive_status{status="OK", } != 1
for: 5m
labels:
severity: critical
Expand All @@ -62,7 +62,7 @@ groups:
Round-trip time of NTP client on instance {{ $labels.instance }} is greater than 1 second. Delay is {{ $value }} sec.
summary: NTP client delay.
expr: |
windows_time_ntp_round_trip_delay_seconds{job=~".*windows.*"} > 1
windows_time_ntp_round_trip_delay_seconds{} > 1
for: 5m
keep_firing_for: 5m
labels:
Expand All @@ -73,7 +73,7 @@ groups:
NTP time offset for instance {{ $labels.instance }} is greater than 1 second. Offset is {{ $value }} sec.
summary: NTP time offset is too large.
expr: |
windows_time_computed_time_offset_seconds{job=~".*windows.*"} > 1
windows_time_computed_time_offset_seconds{} > 1
for: 5m
keep_firing_for: 5m
labels:
Expand All @@ -85,7 +85,7 @@ groups:
summary: There is a high number of pending replication operations in Active
Directory. A high number of pending operations sustained over a period of
time can indicate a problem with replication.
expr: "windows_ad_replication_pending_operations{job=~\".*windows.*\"} >= 50 \n"
expr: "windows_ad_replication_pending_operations{} >= 50 \n"
for: 10m
keep_firing_for: 5m
labels:
Expand All @@ -97,7 +97,7 @@ groups:
summary: There are a number of replication synchronization request failures.
These can cause authentication failures, outdated information being propagated
across domain controllers, and potentially data loss or inconsistencies.
expr: "increase(windows_ad_replication_sync_requests_schema_mismatch_failure_total{job=~\".*windows.*\"}[5m])
expr: "increase(windows_ad_replication_sync_requests_schema_mismatch_failure_total{}[5m])
> 0 \n"
for: 5m
keep_firing_for: 5m
Expand All @@ -111,7 +111,7 @@ groups:
summary: There is a high number of password changes. This may indicate unauthorized
changes or attacks.
expr: |
increase(windows_ad_sam_password_changes_total{job=~".*windows.*"}[5m]) > 25
increase(windows_ad_sam_password_changes_total{}[5m]) > 25
for: 5m
labels:
keep_firing_for: 24h
Expand Down
Loading

0 comments on commit 2355980

Please sign in to comment.