Skip to content

Commit

Permalink
Merge branch 'w/2.7/bugfix/ZENKO-4715' into w/2.8/bugfix/ZENKO-4715
Browse files Browse the repository at this point in the history
  • Loading branch information
francoisferrand committed Dec 18, 2023
2 parents 098f498 + 08bead7 commit 2f25f11
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 22 deletions.
1 change: 1 addition & 0 deletions monitoring/kafka/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ groups:
> ${maxConsumerLagMessagesWarningThreshold}
or
kafka_consumergroup_group_max_lag_seconds{namespace="${namespace}",cluster_name="${cluster}",group!=""}
< (1/0)
> ${maxConsumerLagSecondsWarningThreshold}
for: 5m
labels:
Expand Down
46 changes: 34 additions & 12 deletions monitoring/mongodb/alerts.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -168,30 +168,52 @@ tests:
- name: ReplicationLagWarning
interval: 1m
input_series:
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-0", job="zenko/data-db-mongodb-sharded-shard0-data", member_idx="mongo-0", member_state="PRIMARY"}
values: 5 25 35
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-1", job="zenko/data-db-mongodb-sharded-shard0-data", member_idx="mongo-1", member_state="SECONDARY"}
values: 0 12 29
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-2", job="zenko/data-db-mongodb-sharded-shard0-data", member_idx="mongo-2", member_state="SECONDARY"}
values: 2 2 31
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-configsvr-1", job="zenko/data-db-mongodb-sharded-configsvr", member_idx="mongo-1", member_state="PRIMARY"}
values: 71 83 95
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-0", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="PRIMARY"}
values: 5 35000+1000x10 45000
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-0", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="SECONDARY"}
values: 0 24000+1000x10 39000
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-0", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="SECONDARY"}
values: 2 2000+1000x10 41000

- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-1", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="PRIMARY"}
values: 5 34000+1000x10 44000
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-1", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="SECONDARY"}
values: 0 26000+1000x10 40000
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-1", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="SECONDARY"}
values: 2 1000+1000x10 40000

- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-2", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="PRIMARY"}
values: 5 12000+1000x10 43000
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-2", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="SECONDARY"}
values: 0 9000+1000x10 38000
- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-shard0-data-2", rs_nm="shard0-data", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-shardsrv-headless.svc.cluster.local", member_state="SECONDARY"}
values: 2 3000+1000x10 42000

- series: mongodb_rs_members_optimeDate{namespace="zenko",pod="data-db-mongodb-sharded-configsvr-1", rs_nm="configsvr", member_idx="data-db-mongodb-sharded-cfgsvr-0.data-db-mongodb-sharded-cfgsvr-headless.svc.cluster.local", member_state="PRIMARY"}
values: 71 8300 9500

alert_rule_test:
- alertname: ReplicationLagWarning
eval_time: 1m
exp_alerts: []
- alertname: ReplicationLagWarning
eval_time: 1m30s
eval_time: 2m
exp_alerts: []
- alertname: ReplicationLagWarning
eval_time: 10m
exp_alerts: []
- alertname: ReplicationLagWarning
eval_time: 11m
exp_alerts:
- exp_labels:
severity: warning
job: shard0-data
member_idx: data-db-mongodb-sharded-shard0-data-2
rs_nm: shard0-data
exp_annotations:
description: Mongodb replication lag for `shard0-data` is more than 10s.
description: Mongodb replication lag for `data-db-mongodb-sharded-shard0-data-2` is more than 30 seconds.
summary: MongoDB replication lag
- alertname: ReplicationLagWarning
eval_time: 2m
eval_time: 12m
exp_alerts: []

- name: TooManyClientConnectionsWarning
Expand Down
18 changes: 10 additions & 8 deletions monitoring/mongodb/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,19 @@ groups:
- alert: ReplicationLagWarning
expr: |
label_replace(
max(mongodb_rs_members_optimeDate{namespace="${namespace}",pod=~"${service}.*",member_state="PRIMARY"})
by(job)
-min(mongodb_rs_members_optimeDate{namespace="${namespace}",pod=~"${service}.*",member_state="SECONDARY"})
by(job)
> 10
, "job", "$1", "job", "(?:${namespace}/)?${service}-?(.*)")
for: 30s
max(
max(mongodb_rs_members_optimeDate{namespace="${namespace}",pod=~"${service}.*",member_state="PRIMARY"})
by(pod, rs_nm)
- ignoring(member_idx) group_right
min(mongodb_rs_members_optimeDate{namespace="${namespace}",pod=~"${service}.*",member_state="SECONDARY"})
by(pod, rs_nm, member_idx)
) by(member_idx, rs_nm) / 1000 > 30
, "member_idx", "$1", "member_idx", "(${service}[^.]*)\\.${service}.*")
for: 10m
labels:
severity: warning
annotations:
description: "Mongodb replication lag for `{{ $labels.job }}` is more than 10s."
description: "Mongodb replication lag for `{{ $labels.member_idx }}` is more than 30 seconds."
summary: MongoDB replication lag

- alert: TooManyClientConnectionsWarning
Expand Down
4 changes: 2 additions & 2 deletions solution/deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ backbeat:
dashboard: backbeat-dashboards
image: backbeat
policy: backbeat-policies
tag: 8.6.31
tag: 8.6.32
envsubst: BACKBEAT_TAG
busybox:
image: busybox
Expand Down Expand Up @@ -93,7 +93,7 @@ vault:
zenko-operator:
sourceRegistry: registry.scality.com/zenko-operator
image: zenko-operator
tag: 1.5.38
tag: 1.5.39
envsubst: ZENKO_OPERATOR_TAG
zenko-ui:
sourceRegistry: registry.scality.com/zenko-ui
Expand Down

0 comments on commit 2f25f11

Please sign in to comment.