diff --git a/monitoring/mongodb/alerts.test.yaml b/monitoring/mongodb/alerts.test.yaml
index ea6604184..c70288508 100644
--- a/monitoring/mongodb/alerts.test.yaml
+++ b/monitoring/mongodb/alerts.test.yaml
@@ -330,35 +330,42 @@ tests:
               summary: MongoDB node in STARTUP2 state for too long
 
-
   - name: MongoDbRSNotSynced
-    interval: 1m
     input_series:
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 1x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 1x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 1x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2 _ _ _ _ _ _ _ _ _
-
-    alert_rule_test:
-      - alertname: MongoDbRSNotSynced
-        eval_time: 10m
-        exp_alerts:
-          - exp_labels:
-              severity: warning
-              rs_nm: data-db-mongodb-sharded-shard-0
-            exp_annotations:
-              description: "MongoDB replica set `data-db-mongodb-sharded-shard-0` is not in the expected state. Please ensure that all instances are running properly."
-              summary: MongoDB replica set out of sync
+      # Every member is healthy for the first 10m. From 11m the series reported by
+      # pod shard0-data-2 and the SECONDARY samples for member shard0-data-2 go
+      # stale, leaving pods 0 and 1 with a single SECONDARY each.
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-0"}
+        values: 1x10 1x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-1"}
+        values: 2x10 2x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: 2x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: _x10 8x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-0"}
+        values: 1x10 1x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-1"}
+        values: 2x10 2x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: 2x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: _x10 8x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-0"}
+        values: 1x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-1"}
+        values: 2x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: 2x10 stale
+
+    alert_rule_test:
+      # The rule has `for: 10m`, so a condition that starts at 11m is pending
+      # until 21m (assuming ${replicas} renders to 3, i.e. 2 expected SECONDARY).
+      - alertname: MongoDbRSNotSynced
+        eval_time: 21m
+        exp_alerts:
+          - exp_labels:
+              severity: warning
+              rs_nm: data-db-mongodb-sharded-shard-0
+            exp_annotations:
+              description: "MongoDB replica set `data-db-mongodb-sharded-shard-0` is not in the expected state. It does not have the expected number of SECONDARY members. Please ensure that all instances are running properly."
+              summary: MongoDB replica set out of sync
diff --git a/monitoring/mongodb/alerts.yaml b/monitoring/mongodb/alerts.yaml
index f57e912af..cdbe2b3c8 100644
--- a/monitoring/mongodb/alerts.yaml
+++ b/monitoring/mongodb/alerts.yaml
@@ -183,10 +183,10 @@ groups:
 
   - alert: MongoDbRSNotSynced
     expr: |
-      group by(rs_nm)(count by (rs_nm, pod)(mongodb_rs_members_state{namespace="${namespace}", pod=~"${service}.*", member_state="SECONDARY"}) != (${replicas} - 1) )
+      group by(rs_nm) ( count by(rs_nm, pod) (mongodb_rs_members_state{namespace="${namespace}", pod=~"${service}.*", member_state="SECONDARY"}) != (${replicas} - 1) )
     for: 10m
     labels:
       severity: warning
     annotations:
-      description: "MongoDB replica set `{{ $labels.rs_nm }}` is not in the expected state. Please ensure that all instances are running properly."
+      description: "MongoDB replica set `{{ $labels.rs_nm }}` is not in the expected state. It does not have the expected number of SECONDARY members. Please ensure that all instances are running properly."
       summary: MongoDB replica set out of sync