From 54fc07c912f3b765414c3e72e2dc6e4046161913 Mon Sep 17 00:00:00 2001 From: Simon Gerber Date: Fri, 12 Apr 2024 15:30:42 +0200 Subject: [PATCH] Make OpenShift 4.14 the default --- class/defaults.yml | 2 +- .../prometheus_rules.yaml | 93 ++-- .../prometheus_rules.yaml | 93 ++-- .../prometheus_rules.yaml | 433 +++--------------- .../prometheus_rules.yaml | 93 ++-- .../prometheus_rules.yaml | 96 ++-- .../prometheus_rules.yaml | 93 ++-- .../prometheus_rules.yaml | 110 +++-- 8 files changed, 379 insertions(+), 634 deletions(-) diff --git a/class/defaults.yml b/class/defaults.yml index 7f0bc8fd..5b1948b2 100644 --- a/class/defaults.yml +++ b/class/defaults.yml @@ -8,7 +8,7 @@ parameters: alert-patching.libsonnet: openshift4-monitoring-alert-patching.libsonnet namespace: openshift-monitoring # TODO: select based on reported OCP version once we have dynamic facts - manifests_version: release-4.13 + manifests_version: release-4.14 =_cluster_monitoring_operator_version_map: release-4.13: release-4.13 release-4.14: release-4.14 diff --git a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 836cf8fe..95929000 100644 --- a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -249,7 +249,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -285,7 +285,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -339,6 +339,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. 
syn_component: openshift4-monitoring expr: | @@ -610,6 +611,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1076,6 +1078,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1129,6 +1132,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1142,6 +1146,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1170,7 +1175,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring - name: syn-machine-health-check-unterminated-short-circuit @@ -1245,10 +1250,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1257,11 +1264,32 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1274,9 +1302,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. 
For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1290,9 +1320,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1356,20 +1388,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. - syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1502,19 +1520,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1565,7 +1570,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2166,7 +2171,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2180,7 +2185,7 @@ spec: summary: Errors while reconciling objects. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2208,7 +2213,7 @@ spec: summary: Errors while updating objects status. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2234,7 +2239,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2244,7 +2249,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2252,6 +2257,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). 
+ summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 836cf8fe..95929000 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -249,7 +249,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -285,7 +285,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -339,6 +339,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. 
syn_component: openshift4-monitoring expr: | @@ -610,6 +611,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1076,6 +1078,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1129,6 +1132,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1142,6 +1146,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1170,7 +1175,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring - name: syn-machine-health-check-unterminated-short-circuit @@ -1245,10 +1250,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1257,11 +1264,32 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1274,9 +1302,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. 
For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1290,9 +1320,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1356,20 +1388,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. - syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1502,19 +1520,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1565,7 +1570,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2166,7 +2171,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2180,7 +2185,7 @@ spec: summary: Errors while reconciling objects. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2208,7 +2213,7 @@ spec: summary: Errors while updating objects status. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2234,7 +2239,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2244,7 +2249,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2252,6 +2257,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). 
+ summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) diff --git a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index c8a58cd6..b13b9a69 100644 --- a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -146,34 +146,34 @@ spec: syn_component: openshift4-monitoring - name: syn-cluster-network-operator-master.rules rules: - - alert: SYN_NoOvnMasterLeader + - alert: SYN_NoOvnClusterManagerLeader annotations: description: | Networking control plane is degraded. Networking configuration updates applied to the cluster will not be implemented while there is no OVN Kubernetes leader. Existing workloads should continue to have connectivity. OVN-Kubernetes control plane is not functional. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/NoOvnMasterLeader.md - summary: There is no ovn-kubernetes master leader. + summary: There is no ovn-kubernetes cluster manager leader. syn_component: openshift4-monitoring expr: | # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max by (namespace) (max_over_time(ovnkube_master_leader[5m])) == 0 + max by (namespace) (max_over_time(ovnkube_clustermanager_leader[5m])) == 0 for: 5m labels: severity: critical syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NoRunningOvnMaster + - alert: SYN_NoRunningOvnControlPlane annotations: description: | Networking control plane is degraded. Networking configuration updates applied to the cluster will not be implemented while there are no OVN Kubernetes pods. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/NoRunningOvnMaster.md - summary: There is no running ovn-kubernetes master. + summary: There is no running ovn-kubernetes control plane. syn_component: openshift4-monitoring expr: | - absent(up{job="ovnkube-master", namespace="openshift-ovn-kubernetes"} == 1) + absent(up{job="ovnkube-control-plane", namespace="openshift-ovn-kubernetes"} == 1) for: 5m labels: namespace: openshift-ovn-kubernetes @@ -193,333 +193,12 @@ spec: expr: | # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - time() - max_over_time(ovnkube_master_nb_e2e_timestamp[5m]) > 120 + time() - max_over_time(ovnkube_controller_nb_e2e_timestamp[5m]) > 120 for: 10m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseClusterIDError - annotations: - description: More than one OVN northbound database cluster ID indicates - degraded OVN database high availability and possible database split - brain. - summary: Multiple OVN northbound database cluster IDs exist. 
- syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - count(count(min_over_time(ovn_db_cluster_id{db_name="OVN_Northbound"}[5m])) by (cluster_id, namespace)) by (namespace) > 1 - for: 5m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseClusterMemberError - annotations: - description: OVN northbound database server(s) has not been a RAFT cluster - member for a period of time which may indicate degraded OVN database - high availability cluster. - summary: OVN northbound database server(s) has not been a member of the - databases high availability for a period of time. - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - min_over_time(cluster:ovn_db_nbdb_not_cluster_member:abs[5m]) != 0 - for: 5m - labels: - namespace: openshift-ovn-kubernetes - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseInboundConnectionError - annotations: - description: OVN northbound database server(s) is experiencing inbound - RAFT connectivity errors which may indicate degraded OVN database high - availability. - summary: OVN northbound database server(s) is experiencing inbound RAFT - connectivity errors. - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - # ..error_total is set to zero when error resolves itself - min_over_time(ovn_db_cluster_inbound_connections_error_total{db_name="OVN_Northbound"}[5m]) > 0 - for: 5m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseInboundConnectionMissing - annotations: - description: OVN northbound database server(s) do not have expected number - of inbound connections for a RAFT cluster which may indicate degraded - OVN database high availability. - summary: OVN northbound database server(s) do not have expected number - of inbound RAFT connections. - syn_component: openshift4-monitoring - expr: | - # Expected sum of inbound connections is number of control plane nodes * number of control plane nodes minus one - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - min_over_time(cluster:ovn_db_nbdb_missing_inbound_connections:abs[5m]) != 0 - for: 5m - labels: - namespace: openshift-ovn-kubernetes - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseLeaderError - annotations: - description: OVN northbound database(s) have no RAFT leader. Networking - control plane is degraded. - summary: OVN northbound database(s) have no RAFT leader - syn_component: openshift4-monitoring - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. 
- count(max_over_time(ovn_db_cluster_server_role{db_name="OVN_Northbound", server_role="leader"}[5m])) by (namespace) == 0 - for: 5m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseMultipleLeadersError - annotations: - description: OVN northbound database(s) have multiple RAFT leaders which - may indicate degraded OVN database high availability. - summary: OVN northbound database(s) have multiple RAFT leaders - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - count(min_over_time(ovn_db_cluster_server_role{db_name="OVN_Northbound", server_role="leader"}[1m])) by (leader, namespace) > 1 - for: 5m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseOutboundConnectionError - annotations: - description: OVN northbound database server(s) outbound RAFT connectivity - errors may indicate degraded OVN database high availability. - summary: OVN northbound database server(s) is experiencing outbound RAFT - connectivity errors. - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - # ..error_total is set to zero when error resolves itself - min_over_time(ovn_db_cluster_outbound_connections_error_total{db_name="OVN_Northbound"}[5m]) > 0 - for: 5m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseOutboundConnectionMissing - annotations: - description: OVN northbound database server(s) do not have expected number - of outbound connections for a RAFT cluster which may indicate degraded - OVN database high availability. - summary: OVN northbound database server(s) do not have expected number - of outbound RAFT connections. - syn_component: openshift4-monitoring - expr: | - # Expected sum of outbound connections is number of control plane nodes * number of control plane nodes minus one - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - min_over_time(cluster:ovn_db_nbdb_missing_outbound_connections:abs[5m]) != 0 - for: 5m - labels: - namespace: openshift-ovn-kubernetes - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthboundDatabaseTermLag - annotations: - description: OVN northbound database(s) RAFT term have not been equal - which may indicate degraded OVN database high availability. - summary: OVN northbound databases RAFT term have not been equal for a - period of time. - syn_component: openshift4-monitoring - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. 
- max(max_over_time(ovn_db_cluster_term{db_name="OVN_Northbound"}[5m])) by (namespace) - min(max_over_time(ovn_db_cluster_term{db_name="OVN_Northbound"}[5m])) by (namespace) > 0 - for: 25m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesNorthdInactive - annotations: - description: Exactly one OVN northd must have an active status within - the high availability set. Networking control plane is degraded. - summary: Exactly one OVN northd instance must have an active status. - syn_component: openshift4-monitoring - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - count(ovn_northd_status == 1) by (namespace) != 1 - for: 5m labels: severity: critical syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseClusterIDError - annotations: - description: More than one OVN southbound database cluster ID indicates - degraded OVN database high availability and possible database split - brain. - summary: Multiple OVN southbound database cluster IDs exist. - syn_component: openshift4-monitoring - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - count(count(min_over_time(ovn_db_cluster_id{db_name="OVN_Southbound"}[5m])) by (cluster_id, namespace)) by (namespace) > 1 - for: 5m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseClusterMemberError - annotations: - description: OVN southbound database server(s) has not been a RAFT cluster - member for a period of time which may indicate degraded OVN database - high availability. - summary: OVN southbound database server(s) has not been a member of the - databases high availability for a period of time. - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - min_over_time(cluster:ovn_db_sbdb_not_cluster_member:abs[5m]) != 0 - for: 5m - labels: - namespace: openshift-ovn-kubernetes - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseInboundConnectionError - annotations: - description: OVN southbound database server(s) is experiencing inbound - RAFT connectivity errors which may indicate degraded OVN database high - availability. - summary: OVN southbound database server(s) is experiencing inbound RAFT - connectivity errors. - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - # ..error_total is set to zero when error resolves itself - min_over_time(ovn_db_cluster_inbound_connections_error_total{db_name="OVN_Southbound"}[5m]) > 0 - for: 5m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseInboundConnectionMissing - annotations: - description: OVN southbound database server(s) do not have expected number - of inbound connections for a RAFT cluster which may indicate degraded - OVN database high availability. 
- summary: OVN southbound database server(s) do not have expected number - of inbound RAFT connections. - syn_component: openshift4-monitoring - expr: | - # Expected sum of inbound connections is number of control plane nodes * number of control plane nodes minus one - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - min_over_time(cluster:ovn_db_sbdb_missing_inbound_connections:abs[5m]) != 0 - for: 5m - labels: - namespace: openshift-ovn-kubernetes - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseLeaderError - annotations: - description: OVN southbound database(s) have no leader. Networking control - plane is degraded. - summary: OVN southbound database(s) have no RAFT leader - syn_component: openshift4-monitoring - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - count(max_over_time(ovn_db_cluster_server_role{db_name="OVN_Southbound", server_role="leader"}[5m])) by (namespace) == 0 - for: 5m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseMultipleLeadersError - annotations: - description: OVN southbound database(s) have multiple RAFT leaders which - may indicate degraded OVN database high availability. - summary: OVN southbound database(s) have multiple RAFT leaders - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - count(min_over_time(ovn_db_cluster_server_role{db_name="OVN_Southbound", server_role="leader"}[1m])) by (leader, namespace) > 1 - for: 5m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseOutboundConnectionError - annotations: - description: OVN southbound database server(s) outbound RAFT connectivity - errors which may indicate degraded OVN database high availability. - summary: OVN southbound database server(s) is experiencing outbound RAFT - connectivity errors. - syn_component: openshift4-monitoring - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - # ..error_total is set to zero when error resolves itself - min_over_time(ovn_db_cluster_outbound_connections_error_total{db_name="OVN_Southbound"}[5m]) > 0 - for: 5m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseOutboundConnectionMissing - annotations: - description: OVN southbound database server(s) do not have expected number - of outbound connections for a RAFT cluster which may indicate degraded - OVN database high availability. - summary: OVN southbound database server(s) do not have expected number - of outbound RAFT connections. - syn_component: openshift4-monitoring - expr: | - # Expected sum of outbound connections is number of control plane nodes * number of control plane nodes minus one - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. 
- min_over_time(cluster:ovn_db_sbdb_missing_outbound_connections:abs[5m]) != 0 - for: 5m - labels: - namespace: openshift-ovn-kubernetes - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - - alert: SYN_OVNKubernetesSouthboundDatabaseTermLag - annotations: - description: OVN southbound database(s) RAFT term have not been equal - which may indicate degraded OVN database high availability. - summary: OVN southbound databases RAFT term have not been equal for a - period of time. - syn_component: openshift4-monitoring - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max(max_over_time(ovn_db_cluster_term{db_name="OVN_Southbound"}[5m])) by (namespace) - min(max_over_time(ovn_db_cluster_term{db_name="OVN_Southbound"}[5m])) by (namespace) > 0 - for: 25m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_SouthboundStale annotations: description: | @@ -533,10 +212,10 @@ spec: expr: | # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(ovnkube_master_nb_e2e_timestamp[5m]) - max_over_time(ovnkube_master_sb_e2e_timestamp[5m]) > 120 + max_over_time(ovnkube_controller_nb_e2e_timestamp[5m]) - max_over_time(ovnkube_controller_sb_e2e_timestamp[5m]) > 120 for: 10m labels: - severity: warning + severity: critical syn: 'true' syn_component: openshift4-monitoring - alert: SYN_V4SubnetAllocationThresholdExceeded @@ -593,6 +272,7 @@ spec: annotations: description: | Networking is degraded on nodes when OVN controller is not connected to OVN southbound database connection. No networking control plane updates will be applied to the node. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/OVNKubernetesControllerDisconnectedSouthboundDatabase.md summary: Networking control plane is degraded on node {{ $labels.node }} because OVN controller is not connected to OVN southbound database. syn_component: openshift4-monitoring @@ -703,7 +383,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -739,7 +419,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -793,6 +473,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. 
syn_component: openshift4-monitoring expr: | @@ -1064,6 +745,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1530,6 +1212,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1583,6 +1266,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1596,6 +1280,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1624,7 +1309,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring - name: syn-machine-health-check-unterminated-short-circuit @@ -1699,10 +1384,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1711,11 +1398,32 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1728,9 +1436,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. 
For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1744,9 +1454,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1810,20 +1522,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. - syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1956,19 +1654,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -2019,7 +1704,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2620,7 +2305,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2634,7 +2319,7 @@ spec: summary: Errors while reconciling objects. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2662,7 +2347,7 @@ spec: summary: Errors while updating objects status. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2688,7 +2373,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2698,7 +2383,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2706,6 +2391,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). 
+ summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) diff --git a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 836cf8fe..95929000 100644 --- a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -249,7 +249,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -285,7 +285,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -339,6 +339,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. 
syn_component: openshift4-monitoring expr: | @@ -610,6 +611,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1076,6 +1078,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1129,6 +1132,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1142,6 +1146,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1170,7 +1175,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring - name: syn-machine-health-check-unterminated-short-circuit @@ -1245,10 +1250,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1257,11 +1264,32 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1274,9 +1302,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. 
For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1290,9 +1320,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1356,20 +1388,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. - syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1502,19 +1520,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1565,7 +1570,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2166,7 +2171,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2180,7 +2185,7 @@ spec: summary: Errors while reconciling objects. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2208,7 +2213,7 @@ spec: summary: Errors while updating objects status. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2234,7 +2239,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2244,7 +2249,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2252,6 +2257,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). 
+ summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 39a5b929..f82725a8 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -262,7 +262,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -300,7 +300,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -357,6 +357,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. syn_component: openshift4-monitoring expr: | @@ -640,6 +641,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1131,6 +1133,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1188,6 +1191,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1202,6 +1206,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1232,7 +1237,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring syn_team: clumsy-donkeys @@ -1312,10 +1317,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. 
For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1325,11 +1332,33 @@ spec: syn: 'true' syn_component: openshift4-monitoring syn_team: clumsy-donkeys + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1343,9 +1372,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1360,9 +1391,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1430,21 +1463,6 @@ spec: syn: 'true' syn_component: openshift4-monitoring syn_team: clumsy-donkeys - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. 
- syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1584,20 +1602,6 @@ spec: syn: 'true' syn_component: openshift4-monitoring syn_team: clumsy-donkeys - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1651,7 +1655,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2288,7 +2292,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2303,7 +2307,7 @@ spec: summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2333,7 +2337,7 @@ spec: summary: Errors while updating objects status. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2361,7 +2365,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2372,7 +2376,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2380,6 +2384,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). 
+ summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 0f1fa02e..2b7ff60f 100644 --- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -249,7 +249,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -285,7 +285,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -339,6 +339,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. 
syn_component: openshift4-monitoring expr: | @@ -610,6 +611,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1076,6 +1078,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1129,6 +1132,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1142,6 +1146,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1170,7 +1175,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring - name: syn-machine-health-check-unterminated-short-circuit @@ -1245,10 +1250,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1257,11 +1264,32 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1274,9 +1302,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. 
For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1290,9 +1320,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1356,20 +1388,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. - syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1502,19 +1520,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1565,7 +1570,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2166,7 +2171,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2180,7 +2185,7 @@ spec: summary: Errors while reconciling objects. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2208,7 +2213,7 @@ spec: summary: Errors while updating objects status. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2234,7 +2239,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2244,7 +2249,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2252,6 +2257,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). 
+ summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) diff --git a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 1428daf0..8a35c738 100644 --- a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -249,7 +249,7 @@ spec: - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because - ${{ $labels.reason }}, and the components it manages may be unavailable + {{ $labels.reason }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to 'oc get -o yaml clusteroperator {{ $labels.name }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first @@ -285,7 +285,7 @@ spec: - alert: SYN_ClusterReleaseNotAccepted annotations: description: The desired cluster release has not been accepted because - ${{ $labels.reason }}, and the cluster will continue to reconcile an + {{ $labels.reason }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url ) ) ) 0}} or @@ -339,6 +339,7 @@ spec: - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-scheduler-operator/KubeSchedulerDown.md summary: Target disappeared from Prometheus target discovery. syn_component: openshift4-monitoring expr: | @@ -610,6 +611,7 @@ spec: > 0.01 for: 15m labels: + namespace: openshift-monitoring severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1076,6 +1078,7 @@ spec: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1129,6 +1132,7 @@ spec: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1142,6 +1146,7 @@ spec: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: + namespace: kube-system severity: warning syn: 'true' syn_component: openshift4-monitoring @@ -1170,7 +1175,7 @@ spec: mapi_mao_collector_up == 0 for: 5m labels: - severity: critical + severity: warning syn: 'true' syn_component: openshift4-monitoring - name: syn-machine-health-check-unterminated-short-circuit @@ -1245,10 +1250,12 @@ spec: rules: - alert: SYN_MCCDrainError annotations: - message: 'Drain failed on {{ $labels.exported_node }} , updates may be - blocked. 
For more details check MachineConfigController pod logs: oc - logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c - machine-config-controller' + description: 'Drain failed on {{ $labels.exported_node }} , updates may + be blocked. For more details check MachineConfigController pod logs: + oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Alerts the user to a failed node drain. Always triggers when + the failure happens one or more times. syn_component: openshift4-monitoring expr: | mcc_drain_err > 0 @@ -1257,11 +1264,32 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - name: syn-mcc-pool-alert + rules: + - alert: SYN_MCCPoolAlert + annotations: + description: 'Node {{ $labels.exported_node }} has triggered a pool alert + due to a label change. For more details check MachineConfigController + pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx + -c machine-config-controller' + summary: Triggers when nodes in a pool have overlapping labels such as + master, worker, and a custom label therefore a choice must be made as + to which is honored. + syn_component: openshift4-monitoring + expr: | + mcc_pool_alert > 0 + labels: + namespace: openshift-machine-config-operator + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState annotations: - message: Kubelet health failure threshold reached + description: Kubelet health failure threshold reached + summary: This keeps track of Kubelet health failures, and tallys them. + The warning is triggered if 2 or more failures occur. syn_component: openshift4-monitoring expr: | mcd_kubelet_state > 2 @@ -1274,9 +1302,11 @@ spec: rules: - alert: SYN_MCDPivotError annotations: - message: 'Error detected in pivot logs on {{ $labels.node }} , upgrade + description: 'Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user when an error is detected upon pivot. This triggers + if the pivot errors are above zero for 2 minutes. syn_component: openshift4-monitoring expr: | mcd_pivot_errors_total > 0 @@ -1290,9 +1320,11 @@ spec: rules: - alert: SYN_MCDRebootError annotations: - message: 'Reboot failed on {{ $labels.node }} , update may be blocked. + description: 'Reboot failed on {{ $labels.node }} , update may be blocked. For more details: oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ' + summary: Alerts the user that a node failed to reboot one or more times + over a span of 5 minutes. syn_component: openshift4-monitoring expr: | mcd_reboots_failed_total > 0 @@ -1356,20 +1388,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeDiskIOSaturation - annotations: - description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. - This symptom might indicate disk saturation. - summary: Disk IO queue is high. 
- syn_component: openshift4-monitoring - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m]) > 10 - for: 30m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1502,19 +1520,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - - alert: SYN_NodeMemoryHighUtilization - annotations: - description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - summary: Host is running out of memory. - syn_component: openshift4-monitoring - expr: | - 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-monitoring - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1565,7 +1570,7 @@ spec: syn_component: openshift4-monitoring expr: | node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m + for: 15m labels: severity: warning syn: 'true' @@ -2166,7 +2171,7 @@ spec: summary: Prometheus operator not ready syn_component: openshift4-monitoring expr: | - min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) + min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]) == 0) for: 5m labels: severity: warning @@ -2180,7 +2185,7 @@ spec: summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2208,7 +2213,7 @@ spec: summary: Errors while updating objects status. 
syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning @@ -2234,7 +2239,7 @@ spec: summary: Errors while performing watch operations in controller. syn_component: openshift4-monitoring expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 + (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning @@ -2244,7 +2249,7 @@ spec: rules: - alert: SYN_SystemMemoryExceedsReservation annotations: - message: System memory usage of {{ $value | humanize }} on {{ $labels.node + description: System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the @@ -2252,6 +2257,8 @@ spec: configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state). + summary: Alerts the user when, for 15 miutes, a specific node is using + more memory than is reserved syn_component: openshift4-monitoring expr: | sum by (node) (container_memory_rss{id="/system.slice"}) > ((sum by (node) (kube_node_status_capacity{resource="memory"} - kube_node_status_allocatable{resource="memory"})) * 0.95) @@ -2455,3 +2462,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_VSphereOpenshiftVmsCBTMismatch + annotations: + description: | + Cluster node VMs are not configured the same for CBT feature. + message: Cluster node VMs are not configured the same for CBT feature. + summary: | + Periodic vSphere health check is failing due to some nodes not having ctkEnabled matching the other nodes. 
+ To get details about the failure, please see the logs in the vsphere-problem-detector-operator pod in namespace openshift-cluster-storage-operator: + ' oc logs -l name=vsphere-problem-detector-operator -n openshift-cluster-storage-operator --tail=-1 | grep "node_cbt"' + syn_component: openshift4-monitoring + expr: min_over_time(vsphere_vm_cbt_checks{cbt=~"enabled"}[5m]) > 0 and on() + min_over_time(vsphere_vm_cbt_checks{cbt=~"disabled"}[5m]) > 0 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring
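Note: the PromQL change that recurs across the prometheus-operator hunks above is the addition of a `cluster` label to the error-ratio aggregations. A minimal sketch of that pattern, reformatted for readability and taken verbatim from the reconcile-error expression in the generated rules (no names or thresholds beyond what the hunks already contain):

    # ratio of failed to total prometheus-operator reconcile operations,
    # now grouped per cluster as well as per controller and namespace
    (
      sum by (cluster, controller, namespace) (
        rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])
      )
    )
    /
    (
      sum by (cluster, controller, namespace) (
        rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m])
      )
    )
    > 0.1

The same `by (cluster,controller,namespace)` grouping is applied to the status-update, watch-operation, and operator-readiness expressions in each golden file.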