diff --git a/internal/bundle/application/applications/kubestatemetrics/metrics/servicemonitor.go b/internal/bundle/application/applications/kubestatemetrics/metrics/servicemonitor.go
index 99f985b..16b7eb6 100644
--- a/internal/bundle/application/applications/kubestatemetrics/metrics/servicemonitor.go
+++ b/internal/bundle/application/applications/kubestatemetrics/metrics/servicemonitor.go
@@ -33,8 +33,8 @@ func getLocalServiceMonitor(monitorMatchingLabels, serviceMatchingLabels map[str
 
 func getIngestionServiceMonitor(monitorMatchingLabels, serviceMatchingLabels map[string]string) *servicemonitor.Config {
 	relabelings := []*servicemonitor.ConfigRelabeling{{
-		Action: "labeldrop",
-		Regex:  "(container|endpoint|namespace|pod)",
+		Action: "labelkeep",
+		Regex:  "(__.+|job|node|namespace|daemonset|statefulset|deployment|condition|status)",
 	}}
 
 	metricRelabelings := []*servicemonitor.ConfigRelabeling{{
@@ -62,7 +62,7 @@ func getIngestionServiceMonitor(monitorMatchingLabels, serviceMatchingLabels map
 		Replacement: "dist_${1}",
 	}, {
 		Action: "labelkeep",
-		Regex:  "__.+|job|node|controller",
+		Regex:  "(__.+|job|node|namespace|controller)",
 	}}
 
 	endpoint := &servicemonitor.ConfigEndpoint{
diff --git a/internal/bundle/application/applications/prometheus/helm/rules.go b/internal/bundle/application/applications/prometheus/helm/rules.go
index 2c1a7b0..0242a6e 100644
--- a/internal/bundle/application/applications/prometheus/helm/rules.go
+++ b/internal/bundle/application/applications/prometheus/helm/rules.go
@@ -128,27 +128,15 @@ groups:
       clamp_max(
         clamp_min(
           (
-            max_over_time(dist_kube_deployment_status_replicas_available{namespace="caos-system"}[5m]) -
-            dist_kube_deployment_spec_replicas{namespace="caos-system"} or
-            max_over_time(dist_kube_statefulset_status_replicas_ready{namespace="caos-system"}[5m]) -
-            dist_kube_statefulset_replicas{namespace="caos-system"} or
-            max_over_time(dist_kube_daemonset_status_number_available{namespace="caos-system"}[5m]) -
-            dist_kube_daemonset_status_desired_number_scheduled{namespace="caos-system"}
+            max_over_time(dist_kube_deployment_status_replicas_available[5m]) -
+            dist_kube_deployment_spec_replicas or
+            max_over_time(dist_kube_statefulset_status_replicas_ready[5m]) -
+            dist_kube_statefulset_replicas or
+            max_over_time(dist_kube_daemonset_status_number_available[5m]) -
+            dist_kube_daemonset_status_desired_number_scheduled
           ) +
           1,
           0
-        ) or
-        clamp_min(
-          (
-            max_over_time(dist_kube_deployment_status_replicas_available{namespace!="caos-system"}[5m]) -
-            dist_kube_deployment_spec_replicas{namespace!="caos-system"} or
-            max_over_time(dist_kube_statefulset_status_replicas_ready{namespace!="caos-system"}[5m]) -
-            dist_kube_statefulset_replicas{namespace!="caos-system"} or
-            max_over_time(dist_kube_daemonset_status_number_available{namespace!="caos-system"}[5m]) -
-            dist_kube_daemonset_status_desired_number_scheduled{namespace!="caos-system"}
-          ) +
-          1,
-          0.5
         ),
         1
       )
@@ -170,7 +158,7 @@ groups:
         1
       )
     record: caos_scheduled_pods_ryg
-  - expr: min(caos_node_cpu_ryg) * min(caos_systemd_ryg) * min(caos_vip_probe_ryg) * min(caos_upstream_probe_ryg) * min(caos_node_memory_ryg) * min(caos_k8s_node_ryg) * avg(caos_etcd_ryg) * min(caos_ready_pods_ryg) * min(caos_scheduled_pods_ryg)
+  - expr: min(caos_node_cpu_ryg) * min(caos_systemd_ryg) * min(caos_vip_probe_ryg) * min(caos_upstream_probe_ryg) * min(caos_node_memory_ryg) * min(caos_k8s_node_ryg) * avg(caos_etcd_ryg) * min(caos_ready_pods_ryg{namespace=~"(caos|kube)-system"}) * min(caos_scheduled_pods_ryg{namespace=~"(caos|kube)-system"})
     record: caos_orb_ryg
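A note on how the two halves fit together: the ingestion relabelings switch from a labeldrop blacklist to an explicit labelkeep whitelist that retains `namespace` (plus the daemonset/statefulset/deployment, condition and status labels), and the recording rules correspondingly drop the per-selector `namespace` matchers on the `dist_kube_*` series, filtering once at the end via `namespace=~"(caos|kube)-system"` on `caos_ready_pods_ryg` and `caos_scheduled_pods_ryg`. As a rough illustration only, this is the endpoint fragment the updated `getIngestionServiceMonitor` would be expected to render; field names come from the Prometheus Operator ServiceMonitor CRD, and everything around the two labelkeep entries (metadata, selector, port, and the `dist_${1}` rename relabeling visible as context above) is elided or assumed:

```yaml
endpoints:
  - relabelings:
      # labelkeep drops every label NOT matching the regex: __-prefixed
      # internal labels plus job/node/namespace and the controller-kind and
      # status labels survive; container, endpoint, pod, etc. are removed.
      - action: labelkeep
        regex: (__.+|job|node|namespace|daemonset|statefulset|deployment|condition|status)
    metricRelabelings:
      # preceding entries (e.g. the dist_${1} metric rename) omitted here
      - action: labelkeep
        regex: (__.+|job|node|namespace|controller)
```

One property of the whitelist approach worth noting: labelkeep fails closed, so any label that starts appearing on these targets later is stripped unless it is added to the regex explicitly, whereas the old labeldrop let unknown labels through.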