From 3859db34df5393cdc9f985e7003ffb36f4fb4058 Mon Sep 17 00:00:00 2001 From: shehbaz-pathan <46107507+shehbaz-pathan@users.noreply.github.com> Date: Thu, 27 Jun 2024 11:03:15 +0530 Subject: [PATCH] Restructured nopo11y helm chart (#12) * Removed appLabel dependency from the metrics * Restructured nopo11y helm chart to support multiple services in single deployment * Values file comment cleanup and added global value for thresholds * Added global defaults for SLO objectives and alert thresholds * Used service name instead of deployment name * Updated latency to latencyMS in alertThresholds * Replaced .deployment with .service in alerts template * Updated prepend release name key * changed service name with deployment name --------- Co-authored-by: Shehbaz Pathan (Consultant) --- charts/nopo11y/Chart.yaml | 2 +- charts/nopo11y/templates/_helpers.tpl | 82 ++++++-- charts/nopo11y/templates/alerts.yaml | 68 ++++++ ...{defaultDashboard.yaml => dashboards.yaml} | 61 +++--- charts/nopo11y/templates/defaultAlerts.yaml | 60 ------ charts/nopo11y/templates/defaultSLOs.yaml | 199 ------------------ charts/nopo11y/templates/slos.yaml | 196 +++++++++++++++++ charts/nopo11y/values.yaml | 40 ++-- 8 files changed, 386 insertions(+), 322 deletions(-) create mode 100644 charts/nopo11y/templates/alerts.yaml rename charts/nopo11y/templates/{defaultDashboard.yaml => dashboards.yaml} (80%) delete mode 100644 charts/nopo11y/templates/defaultAlerts.yaml delete mode 100644 charts/nopo11y/templates/defaultSLOs.yaml create mode 100644 charts/nopo11y/templates/slos.yaml diff --git a/charts/nopo11y/Chart.yaml b/charts/nopo11y/Chart.yaml index e0c047a..bac13b9 100644 --- a/charts/nopo11y/Chart.yaml +++ b/charts/nopo11y/Chart.yaml @@ -21,4 +21,4 @@ version: 1.0.2 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.1.0" +appVersion: "2.0.0" diff --git a/charts/nopo11y/templates/_helpers.tpl b/charts/nopo11y/templates/_helpers.tpl index c70bb28..3288c04 100644 --- a/charts/nopo11y/templates/_helpers.tpl +++ b/charts/nopo11y/templates/_helpers.tpl @@ -1,20 +1,72 @@ -{{- define "dashboard-uid" -}} -{{- printf "%s-%s" .Release.Name .Release.Namespace | trunc 40 -}} +{{- define "nopo11y.services" -}} +{{- $servicesList:= list }} +{{- $defaulAvailability:= .Values.defaults.slo.availability -}} +{{- $defaulLatency:= .Values.defaults.slo.latency -}} +{{- $defaulLatencyThreshold:= .Values.defaults.alertThresholds.latencyMS -}} +{{- $defaul5xx:= .Values.defaults.alertThresholds.rate5xx -}} +{{- $default4xx:= .Values.defaults.alertThresholds.rate4xx -}} +{{- $release:= "" }} +{{- if .Values.prependReleaseName -}} +{{- $release = printf "%s-" .Release.Name }} +{{- end }} +{{- $namespace:= .Release.Namespace }} +{{- range .Values.services }} +{{- $service:= dict }} +{{- if or (not (hasKey . "deploymentName")) (not (hasKey . "serviceName")) -}} +{{- fail "deploymentName and serviceName are required for each service" -}} +{{- else if and (eq .deploymentName "") (eq .serviceName "") -}} +{{- fail "deploymentName and ServiceName are required for each service" -}} +{{- end -}} +{{ $service = set $service "deployment" (printf "%s%s" $release .deploymentName) }} +{{ $service = set $service "service" (printf "%s%s" $release .serviceName) }} +{{- if not (hasKey . 
"slo") }} +{{ $service = set $service "availability" $defaulAvailability }} +{{ $service = set $service "latency" $defaulLatency }} +{{- else if hasKey . "slo" }} +{{- if not (hasKey .slo "availability") }} +{{ $service = set $service "availability" $defaulAvailability }} +{{- else if not .slo.availability }} +{{ $service = set $service "availability" $defaulAvailability }} +{{- else }} +{{ $service = set $service "availability" .slo.availability }} +{{- end -}} +{{- if not (hasKey .slo "latency") }} +{{ $service = set $service "latency" $defaulLatency }} +{{- else if not .slo.latency }} +{{ $service = set $service "latency" $defaulLatency }} +{{- else }} +{{ $service = set $service "latency" .slo.latency }} +{{- end }} +{{- end }} +{{- if not (hasKey . "alertThresholds") }} +{{ $service = set $service "rate5xx" $defaul5xx }} +{{ $service = set $service "rate4xx" $default4xx }} +{{ $service = set $service "latencyThreshold" $defaulLatencyThreshold }} +{{- else if hasKey . "alertThresholds" }} +{{- if not (hasKey .alertThresholds "rate5xx") }} +{{ $service = set $service "rate5xx" $defaul5xx }} +{{- else if not .alertThresholds.rate5xx }} +{{ $service = set $service "rate5xx" $defaul5xx }} +{{- else }} +{{ $service = set $service "rate5xx" .alertThresholds.rate5xx }} +{{- end -}} +{{- if not (hasKey .alertThresholds "rate4xx") }} +{{ $service = set $service "rate4xx" $default4xx }} +{{- else if not .alertThresholds.rate5xx }} +{{ $service = set $service "rate4xx" $default4xx }} +{{- else }} +{{ $service = set $service "rate4xx" .alertThresholds.rate4xx }} {{- end -}} - - -{{- define "app.label" -}} -{{- if .Values.includeReleaseNameInMetricsLabels }} -{{- printf "%s-%s" .Release.Name .Values.appLabel -}} -{{- else -}} -{{ printf "%s" .Values.appLabel }} +{{- if not (hasKey .alertThresholds "latencyMS") }} +{{ $service = set $service "latencyThreshold" $defaulLatencyThreshold }} +{{- else if not .alertThresholds.latencyMS }} +{{ $service = set $service "latencyThreshold" $defaulLatencyThreshold }} +{{- else }} +{{ $service = set $service "latencyThreshold" .alertThresholds.latencyMS }} {{- end }} {{- end }} - -{{- define "deployment.name" -}} -{{- if .Values.includeReleaseNameInMetricsLabels }} -{{- printf "%s-%s" .Release.Name .Values.deploymentName -}} -{{- else -}} -{{ printf "%s" .Values.deploymentName }} +{{ $service = set $service "dashboarduid" (printf "%s-%s" .serviceName $namespace) }} +{{ $servicesList = append $servicesList $service }} {{- end }} +{{- toJson $servicesList }} {{- end }} \ No newline at end of file diff --git a/charts/nopo11y/templates/alerts.yaml b/charts/nopo11y/templates/alerts.yaml new file mode 100644 index 0000000..4ce0eeb --- /dev/null +++ b/charts/nopo11y/templates/alerts.yaml @@ -0,0 +1,68 @@ +{{- if .Values.enabled }} +{{- if eq .Values.namespace ""}} +{{ fail "values.namespace is required" }} +{{- end }} +{{- if and (not .Values.istioMetrics.enabled) (not .Values.nginxIngressMetrics.enabled ) }} +{{ fail "Enabling either istioMetrics or nginxIngresMetrics is required" }} +{{- end }} +{{- range (include "nopo11y.services" . |fromJsonArray) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + namespace: {{ $.Values.namespace }} + name: {{ .service }}-nopo11y-alert-rules + labels: + managedby: nopo11y +spec: + groups: + - name: {{ .service }}-nopo11y-alert-rules + rules: + {{- if $.Values.istioMetrics.enabled }} + - alert: {{ .service }}High5xxErrorRate + expr: sum(rate(istio_requests_total{ {{- if hasKey . 
"cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}", response_code=~"5.."}[5m])) by (instance) / sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}"}[5m])) by (instance) * 100 > {{ .rate5xx }} + annotations: + description: {{ .service }} service is experiencing high 5xx errors rate from last 5 minutes. + summary: {{ .service }} service is experiencing high 5xx error rate. + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/{{ .dashboarduid }} + {{- end }} + labels: + severity: critical + - alert: {{ .service }}High4xxErrorRate + expr: sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}", response_code=~"4.."}[5m])) by (instance) / sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}"}[5m])) by (instance) * 100 > {{ .rate4xx }} + for: 5m + annotations: + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/{{ .dashboarduid }} + {{- end }} + description: {{ .service }} service is experiencing high 4xx errors rate from last 5 minutes. + summary: {{ .service }} service is experiencing high 4xx error rate. + labels: + severity: warning + {{- end }} + {{- if $.Values.nginxIngressMetrics.enabled }} + - alert: {{ .service }}High5xxErrorRate-NginxIngress + expr: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}status=~"5..", exported_service="{{ .service }}"}[5m])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}exported_service="{{ .service }}"}[5m])) * 100 > {{ .rate5xx }} + annotations: + description: {{ .service }} service is experiencing high 5xx errors rate from last 5 minutes. + summary: {{ .service }} is experiencing high 5xx error rate. + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/{{ .dashboarduid }} + {{- end }} + labels: + severity: critical + - alert: {{ .service }}High4xxErrorRate-NginxIngress + expr: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}status=~"4..", exported_service="{{ .service }}"}[5m])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}exported_service="{{ .service }}"}[5m])) * 100 > {{ .rate4xx }} + for: 10m + annotations: + description: {{ .service }} service is experiencing high 4xx errors rate from last 5 minutes. + summary: {{ .service }} service is experiencing high 4xx error rate. 
+ {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/{{ .dashboarduid }} + {{- end }} + labels: + severity: warning + {{- end }} +--- +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/nopo11y/templates/defaultDashboard.yaml b/charts/nopo11y/templates/dashboards.yaml similarity index 80% rename from charts/nopo11y/templates/defaultDashboard.yaml rename to charts/nopo11y/templates/dashboards.yaml index 76de762..87248b3 100644 --- a/charts/nopo11y/templates/defaultDashboard.yaml +++ b/charts/nopo11y/templates/dashboards.yaml @@ -1,23 +1,14 @@ -{{- if or (eq .Values.namespace "") (eq .Values.deploymentName "") (eq .Values.appLabel "")}} -{{ fail "namespace, deploymentName and appLabel are required" }} -{{- end }} -{{- if and (not .Values.istioMetrics.enabled) (not .Values.nginxIngressMetrics.enabled ) }} -{{ fail "Enabling either istioMetrics or nginxIngresMetrics is required" }} -{{- end }} -{{- if and (eq .Values.nginxIngressMetrics.ingressName "") .Values.nginxIngressMetrics.enabled }} -{{ fail "Required nginxIngressMetrics.ingressName" }} -{{- end }} - {{- if .Values.enabled }} +{{- range (include "nopo11y.services" . |fromJsonArray) }} apiVersion: v1 kind: ConfigMap metadata: - name: "{{ include "app.label" . }}-service-overview-dashboard" - namespace: {{ .Values.namespace }} + name: "{{ .service }}-service-overview-dashboard" + namespace: {{ $.Values.namespace }} labels: grafana_dashboard: "1" data: - {{ include "app.label" . }}-overview-dashboard.json: |- + {{ .service }}-overview-dashboard.json: |- { "annotations": { "list": [ @@ -48,7 +39,7 @@ data: "links": [], "liveNow": false, "panels": [ - {{- if .Values.nginxIngressMetrics.enabled }} + {{- if $.Values.nginxIngressMetrics.enabled }} { "collapsed": false, "gridPos": { @@ -147,7 +138,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval]))", + "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval]))", "legendFormat": "Requests/sec", "range": true, "refId": "A" @@ -241,7 +232,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(nginx_ingress_controller_request_duration_seconds_sum{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_request_duration_seconds_count{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval]))", + "expr": "sum(rate(nginx_ingress_controller_request_duration_seconds_sum{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_request_duration_seconds_count{ {{- if hasKey . 
"cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval]))", "legendFormat": "Latency", "range": true, "refId": "A" @@ -335,7 +326,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\",status=~\"5..\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) * 100", + "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}status=~\"5..\", exported_service=\"{{ .service }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval])) * 100", "legendFormat": "5xx error rate", "range": true, "refId": "A" @@ -429,7 +420,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\",status=~\"2..|4..\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) * 100", + "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}status=~\"2..|4..\", exported_service=\"{{ .service }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval])) * 100", "legendFormat": "Success ", "range": true, "refId": "A" @@ -440,7 +431,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\",status=~\"5..\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval])) * 100", + "expr": "sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}status=~\"5..\", exported_service=\"{{ .service }}\"}[$__rate_interval])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . 
"cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval])) * 100", "hide": false, "legendFormat": "Error", "range": true, @@ -535,7 +526,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum (rate(nginx_ingress_controller_response_size_sum{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval]))/sum(rate(nginx_ingress_controller_response_size_count{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}ingress=~\"{{ .Values.nginxIngressMetrics.ingressName }}\", path=\"{{ .Values.nginxIngressMetrics.path }}\"}[$__rate_interval]))", + "expr": "sum (rate(nginx_ingress_controller_response_size_sum{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval]))/sum(rate(nginx_ingress_controller_response_size_count{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}exported_service=\"{{ .service }}\"}[$__rate_interval]))", "legendFormat": "Response Size", "range": true, "refId": "A" @@ -545,7 +536,7 @@ data: "type": "timeseries" }, {{- end }} - {{- if .Values.istioMetrics.enabled }} + {{- if $.Values.istioMetrics.enabled }} { "collapsed": false, "gridPos": { @@ -639,7 +630,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval]))", + "expr": "sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval]))", "legendFormat": "Requests/sec", "range": true, "refId": "A" @@ -733,7 +724,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(istio_request_duration_milliseconds_sum{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval])) / sum(rate(istio_request_duration_milliseconds_count{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval]))", + "expr": "sum(rate(istio_request_duration_milliseconds_sum{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval])) / sum(rate(istio_request_duration_milliseconds_count{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval]))", "legendFormat": "Latency", "range": true, "refId": "A" @@ -827,7 +818,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\", response_code=~\"5..\"}[$__rate_interval])) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . 
}}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval])) * 100", + "expr": "sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\", response_code=~\"5..\"}[$__rate_interval])) / sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval])) * 100", "legendFormat": "5xx error rate", "range": true, "refId": "A" @@ -921,7 +912,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\", response_code=~\"2..|4..\"}[$__rate_interval])) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval])) * 100", + "expr": "sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\", response_code=~\"2..|4..\"}[$__rate_interval])) / sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval])) * 100", "legendFormat": "Success ", "range": true, "refId": "A" @@ -932,7 +923,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\", response_code=~\"5..\"}[$__rate_interval])) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=~\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval])) * 100", + "expr": "sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\", response_code=~\"5..\"}[$__rate_interval])) / sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval])) * 100", "hide": false, "legendFormat": "Error", "range": true, @@ -1028,7 +1019,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(istio_response_bytes_sum{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}app=\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval])) / sum(rate(istio_response_bytes_count{app=\"{{ include "app.label" . }}\", destination_app=~\"{{ include "app.label" . }}\"}[$__rate_interval]))", + "expr": "sum(rate(istio_response_bytes_sum{ {{- if hasKey . 
"cluster" }}cluster=\"{{ .cluster }}\", {{- end }}source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval])) / sum(rate(istio_response_bytes_count{source_workload!~\"unknown\", reporter=\"source\", destination_service_name=\"{{ .service }}\"}[$__rate_interval]))", "legendFormat": "Response Size", "range": true, "refId": "A" @@ -1136,7 +1127,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\r\n container_memory_working_set_bytes{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", container!=\"\", image!=\"\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ .Release.Namespace }}\", resource=\"memory\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", + "expr": "sum(\r\n container_memory_working_set_bytes{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", container!=\"\", image!=\"\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_requests{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ $.Release.Namespace }}\", resource=\"memory\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1230,7 +1221,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\r\n container_memory_working_set_bytes{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", container!=\"\", image!=\"\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . 
}}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_limits{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ .Release.Namespace }}\", resource=\"memory\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", + "expr": "sum(\r\n container_memory_working_set_bytes{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", container!=\"\", image!=\"\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_limits{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ $.Release.Namespace }}\", resource=\"memory\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1324,7 +1315,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\r\n rate(container_cpu_usage_seconds_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\"}[5m])\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_requests{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ .Release.Namespace }}\", resource=\"cpu\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", + "expr": "sum(\r\n rate(container_cpu_usage_seconds_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\"}[5m])\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_requests{ {{- if hasKey . 
"cluster" }}cluster=\"{{ .cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ $.Release.Namespace }}\", resource=\"cpu\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1418,7 +1409,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\r\n rate(container_cpu_usage_seconds_total{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\"}[5m])\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_limits{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ .Release.Namespace }}\", resource=\"cpu\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}namespace=\"{{ .Release.Namespace }}\", workload=\"{{ include "deployment.name" . }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", + "expr": "sum(\r\n rate(container_cpu_usage_seconds_total{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\"}[5m])\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod)\r\n/sum(\r\n kube_pod_container_resource_limits{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}job=\"kube-state-metrics\", namespace=\"{{ $.Release.Namespace }}\", resource=\"cpu\"}\r\n * on(namespace,pod)\r\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }}namespace=\"{{ $.Release.Namespace }}\", workload=\"{{ .deployment }}\", workload_type=\"deployment\"}\r\n) by (pod) * 100", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1471,7 +1462,7 @@ data: "uid": "P8E80F9AEF21F6940" }, "editorMode": "code", - "expr": "{ {{- if hasKey .Values "cluster" }}cluster=\"{{ .Values.cluster }}\", {{- end }}{{- if and .Values.logLabel .Values.logLabelValue }} {{ .Values.logLabel }}=\"{{ .Values.logLabelValue }}\"{{- else }} app=\"{{ include "app.label" . }}\"{{- end }}, container!=\"istio-proxy\"} |= ``", + "expr": "{ {{- if hasKey . "cluster" }}cluster=\"{{ .cluster }}\", {{- end }} pod=~\"{{ .deployment }}.*\", container!=\"istio-proxy\"} |= ``", "queryType": "range", "refId": "A" } @@ -1495,9 +1486,11 @@ data: }, "timepicker": {}, "timezone": "", - "title": "{{ title ( include "app.label" . 
) }} Overview - Dashboard", - "uid": "{{ include "dashboard-uid" .}}", + "title": "{{ .service }} Overview - Dashboard", + "uid": "{{ .dashboarduid }}", "version": 10, "weekStart": "" } +--- +{{- end }} {{- end }} \ No newline at end of file diff --git a/charts/nopo11y/templates/defaultAlerts.yaml b/charts/nopo11y/templates/defaultAlerts.yaml deleted file mode 100644 index cd69fe8..0000000 --- a/charts/nopo11y/templates/defaultAlerts.yaml +++ /dev/null @@ -1,60 +0,0 @@ -{{- if .Values.enabled }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - namespace: {{ .Values.namespace }} - name: {{ include "app.label" . }}-default-alert-rules - labels: - release: {{ .Values.prometheusReleaseLabel }} - managedby: nopo11y -spec: - groups: - - name: {{ include "app.label" . }}-default-alert-rules - rules: - {{- if .Values.istioMetrics.enabled }} - - alert: {{ include "app.label" . }}High5xxErrorRate - expr: sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"5.."}[5m])) by (instance) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate5xx }} - annotations: - description: {{ include "app.label" . }} service is experiencing high 5xx errors rate from last 5 minutes. - summary: {{ include "app.label" . }} service is experiencing high 5xx error rate. - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" .}} - {{- end }} - labels: - severity: critical - - alert: {{ include "app.label" . }}High4xxErrorRate - expr: sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"4.."}[5m])) by (instance) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate4xx }} - for: 5m - annotations: - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" .}} - {{- end }} - description: {{ include "app.label" . }} service is experiencing high 4xx errors rate from last 5 minutes. - summary: {{ include "app.label" . }} service is experiencing high 4xx error rate. - labels: - severity: warning - {{- end }} - {{- if .Values.nginxIngressMetrics.enabled }} - - alert: {{ include "app.label" . }}IngressHigh5xxErrorRate - expr: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"5..", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) * 100 > {{ .Values.errorRate5xx }} - annotations: - description: {{ include "app.label" . }} service is experiencing high 5xx errors rate from last 5 minutes. - summary: {{ include "app.label" . }} is experiencing high 5xx error rate. 
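The default* templates being removed here (defaultAlerts.yaml, defaultSLOs.yaml, and the old single-service dashboard) were driven by one flat set of values (appLabel, deploymentName, errorRate5xx, and so on), so the chart could only describe a single service. After the restructuring the same intent is expressed per entry in the services list. A rough mapping using the old sample defaults; note that the old ingressName/path selectors have no direct replacement, since the new nginx queries select on exported_service and the new Istio queries use destination_service_name instead of the app label:

```yaml
# Before: flat values consumed by the removed default* templates
appLabel: "sample"
deploymentName: "sample"
availabilitySLO: 99.9
latencySLO: 99
latency: 1000        # ms, used as the latency SLO bucket boundary
errorRate5xx: 0.05
errorRate4xx: 5

# After: roughly equivalent entry in the new services list
services:
  - serviceName: "sample"
    deploymentName: "sample"
    slo:
      availability: 99.9
      latency: 99
    alertThresholds:
      latencyMS: 1000
      rate5xx: 0.05
      rate4xx: 5
```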
- {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" .}} - {{- end }} - labels: - severity: critical - - alert: {{ include "app.label" . }}IngressHigh4xxErrorRate - expr: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"4..", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) * 100 > {{ .Values.errorRate4xx }} - for: 10m - annotations: - description: {{ include "app.label" . }} service is experiencing high 4xx errors rate from last 5 minutes. - summary: {{ include "app.label" . }} service is experiencing high 4xx error rate. - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/{{ include "dashboard-uid" .}} - {{- end }} - labels: - severity: warning - {{- end }} -{{- end }} \ No newline at end of file diff --git a/charts/nopo11y/templates/defaultSLOs.yaml b/charts/nopo11y/templates/defaultSLOs.yaml deleted file mode 100644 index 1172b77..0000000 --- a/charts/nopo11y/templates/defaultSLOs.yaml +++ /dev/null @@ -1,199 +0,0 @@ -{{- if .Values.enabled }} -{{- if .Values.istioMetrics.enabled }} -apiVersion: sloth.slok.dev/v1 -kind: PrometheusServiceLevel -metadata: - labels: - release: {{ .Values.prometheusReleaseLabel }} - managedby: nopo11y - name: {{ include "app.label" . }}-availability-slo - namespace: {{ .Values.namespace }} -spec: - labels: - app: sloth - role: alert-rules - component: {{ include "app.label" . }}-availability-SLO-rules - service: {{ include "app.label" . }} - slos: - - alerting: - annotations: - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/slo-detail?var-service={{ include "app.label" . }} - {{- end }} - summary: SLO to measure success vs errors - {{ .Values.availabilitySLO }}% of the time requests should - be succesfully served (non 5xx). When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - description: SLO to measure success vs errors - {{ .Values.availabilitySLO }}% of the time requests should - be succesfully served (non 5xx). When you receive this alert it means that the - SLO is at risk as your error budget is getting exhausted. To know more about - ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: {{ include "app.label" . }} - availability SLO is at RISK - pageAlert: - labels: - alert_type: symptom - severity: critical - ticketAlert: - labels: - alert_type: symptom - severity: warning - description: SLO to measure success vs errors - {{ .Values.availabilitySLO }}% of the time requests should - be succesfully served (non 5xx). When you receive this alert it means that the - SLO is at risk as your error budget is getting exhausted. To know more about - ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: availability-{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ include "app.label" . }}{{- else }}{{ include "app.label" . 
}}{{- end }} - objective: {{ .Values.availabilitySLO }} - sli: - events: - errorQuery: sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"5.."}[{{ printf "{{.window}}" }}])) - totalQuery: sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[{{ printf "{{.window}}" }}])) ---- -apiVersion: sloth.slok.dev/v1 -kind: PrometheusServiceLevel -metadata: - labels: - release: {{ .Values.prometheusReleaseLabel }} - managedby: nopo11y - name: {{ include "app.label" . }}-latency-slo - namespace: {{ .Values.namespace }} -spec: - labels: - app: sloth - role: alert-rules - component: {{ include "app.label" . }}-latency-SLO-rules - service: {{ include "app.label" . }} - slos: - - alerting: - annotations: - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/slo-detail?var-service={{ include "app.label" . }} - {{- end }} - summary: SLO to measure response time - {{ .Values.latencySLO }}% of the time requests should - be succesfully served in < {{ .Values.latency }}ms. When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - description: SLO to measure response time - {{ .Values.latencySLO }}% of the time requests should - be succesfully served in < {{ .Values.latency }}ms. When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: {{ include "app.label" . }} - latency SLO is at RISK - pageAlert: - labels: - alert_type: symptom - severity: critical - ticketAlert: - labels: - alert_type: symptom - severity: warning - description: SLO to measure response time - {{ .Values.latencySLO }}% of the time requests should - be succesfully served in < {{ .Values.latency }}ms. When you receive this alert it means that the - SLO is at risk as your error budget is getting exhausted. To know more about - ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: latency-{{- if .Values.includeReleaseNameInMetricsLabels }}{{ .Release.Name }}-{{ include "app.label" . }}{{- else }}{{ include "app.label" . }}{{- end }} - objective: {{ .Values.latencySLO }} - sli: - events: - errorQuery: (sum(rate(istio_request_duration_milliseconds_bucket{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", le="+Inf"}[{{ printf "{{.window}}" }}])) - sum(rate(istio_request_duration_milliseconds_bucket{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", le="{{ .Values.latency }}"}[{{ printf "{{.window}}" }}]))) - totalQuery: sum(rate(istio_request_duration_milliseconds_bucket{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . 
}}", le="+Inf"}[{{ printf "{{.window}}" }}])) -{{- end }} -{{- end }} ---- -{{- if .Values.enabled }} -{{- if .Values.nginxIngressMetrics.enabled }} -apiVersion: sloth.slok.dev/v1 -kind: PrometheusServiceLevel -metadata: - labels: - release: {{ .Values.prometheusReleaseLabel }} - managedby: nopo11y - name: {{ include "app.label" . }}-ingress-availability-slo - namespace: {{ .Values.namespace }} -spec: - labels: - app: sloth - role: alert-rules - component: {{ include "app.label" . }}-ingress-availability-SLO-rules - service: {{ include "app.label" . }}-ingress - slos: - - alerting: - annotations: - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/slo-detail?var-service={{ include "app.label" . }} - {{- end }} - summary: SLO to measure success vs errors - {{ .Values.availabilitySLO }}% of the time requests should - be succesfully served (non 5xx). When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - description: SLO to measure success vs errors - {{ .Values.availabilitySLO }}% of the time requests should - be succesfully served (non 5xx). When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: {{ include "app.label" . }}-ingress - availability SLO is at RISK - pageAlert: - labels: - alert_type: symptom - severity: critical - ticketAlert: - labels: - alert_type: symptom - severity: warning - description: SLO to measure success vs errors - {{ .Values.availabilitySLO }}% of the time requests should - be succesfully served (non 5xx). When you receive this alert it means that the - SLO is at risk as your error budget is getting exhausted. To know more about - ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: availability-{{ include "app.label" . }}-ingress - objective: {{ .Values.availabilitySLO }} - sli: - events: - errorQuery: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"5..", path="{{ .Values.nginxIngressMetrics.path }}"}[{{ printf "{{.window}}" }}])) - totalQuery: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}", path="{{ .Values.nginxIngressMetrics.path }}"}[{{ printf "{{.window}}" }}])) ---- -apiVersion: sloth.slok.dev/v1 -kind: PrometheusServiceLevel -metadata: - labels: - release: {{ .Values.prometheusReleaseLabel }} - managedby: nopo11y - name: {{ include "app.label" . }}-ingress-latency-slo - namespace: {{ .Values.namespace }} -spec: - labels: - app: sloth - role: alert-rules - component: {{ include "app.label" . }}-ingress-latency-SLO-rules - service: {{ include "app.label" . }}-ingress - slos: - - alerting: - annotations: - {{- if .Values.grafanaURL }} - dashboard: {{ .Values.grafanaURL }}/d/slo-detail?var-service={{ include "app.label" . }} - {{- end }} - summary: SLO to measure response time - {{ .Values.latencySLO }}% of the time requests should - be succesfully served in < {{ .Values.latency }}ms. When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. 
To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - description: SLO to measure response time - {{ .Values.latencySLO }}% of the time requests should - be succesfully served in < {{ .Values.latency }}ms. When you receive this alert it means that - the SLO is at risk as your error budget is getting exhausted. To know more - about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: {{ include "app.label" . }}-ingress - latency SLO is at RISK - pageAlert: - labels: - alert_type: symptom - severity: critical - ticketAlert: - labels: - alert_type: symptom - severity: warning - description: SLO to measure response time - {{ .Values.latencySLO }}% of the time requests should - be succesfully served in < {{ .Values.latency }}ms. When you receive this alert it means that the - SLO is at risk as your error budget is getting exhausted. To know more about - ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ - name: latency-{{ include "app.label" . }}-ingress - objective: {{ .Values.latencySLO }} - sli: - events: - errorQuery: (sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",le="+Inf", path="{{ .Values.nginxIngressMetrics.path }}"}[{{ printf "{{.window}}" }}])) - sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",le="{{ divf .Values.latency 1000 }}", path="{{ .Values.nginxIngressMetrics.path }}"}[{{ printf "{{.window}}" }}]))) - totalQuery: sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",le="+Inf", path="{{ .Values.nginxIngressMetrics.path }}"}[{{ printf "{{.window}}" }}])) -{{- end }} -{{- end }} diff --git a/charts/nopo11y/templates/slos.yaml b/charts/nopo11y/templates/slos.yaml new file mode 100644 index 0000000..d469164 --- /dev/null +++ b/charts/nopo11y/templates/slos.yaml @@ -0,0 +1,196 @@ +{{- if .Values.enabled }} +{{- range (include "nopo11y.services" . |fromJsonArray) }} +{{- if $.Values.istioMetrics.enabled }} +apiVersion: sloth.slok.dev/v1 +kind: PrometheusServiceLevel +metadata: + labels: + managedby: nopo11y + name: {{ .service }}-availability-slo + namespace: {{ $.Values.namespace }} +spec: + labels: + app: sloth + role: alert-rules + component: {{ .service }}-availability-SLO-rules + service: {{ .service }} + slos: + - alerting: + annotations: + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/slo-detail?var-service={{ .service }} + {{- end }} + summary: SLO to measure success vs errors - {{ .availability }}% of the time requests should + be succesfully served (non 5xx). When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + description: SLO to measure success vs errors - {{ .availability }}% of the time requests should + be succesfully served (non 5xx). When you receive this alert it means that the + SLO is at risk as your error budget is getting exhausted. 
To know more about + ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: {{ .service }} - availability SLO is at RISK + pageAlert: + labels: + alert_type: symptom + severity: critical + ticketAlert: + labels: + alert_type: symptom + severity: warning + description: SLO to measure success vs errors - {{ .availability }}% of the time requests should + be succesfully served (non 5xx). When you receive this alert it means that the + SLO is at risk as your error budget is getting exhausted. To know more about + ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: availability-{{ .service }} + objective: {{ .availability }} + sli: + events: + errorQuery: sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}", response_code=~"5.."}[{{ printf "{{.window}}" }}])) + totalQuery: sum(rate(istio_requests_total{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}"}[{{ printf "{{.window}}" }}])) +--- +apiVersion: sloth.slok.dev/v1 +kind: PrometheusServiceLevel +metadata: + labels: + managedby: nopo11y + name: {{ .service }}-latency-slo + namespace: {{ $.Values.namespace }} +spec: + labels: + app: sloth + role: alert-rules + component: {{ .service }}-latency-SLO-rules + service: {{ .service }} + slos: + - alerting: + annotations: + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/slo-detail?var-service={{ .service }} + {{- end }} + summary: SLO to measure response time - {{ .latency }}% of the time requests should + be succesfully served in < {{ .latencyThreshold }}ms. When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + description: SLO to measure response time - {{ .latency }}% of the time requests should + be succesfully served in < {{ .latencyThreshold }}ms. When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: {{ .service }} - latency SLO is at RISK + pageAlert: + labels: + alert_type: symptom + severity: critical + ticketAlert: + labels: + alert_type: symptom + severity: warning + description: SLO to measure response time - {{ .latency }}% of the time requests should + be succesfully served in < {{ .latencyThreshold }}ms. When you receive this alert it means that the + SLO is at risk as your error budget is getting exhausted. To know more about + ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: latency-{{ .service }} + objective: {{ .latency }} + sli: + events: + errorQuery: (sum(rate(istio_request_duration_milliseconds_bucket{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}", le="+Inf"}[{{ printf "{{.window}}" }}])) - sum(rate(istio_request_duration_milliseconds_bucket{ {{- if hasKey . 
"cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}", le="{{ .latencyThreshold }}"}[{{ printf "{{.window}}" }}]))) + totalQuery: sum(rate(istio_request_duration_milliseconds_bucket{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}source_workload!~"unknown", reporter="source", destination_service_name="{{ .service }}", le="+Inf"}[{{ printf "{{.window}}" }}])) +--- +{{- end }} +{{- if $.Values.nginxIngressMetrics.enabled }} +apiVersion: sloth.slok.dev/v1 +kind: PrometheusServiceLevel +metadata: + labels: + managedby: nopo11y + name: {{ .service }}-ingress-availability-slo + namespace: {{ $.Values.namespace }} +spec: + labels: + app: sloth + role: alert-rules + component: {{ .service }}-ingress-availability-SLO-rules + service: {{ .service }} + slos: + - alerting: + annotations: + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/slo-detail?var-service={{ .service }} + {{- end }} + summary: SLO to measure success vs errors - {{ .availability }}% of the time requests should + be succesfully served (non 5xx). When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + description: SLO to measure success vs errors - {{ .availability }}% of the time requests should + be succesfully served (non 5xx). When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: {{ .service }}-ingress - availability SLO is at RISK + pageAlert: + labels: + alert_type: symptom + severity: critical + ticketAlert: + labels: + alert_type: symptom + severity: warning + description: SLO to measure success vs errors - {{ .availability }}% of the time requests should + be succesfully served (non 5xx). When you receive this alert it means that the + SLO is at risk as your error budget is getting exhausted. To know more about + ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: availability-{{ .service }}-ingress + objective: {{ .availability }} + sli: + events: + errorQuery: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}status=~"5..", exported_service="{{ .service }}"}[{{ printf "{{.window}}" }}])) + totalQuery: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}exported_service="{{ .service }}"}[{{ printf "{{.window}}" }}])) +--- +apiVersion: sloth.slok.dev/v1 +kind: PrometheusServiceLevel +metadata: + labels: + managedby: nopo11y + name: {{ .service }}-ingress-latency-slo + namespace: {{ $.Values.namespace }} +spec: + labels: + app: sloth + role: alert-rules + component: {{ .service }}-ingress-latency-SLO-rules + service: {{ .service }} + slos: + - alerting: + annotations: + {{- if $.Values.grafanaURL }} + dashboard: {{ $.Values.grafanaURL }}/d/slo-detail?var-service={{ .service }} + {{- end }} + summary: SLO to measure response time - {{ .latency }}% of the time requests should + be succesfully served in < {{ .latencyThreshold }}ms. When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. 
To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + description: SLO to measure response time - {{ .latency }}% of the time requests should + be succesfully served in < {{ .latencyThreshold }}ms. When you receive this alert it means that + the SLO is at risk as your error budget is getting exhausted. To know more + about ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: {{ .service }} - latency SLO is at RISK + pageAlert: + labels: + alert_type: symptom + severity: critical + ticketAlert: + labels: + alert_type: symptom + severity: warning + description: SLO to measure response time - {{ .latency }}% of the time requests should + be succesfully served in < {{ .latencyThreshold }}ms. When you receive this alert it means that the + SLO is at risk as your error budget is getting exhausted. To know more about + ErrorBudgets and SLOs read https://sre.google/workbook/implementing-slos/ + name: latency-{{ .service }}-ingress + objective: {{ .latency }} + sli: + events: + errorQuery: (sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}le="+Inf", exported_service="{{ .service }}"}[{{ printf "{{.window}}" }}])) - sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}le="{{ divf .latencyThreshold 1000 }}", exported_service="{{ .service }}"}[{{ printf "{{.window}}" }}]))) + totalQuery: sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ {{- if hasKey . "cluster" }}cluster="{{ .cluster }}", {{- end }}le="+Inf", exported_service="{{ .service }}"}[{{ printf "{{.window}}" }}])) +--- +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/nopo11y/values.yaml b/charts/nopo11y/values.yaml index 48e6933..6496629 100644 --- a/charts/nopo11y/values.yaml +++ b/charts/nopo11y/values.yaml @@ -1,20 +1,34 @@ enabled: false + +defaults: + slo: + availability: 99.9 + latency: 99 + alertThresholds: + latencyMS: 100 + rate5xx: 0.05 + rate4xx: 5 + namespace: observability -appLabel: "sample" -includeReleaseNameInMetricsLabels: false -deploymentName: "sample" -prometheusReleaseLabel: "nopo11y-stack" -availabilitySLO: 99.9 -latencySLO: 99 -latency: 1000 -errorRate5xx: 0.05 -errorRate4xx: 5 + +prependReleaseName: false + grafanaURL: "" -logLabel: "" -logLabelValue: "" + istioMetrics: enabled: true + nginxIngressMetrics: enabled: false - ingressName: "sample-ingress" - path: "/" \ No newline at end of file +services: [] +# - serviceName: "sample" +# deploymentName: "sample" + +# slo: {} +# # availability: 99.9 +# # latency: 99 + +# alertThresholds: {} +# # latencyMS: 100 +# # rate4xx: 5 +# # rate5xx: 0.05 \ No newline at end of file
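Putting the new schema together, a multi-service configuration for one release might look like the sketch below (the service names, Grafana URL, and overridden thresholds are hypothetical; any field omitted for a service is filled in from the defaults block by the nopo11y.services helper):

```yaml
enabled: true
namespace: observability
grafanaURL: "https://grafana.example.com"   # hypothetical
prependReleaseName: true
istioMetrics:
  enabled: true
nginxIngressMetrics:
  enabled: false
defaults:
  slo:
    availability: 99.9
    latency: 99
  alertThresholds:
    latencyMS: 100
    rate5xx: 0.05
    rate4xx: 5
services:
  - serviceName: "orders"          # hypothetical; relies entirely on defaults
    deploymentName: "orders"
  - serviceName: "payments"        # hypothetical; tighter availability and 5xx threshold
    deploymentName: "payments-api"
    slo:
      availability: 99.99
    alertThresholds:
      rate5xx: 0.01
```

With prependReleaseName: true the helper prefixes the release name, so for a hypothetical release called myrelease the entries render as myrelease-orders and myrelease-payments, and each one gets its own PrometheusRule, Grafana dashboard ConfigMap, and pair of Sloth PrometheusServiceLevel objects in the namespace set by .Values.namespace.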