Skip to content

Commit

Permalink
Merge pull request #68 from appuio/fix/ingress-sli
Browse files: browse the repository at this point in the history
Fix ingress error ratio query to only consider the canary route for the cluster's `appsDomain`
  • Loading branch information
simu authored May 15, 2024
2 parents e03df70 + aaf49a3 commit 2f5e031
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 11 deletions.
12 changes: 8 additions & 4 deletions component/slos.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,10 @@ local defaultSlos = {
},
ingress: {
local config = params.slos.ingress,
local os = com.getValueOrDefault(inv.parameters, 'openshift', {}),
local appsDomain = com.getValueOrDefault(os, 'appsDomain', ''),
local os = std.get(inv.parameters, 'openshift', {}),
// NOTE: appsDomain should always be present if we have parameter `openshift`.
local appsDomain = std.get(os, 'appsDomain', ''),
local canaryRoute = 'canary-openshift-ingress-canary.%s' % appsDomain,

extra_rules: [
{
Expand All @@ -111,14 +113,16 @@ local defaultSlos = {
description: 'OpenShift ingress SLO based on canary availability',
sli: {
raw: {
error_ratio_query: '1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[{{.window}}])',
error_ratio_query:
'1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{%s}[{{.window}}])'
% [ if appsDomain != '' then 'host="%s"' % canaryRoute else '' ],
},
},
alerting: {
name: 'SLO_ClusterIngressFailure',
annotations: {
summary: 'Probes to ingress canary fail',
[if appsDomain != '' then 'canary_url']: 'canary-openshift-ingress-canary.%s' % appsDomain,
[if appsDomain != '' then 'canary_url']: canaryRoute,
},
page_alert: {},
ticket_alert: {},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,49 @@ spec:
groups:
- name: sloth-slo-sli-recordings-ingress-canary
rules:
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[5m]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[5m]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
sloth_slo: canary
sloth_window: 5m
record: slo:sli_error:ratio_rate5m
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[30m]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[30m]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
sloth_slo: canary
sloth_window: 30m
record: slo:sli_error:ratio_rate30m
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[1h]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[1h]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
sloth_slo: canary
sloth_window: 1h
record: slo:sli_error:ratio_rate1h
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[2h]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[2h]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
sloth_slo: canary
sloth_window: 2h
record: slo:sli_error:ratio_rate2h
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[6h]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[6h]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
sloth_slo: canary
sloth_window: 6h
record: slo:sli_error:ratio_rate6h
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[1d]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[1d]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
sloth_slo: canary
sloth_window: 1d
record: slo:sli_error:ratio_rate1d
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance[3d]))
- expr: (1 - avg_over_time(appuio_ocp4_slo:ingress_canary_route_reachable:no_instance{host="canary-openshift-ingress-canary.apps.foo.example.com"}[3d]))
labels:
sloth_id: ingress-canary
sloth_service: ingress
Expand Down

0 comments on commit 2f5e031

Please sign in to comment.