From e654ece0d9eeb9107ab9a3c8d11e3d697bf928fd Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:13:43 +0100 Subject: [PATCH] rhobs: align config with big observatorium pr (#671) * align config with big observatorium pr Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> * add missing image tag Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --------- Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 58 +- go.sum | 142 +++-- ...rium-metrics-compact-default-template.yaml | 96 +-- ...ics-receive-ingestor-default-template.yaml | 62 +- ...torium-metrics-ruler-default-template.yaml | 26 +- ...torium-metrics-store-default-template.yaml | 447 +++++++------- .../rhobs/observatorium-api-template.yaml | 479 ++++++++------- ...atorium-metrics-alertmanager-template.yaml | 52 +- ...orium-metrics-query-frontend-template.yaml | 528 ++++++++--------- ...rvatorium-metrics-query-rule-template.yaml | 224 +++---- .../observatorium-metrics-query-template.yaml | 224 +++---- ...orium-metrics-receive-router-template.yaml | 66 ++- ...atorium-metrics-compact-rhel-template.yaml | 96 +-- ...etrics-receive-ingestor-rhel-template.yaml | 62 +- ...rvatorium-metrics-ruler-rhel-template.yaml | 26 +- ...rvatorium-metrics-store-rhel-template.yaml | 129 +++-- ...um-metrics-compact-telemeter-template.yaml | 96 +-- ...s-receive-ingestor-telemeter-template.yaml | 62 +- ...rium-metrics-ruler-telemeter-template.yaml | 76 +-- ...rium-metrics-store-telemeter-template.yaml | 129 +++-- ...rium-metrics-compact-default-template.yaml | 96 +-- ...ics-receive-ingestor-default-template.yaml | 62 +- ...torium-metrics-ruler-default-template.yaml | 26 +- ...torium-metrics-store-default-template.yaml | 447 +++++++------- .../rhobs/observatorium-api-template.yaml | 456 ++++++++------- ...atorium-metrics-alertmanager-template.yaml | 52 +- ...orium-metrics-query-frontend-template.yaml | 528 ++++++++--------- ...rvatorium-metrics-query-rule-template.yaml | 224 +++---- .../observatorium-metrics-query-template.yaml | 224 +++---- ...orium-metrics-receive-router-template.yaml | 66 ++- ...atorium-metrics-compact-rhel-template.yaml | 96 +-- ...etrics-receive-ingestor-rhel-template.yaml | 62 +- ...rvatorium-metrics-ruler-rhel-template.yaml | 26 +- ...rvatorium-metrics-store-rhel-template.yaml | 129 +++-- ...um-metrics-compact-telemeter-template.yaml | 96 +-- ...s-receive-ingestor-telemeter-template.yaml | 62 +- ...rium-metrics-ruler-telemeter-template.yaml | 26 +- ...rium-metrics-store-telemeter-template.yaml | 129 +++-- services_go/instances/rhobs/rhobs.go | 17 +- services_go/observatorium/api.go | 129 +++-- services_go/observatorium/cache.go | 12 +- services_go/observatorium/helpers.go | 2 + services_go/observatorium/metrics.go | 548 +++++++++--------- 43 files changed, 3373 insertions(+), 3222 deletions(-) diff --git a/go.mod b/go.mod index 5fbde421c52..cb84a76faa2 100644 --- a/go.mod +++ b/go.mod @@ -7,14 +7,13 @@ require ( github.com/google/go-jsonnet v0.20.0 github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44 github.com/observatorium/observatorium v0.0.0-00010101000000-000000000000 - github.com/observatorium/up v0.0.0-20221027030018-d8bb06fa1e34 + github.com/observatorium/up v0.0.0-20240109115132-3a34c4c4fa24 github.com/openshift/api v3.9.0+incompatible github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.70.0 - github.com/prometheus/common v0.45.0 - github.com/pyrra-dev/pyrra v0.7.0 + github.com/pyrra-dev/pyrra v0.7.2 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.28.4 - k8s.io/apimachinery v0.28.4 + k8s.io/api v0.29.0 + k8s.io/apimachinery v0.29.0 ) require ( @@ -24,23 +23,23 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dennwc/varint v1.0.0 // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/emicklei/go-restful/v3 v3.11.1 // indirect github.com/evanphx/json-patch/v5 v5.7.0 // indirect - github.com/fsnotify/fsnotify v1.6.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect - github.com/go-logr/logr v1.3.0 // indirect - github.com/go-openapi/jsonpointer v0.20.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.22.4 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.20.2 // indirect + github.com/go-openapi/jsonreference v0.20.4 // indirect + github.com/go-openapi/swag v0.22.7 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/go-cmp v0.5.9 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20230406165453-00490a63f317 // indirect - github.com/google/uuid v1.3.1 // indirect + github.com/google/uuid v1.5.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -51,38 +50,39 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.17.0 // indirect - github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect + github.com/prometheus/client_golang v1.18.0 // indirect + github.com/prometheus/client_model v0.5.0 // indirect + github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/prometheus/prometheus v1.8.2-0.20220211202545-56e14463bccf // indirect github.com/rodaine/hclencoder v0.0.1 // indirect github.com/stretchr/testify v1.8.4 // indirect go.uber.org/atomic v1.11.0 // indirect - go.uber.org/goleak v1.2.1 // indirect + go.uber.org/goleak v1.3.0 // indirect go.uber.org/zap v1.26.0 // indirect - golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect - golang.org/x/net v0.19.0 // indirect - golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.15.0 // indirect - golang.org/x/term v0.15.0 // indirect + golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc // indirect + golang.org/x/net v0.20.0 // indirect + golang.org/x/oauth2 v0.16.0 // indirect + golang.org/x/sys v0.16.0 // indirect + golang.org/x/term v0.16.0 // indirect golang.org/x/text v0.14.0 // indirect - golang.org/x/time v0.3.0 // indirect + golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.8 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.32.0 // indirect gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/client-go v0.28.4 // indirect - k8s.io/component-base v0.28.4 // indirect + k8s.io/apiextensions-apiserver v0.29.0 // indirect + k8s.io/client-go v0.29.0 // indirect k8s.io/klog/v2 v2.110.1 // indirect - k8s.io/kube-openapi v0.0.0-20230918164632-68afd615200d // indirect - k8s.io/utils v0.0.0-20231127182322-b307cd553661 // indirect + k8s.io/kube-openapi v0.0.0-20240105020646-a37d4de58910 // indirect + k8s.io/utils v0.0.0-20240102154912-e7106e64919e // indirect sigs.k8s.io/controller-runtime v0.16.3 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.3.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) // Delete when https://github.com/observatorium/observatorium/pull/543 is merged to main branch -replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20240105161024-101d341092f9 +replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20240110120013-cb54d2f5d8a9 diff --git a/go.sum b/go.sum index 89dc13431f0..363bdc4521c 100644 --- a/go.sum +++ b/go.sum @@ -387,8 +387,8 @@ github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkg github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.11.1 h1:S+9bSbua1z3FgCnV0KKOSSZ3mDthb5NyEPL5gEpCvyk= +github.com/emicklei/go-restful/v3 v3.11.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -419,8 +419,8 @@ github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= -github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= -github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fullsailor/pkcs7 v0.0.0-20190404230743-d7302db945fa/go.mod h1:KnogPXtdwXqoenmZCw6S+25EAm2MkxbG0deNDu4cbSA= github.com/garyburd/redigo v0.0.0-20150301180006-535138d7bcd7/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= github.com/getkin/kin-openapi v0.53.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= @@ -456,10 +456,11 @@ github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7 github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= -github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= @@ -485,17 +486,16 @@ github.com/go-openapi/jsonpointer v0.18.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwds github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonpointer v0.20.0 h1:ESKJdU9ASRfaPNOPRx12IUyA1vn3R9GiE3KYD14BXdQ= -github.com/go-openapi/jsonpointer v0.20.0/go.mod h1:6PGzBjjIIumbLYysB73Klnms1mwnU4G3YHOECG3CedA= +github.com/go-openapi/jsonpointer v0.20.2 h1:mQc3nmndL8ZBzStEo3JYF8wzmeWffDH4VbXz58sAx6Q= +github.com/go-openapi/jsonpointer v0.20.2/go.mod h1:bHen+N0u1KEO3YlmqOjTT9Adn1RfD91Ar825/PuiRVs= github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= github.com/go-openapi/jsonreference v0.17.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= github.com/go-openapi/jsonreference v0.18.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/jsonreference v0.20.4 h1:bKlDxQxQJgwpUSgOENiMPzCTBVuc7vTdXSSgNeAhojU= +github.com/go-openapi/jsonreference v0.20.4/go.mod h1:5pZJyJP2MnYCpoeoMAql78cCHauHj0V9Lhc506VOpw4= github.com/go-openapi/loads v0.17.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= github.com/go-openapi/loads v0.18.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= github.com/go-openapi/loads v0.19.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= @@ -548,9 +548,8 @@ github.com/go-openapi/swag v0.19.12/go.mod h1:eFdyEBkTdoAf/9RXBvj4cr1nH7GD8Kzo5H github.com/go-openapi/swag v0.19.13/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= -github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.22.7 h1:JWrc1uc/P9cSomxfnsFSVWoE1FW6bNbrVPmpQYpCcR8= +github.com/go-openapi/swag v0.22.7/go.mod h1:Gl91UqO+btAM0plGGxHqJcQZ1ZTy6jbmridBTsDy8A0= github.com/go-openapi/validate v0.18.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4= github.com/go-openapi/validate v0.19.2/go.mod h1:1tRCw7m3jtI8eNWEEliiAqUIcBztB2KDnRCRMUi7GTA= github.com/go-openapi/validate v0.19.3/go.mod h1:90Vh6jjkTn+OT1Eefm0ZixWNFjhtOH7vS9k0lo6zwJo= @@ -680,8 +679,9 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-jsonnet v0.20.0 h1:WG4TTSARuV7bSm4PMB4ohjxe33IHT5WVTrJSU33uT4g= github.com/google/go-jsonnet v0.20.0/go.mod h1:VbgWF9JX7ztlv770x/TolZNGGFfiHEVx9G6ca2eUmeA= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -716,8 +716,8 @@ github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= @@ -1000,8 +1000,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44 h1:QX1PSo1E9PdUbVJkA5FhZ1BA0GzDTfDLW3dbrGbjU5k= github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44/go.mod h1:xwDIn6xpTsymHor6ST57bJQm4FXjey31OfHyEKDFsdM= -github.com/observatorium/up v0.0.0-20221027030018-d8bb06fa1e34 h1:VM0MbtZUgtdQGdJbAKAaHtqsLjjv3mK8zEpblGtqWDA= -github.com/observatorium/up v0.0.0-20221027030018-d8bb06fa1e34/go.mod h1:RuQ+HNob3rNcejNk6HIL9x19WFA8h0lVG1nloN7wFd8= +github.com/observatorium/up v0.0.0-20240109115132-3a34c4c4fa24 h1:onM/JJDVL9vEQsSyBJhYbc3KseW79vnu64Qe5WMcswM= +github.com/observatorium/up v0.0.0-20240109115132-3a34c4c4fa24/go.mod h1:06ATHnkbnd7AvcI2GcwUdfS6UKfPzD8bf5LKfd4T89w= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= @@ -1018,8 +1018,8 @@ github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= -github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= +github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= +github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= @@ -1027,8 +1027,8 @@ github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= -github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= -github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= +github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= +github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= @@ -1114,8 +1114,8 @@ github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3O github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= -github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= +github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= +github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= @@ -1123,8 +1123,8 @@ github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1: github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM= -github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= +github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= github.com/prometheus/common v0.0.0-20180110214958-89604d197083/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= @@ -1164,8 +1164,8 @@ github.com/prometheus/prometheus v0.0.0-20200609090129-a6600f564e3c/go.mod h1:S5 github.com/prometheus/prometheus v1.8.2-0.20220211202545-56e14463bccf h1:eOn9dAniHar8MUiYYSt9bpaGfyAViEudAPLLIcN/5zM= github.com/prometheus/prometheus v1.8.2-0.20220211202545-56e14463bccf/go.mod h1:TyGuQvrvTD1pBDaeQIpTMvsFCSR7wrvxsn4yegjLjVs= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/pyrra-dev/pyrra v0.7.0 h1:INRujT5krJgsZIsX+2lVi5bvrh2J2ukmkWvSKnozAgM= -github.com/pyrra-dev/pyrra v0.7.0/go.mod h1:BuHlOQMd/hoHKqznZ2g61ZvxVTEEt7DeJvCqAdGbULE= +github.com/pyrra-dev/pyrra v0.7.2 h1:FWA1gUk8FhmhySOEG4i9h5+oTHacfVdSOyS9aODILG4= +github.com/pyrra-dev/pyrra v0.7.2/go.mod h1:BuHlOQMd/hoHKqznZ2g61ZvxVTEEt7DeJvCqAdGbULE= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/retailnext/hllpp v1.0.1-0.20180308014038-101a6d2f8b52/go.mod h1:RDpi1RftBQPUCDRw6SmxeaREsAaRKnOclghuzp/WRzc= github.com/rodaine/hclencoder v0.0.1 h1:1jK2rGFxSDT1eU9oVjK4ewrIhMWTcc0yCfZMiN6xRJM= @@ -1175,8 +1175,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/rs/cors v1.6.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/cors v1.8.0/go.mod h1:EBwu+T5AvHOcXwvZIkQFjUN6s8Czyqw12GL/Y0tUyRM= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -1240,8 +1240,6 @@ github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -1250,17 +1248,14 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= -github.com/thibaultmg/observatorium v0.0.0-20240105161024-101d341092f9 h1:A+TcmA/7KHIAvUce9049FRZK1jBdKDPYBCyq4j5ff18= -github.com/thibaultmg/observatorium v0.0.0-20240105161024-101d341092f9/go.mod h1:VFiHODMs9Mnd2DGCtYBr6qdKBZwj6gmwgxilTmnv4EE= +github.com/thibaultmg/observatorium v0.0.0-20240110120013-cb54d2f5d8a9 h1:xLfqDEapSzdJ/gmXe6J99jqn2CpNAVj/qrDjIEGIcJM= +github.com/thibaultmg/observatorium v0.0.0-20240110120013-cb54d2f5d8a9/go.mod h1:zws37roytk03NmLqS7IEiE/rs4v4eQS8Q2lz0LtfHS8= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tinylib/msgp v1.0.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= @@ -1354,8 +1349,8 @@ go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= @@ -1418,8 +1413,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= +golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc h1:ao2WRsKSzW6KuUY9IWPwWahcHCgR0s52IfwutMfEbdM= +golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -1520,8 +1515,8 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220105145211-5b0dc2dfae98/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= -golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1538,8 +1533,8 @@ golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= -golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= +golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1672,15 +1667,14 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= -golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= +golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= +golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1704,8 +1698,8 @@ golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -1788,8 +1782,8 @@ golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.9-0.20211209172050-90a85b2969be/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= +golang.org/x/tools v0.16.0 h1:GO788SKMRunPIBCXiQyo2AaexLstOrVhuAL5YwsckQM= +golang.org/x/tools v0.16.0/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1963,8 +1957,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= @@ -2025,17 +2019,17 @@ k8s.io/api v0.20.1/go.mod h1:KqwcCVogGxQY3nBlRpwt+wpAMF/KjaCc7RpywacvqUo= k8s.io/api v0.20.4/go.mod h1:++lNL1AJMkDymriNniQsWRkMDzRaX2Y/POTUi8yvqYQ= k8s.io/api v0.20.6/go.mod h1:X9e8Qag6JV/bL5G6bU8sdVRltWKmdHsFUGS3eVndqE8= k8s.io/api v0.22.4/go.mod h1:Rgs+9gIGYC5laXQSZZ9JqT5NevNgoGiOdVWi1BAB3qk= -k8s.io/api v0.28.4 h1:8ZBrLjwosLl/NYgv1P7EQLqoO8MGQApnbgH8tu3BMzY= -k8s.io/api v0.28.4/go.mod h1:axWTGrY88s/5YE+JSt4uUi6NMM+gur1en2REMR7IRj0= -k8s.io/apiextensions-apiserver v0.28.4 h1:AZpKY/7wQ8n+ZYDtNHbAJBb+N4AXXJvyZx6ww6yAJvU= -k8s.io/apiextensions-apiserver v0.28.4/go.mod h1:pgQIZ1U8eJSMQcENew/0ShUTlePcSGFq6dxSxf2mwPM= +k8s.io/api v0.29.0 h1:NiCdQMY1QOp1H8lfRyeEf8eOwV6+0xA6XEE44ohDX2A= +k8s.io/api v0.29.0/go.mod h1:sdVmXoz2Bo/cb77Pxi71IPTSErEW32xa4aXwKH7gfBA= +k8s.io/apiextensions-apiserver v0.29.0 h1:0VuspFG7Hj+SxyF/Z/2T0uFbI5gb5LRgEyUVE3Q4lV0= +k8s.io/apiextensions-apiserver v0.29.0/go.mod h1:TKmpy3bTS0mr9pylH0nOt/QzQRrW7/h7yLdRForMZwc= k8s.io/apimachinery v0.17.5/go.mod h1:ioIo1G/a+uONV7Tv+ZmCbMG1/a3kVw5YcDdncd8ugQ0= k8s.io/apimachinery v0.20.1/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= k8s.io/apimachinery v0.20.4/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= k8s.io/apimachinery v0.20.6/go.mod h1:ejZXtW1Ra6V1O5H8xPBGz+T3+4gfkTCeExAHKU57MAc= k8s.io/apimachinery v0.22.4/go.mod h1:yU6oA6Gnax9RrxGzVvPFFJ+mpnW6PBSqp0sx0I0HHW0= -k8s.io/apimachinery v0.28.4 h1:zOSJe1mc+GxuMnFzD4Z/U1wst50X28ZNsn5bhgIIao8= -k8s.io/apimachinery v0.28.4/go.mod h1:wI37ncBvfAoswfq626yPTe6Bz1c22L7uaJ8dho83mgg= +k8s.io/apimachinery v0.29.0 h1:+ACVktwyicPz0oc6MTMLwa2Pw3ouLAfAon1wPLtG48o= +k8s.io/apimachinery v0.29.0/go.mod h1:eVBxQ/cwiJxH58eK/jd/vAk4mrxmVlnpBH5J2GbMeis= k8s.io/apiserver v0.20.1/go.mod h1:ro5QHeQkgMS7ZGpvf4tSMx6bBOgPfE+f52KwvXfScaU= k8s.io/apiserver v0.20.4/go.mod h1:Mc80thBKOyy7tbvFtB4kJv1kbdD0eIH8k8vianJcbFM= k8s.io/apiserver v0.20.6/go.mod h1:QIJXNt6i6JB+0YQRNcS0hdRHJlMhflFmsBDeSgT1r8Q= @@ -2044,13 +2038,13 @@ k8s.io/client-go v0.20.1/go.mod h1:/zcHdt1TeWSd5HoUe6elJmHSQ6uLLgp4bIJHVEuy+/Y= k8s.io/client-go v0.20.4/go.mod h1:LiMv25ND1gLUdBeYxBIwKpkSC5IsozMMmOOeSJboP+k= k8s.io/client-go v0.20.6/go.mod h1:nNQMnOvEUEsOzRRFIIkdmYOjAZrC8bgq0ExboWSU1I0= k8s.io/client-go v0.22.4/go.mod h1:Yzw4e5e7h1LNHA4uqnMVrpEpUs1hJOiuBsJKIlRCHDA= -k8s.io/client-go v0.28.4 h1:Np5ocjlZcTrkyRJ3+T3PkXDpe4UpatQxj85+xjaD2wY= -k8s.io/client-go v0.28.4/go.mod h1:0VDZFpgoZfelyP5Wqu0/r/TRYcLYuJ2U1KEeoaPa1N4= +k8s.io/client-go v0.29.0 h1:KmlDtFcrdUzOYrBhXHgKw5ycWzc3ryPX5mQe0SkG3y8= +k8s.io/client-go v0.29.0/go.mod h1:yLkXH4HKMAywcrD82KMSmfYg2DlE8mepPR4JGSo5n38= k8s.io/component-base v0.20.1/go.mod h1:guxkoJnNoh8LNrbtiQOlyp2Y2XFCZQmrcg2n/DeYNLk= k8s.io/component-base v0.20.4/go.mod h1:t4p9EdiagbVCJKrQ1RsA5/V4rFQNDfRlevJajlGwgjI= k8s.io/component-base v0.20.6/go.mod h1:6f1MPBAeI+mvuts3sIdtpjljHWBQ2cIy38oBIWMYnrM= -k8s.io/component-base v0.28.4 h1:c/iQLWPdUgI90O+T9TeECg8o7N3YJTiuz2sKxILYcYo= -k8s.io/component-base v0.28.4/go.mod h1:m9hR0uvqXDybiGL2nf/3Lf0MerAfQXzkfWhUY58JUbU= +k8s.io/component-base v0.29.0 h1:T7rjd5wvLnPBV1vC4zWd/iWRbV8Mdxs+nGaoaFzGw3s= +k8s.io/component-base v0.29.0/go.mod h1:sADonFTQ9Zc9yFLghpDpmNXEdHyQmFIGbiuZbqAXQ1M= k8s.io/cri-api v0.17.3/go.mod h1:X1sbHmuXhwaHs9xxYffLqJogVsnI+f6cPRcgPel7ywM= k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= k8s.io/cri-api v0.20.4/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= @@ -2069,15 +2063,15 @@ k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= k8s.io/kube-openapi v0.0.0-20200316234421-82d701f24f9d/go.mod h1:F+5wygcW0wmRTnM3cOgIqGivxkwSWIWT5YdsDbeAOaU= k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd/go.mod h1:WOJ3KddDSol4tAGcJo0Tvi+dK12EcqSLqcWsryKMpfM= k8s.io/kube-openapi v0.0.0-20211109043538-20434351676c/go.mod h1:vHXdDvt9+2spS2Rx9ql3I8tycm3H9FDfdUoIuKCefvw= -k8s.io/kube-openapi v0.0.0-20230918164632-68afd615200d h1:/CFeJBjBrZvHX09rObS2+2iEEDevMWYc1v3aIYAjIYI= -k8s.io/kube-openapi v0.0.0-20230918164632-68afd615200d/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/kube-openapi v0.0.0-20240105020646-a37d4de58910 h1:1Rp/XEKP5uxPs6QrsngEHAxBjaAR78iJRiJq5Fi7LSU= +k8s.io/kube-openapi v0.0.0-20240105020646-a37d4de58910/go.mod h1:Pa1PvrP7ACSkuX6I7KYomY6cmMA0Tx86waBhDUgoKPw= k8s.io/kubernetes v1.13.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk= k8s.io/utils v0.0.0-20191114184206-e782cd3c129f/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew= k8s.io/utils v0.0.0-20200414100711-2df71ebbae66/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= -k8s.io/utils v0.0.0-20231127182322-b307cd553661 h1:FepOBzJ0GXm8t0su67ln2wAZjbQ6RxQGZDnzuLcrUTI= -k8s.io/utils v0.0.0-20231127182322-b307cd553661/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ= +k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= @@ -2096,6 +2090,6 @@ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+s sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= diff --git a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-compact-default-template.yaml b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-compact-default-template.yaml index e0a3bdd17e1..bf9c9e30a3f 100755 --- a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-compact-default-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-compact-default-template.yaml @@ -4,6 +4,53 @@ metadata: creationTimestamp: null name: observatorium-thanos-compact-default objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-default + weight: null - apiVersion: v1 kind: Service metadata: @@ -62,7 +109,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-compact-default + name: rhobs-observatorium-thanos-compact-default namespace: openshift-customer-monitoring spec: endpoints: @@ -276,53 +323,6 @@ objects: requests: storage: 50Gi storageClassName: gp2 -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - name: observatorium-thanos-compact-default - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-compact-default - weight: null -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - name: observatorium-thanos-compact-default - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default parameters: - name: COMPACTOR_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml index ae49808353f..7d61cff7762 100755 --- a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml @@ -4,6 +4,31 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-ingestor-default objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: default + observatorium/tenant: default + name: observatorium-thanos-receive-ingestor-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: default + observatorium/tenant: default - apiVersion: v1 kind: Service metadata: @@ -64,7 +89,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-receive-ingestor-default + name: rhobs-observatorium-thanos-receive-ingestor-default namespace: openshift-customer-monitoring spec: endpoints: @@ -152,11 +177,15 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-receive-router + service_name: thanos-receive-ingestor-default sampler_type: ratelimiting sampler_param: 2 - --tsdb.path=/var/thanos/receive env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -189,10 +218,6 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/thanos/thanos:v0.32.5 imagePullPolicy: IfNotPresent livenessProbe: @@ -296,31 +321,6 @@ objects: requests: storage: 5Gi storageClassName: gp2 -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: default - observatorium/tenant: default - name: observatorium-thanos-receive-ingestor-default - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: default - observatorium/tenant: default parameters: - name: INGESTOR_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml index 68d0c78911a..c7f26f835a3 100755 --- a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default objects: - apiVersion: route.openshift.io/v1 kind: Route @@ -18,7 +18,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs spec: host: "" @@ -29,7 +29,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default weight: null - apiVersion: v1 kind: Service @@ -44,7 +44,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs spec: ports: @@ -76,7 +76,7 @@ objects: kind: ServiceAccount metadata: annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-ruler-default"}}' + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-rule-default"}}' creationTimestamp: null labels: app.kubernetes.io/component: rule-evaluation-engine @@ -85,7 +85,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -99,7 +99,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-ruler-default + name: rhobs-observatorium-thanos-rule-default namespace: openshift-customer-monitoring spec: endpoints: @@ -140,7 +140,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs spec: replicas: ${{RULER_REPLICAS}} @@ -151,7 +151,7 @@ objects: app.kubernetes.io/name: thanos-rule app.kubernetes.io/part-of: observatorium observatorium/tenant: default - serviceName: observatorium-thanos-ruler-default + serviceName: observatorium-thanos-rule-default template: metadata: creationTimestamp: null @@ -195,10 +195,10 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-rule + service_name: thanos-rule-default sampler_type: ratelimiting sampler_param: 2 - - --tsdb.retention=2d + - --tsdb.retention=48h0m0s env: - name: POD_NAME valueFrom: @@ -300,7 +300,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-ruler-default + - -openshift-service-account=observatorium-thanos-rule-default - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -374,7 +374,7 @@ objects: terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-ruler-default + serviceAccountName: observatorium-thanos-rule-default terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} diff --git a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-store-default-template.yaml b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-store-default-template.yaml index df89d3c80e1..5ffbc3e3b25 100755 --- a/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-store-default-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/default/observatorium-metrics-store-default-template.yaml @@ -4,6 +4,32 @@ metadata: creationTimestamp: null name: observatorium-thanos-store-default objects: +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n + \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + observatorium/tenant: default + name: hashmod-config-template-default + namespace: rhobs - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -137,40 +163,8 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-bucket-cache-memcached-default terminationGracePeriodSeconds: 120 -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - name: observatorium-thanos-store-bucket-cache-memcached-default - namespace: rhobs - spec: - clusterIP: None - ports: - - name: client - port: 11211 - protocol: TCP - targetPort: 11211 - - name: metrics - port: 9150 - protocol: TCP - targetPort: 9150 - selector: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount +- apiVersion: policy/v1 + kind: PodDisruptionBudget metadata: creationTimestamp: null labels: @@ -178,37 +172,11 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" observatorium/tenant: default name: observatorium-thanos-store-bucket-cache-memcached-default namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - prometheus: app-sre - name: observatorium-thanos-store-bucket-cache-memcached-default - namespace: openshift-customer-monitoring spec: - endpoints: - - port: metrics - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs + maxUnavailable: 1 selector: matchLabels: app.kubernetes.io/component: store-bucket-cache @@ -216,111 +184,18 @@ objects: app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium observatorium/tenant: default -- apiVersion: apps/v1 - kind: Deployment - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - name: observatorium-thanos-store-index-cache-memcached-default - namespace: rhobs - spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - strategy: {} - template: - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - namespace: rhobs - spec: - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - - key: app.kubernetes.io/name - operator: In - values: - - memcached - topologyKey: kubernetes.io/hostname - weight: 100 - containers: - - args: - - --conn-limit=3072 - - --max-item-size=5m - - --memory-limit=2048 - - --verbose=true - image: quay.io/app-sre/memcached:1.5 - imagePullPolicy: IfNotPresent - name: memcached - ports: - - containerPort: 11211 - name: client - protocol: TCP - resources: - limits: - memory: 3Gi - requests: - cpu: 500m - memory: 2Gi - terminationMessagePolicy: FallbackToLogsOnError - - args: - - --memcached.address=localhost:0 - - --web.listen-address=:9150 - image: quay.io/prometheus/memcached-exporter:v0.13.0 - imagePullPolicy: IfNotPresent - name: memcached-exporter - ports: - - containerPort: 9150 - name: metrics - protocol: TCP - resources: - limits: - cpu: 200m - memory: 200Mi - requests: - cpu: 50m - memory: 50Mi - terminationMessagePolicy: FallbackToLogsOnError - nodeSelector: - kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-store-index-cache-memcached-default - terminationGracePeriodSeconds: 120 - apiVersion: v1 kind: Service metadata: creationTimestamp: null labels: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" observatorium/tenant: default - name: observatorium-thanos-store-index-cache-memcached-default + name: observatorium-thanos-store-bucket-cache-memcached-default namespace: rhobs spec: clusterIP: None @@ -334,7 +209,7 @@ objects: protocol: TCP targetPort: 9150 selector: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium @@ -346,27 +221,27 @@ objects: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" observatorium/tenant: default - name: observatorium-thanos-store-index-cache-memcached-default + name: observatorium-thanos-store-bucket-cache-memcached-default namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: creationTimestamp: null labels: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-store-index-cache-memcached-default + name: rhobs-observatorium-thanos-store-bucket-cache-memcached-default namespace: openshift-customer-monitoring spec: endpoints: @@ -381,53 +256,6 @@ objects: namespaceSelector: matchNames: - rhobs - selector: - matchLabels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default -- apiVersion: v1 - data: - entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following - convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# - This parameter expansion removes all characters after the last hyphen, capturing - only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport - THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} - -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating - store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} - HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat - </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n - \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- - action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: object-store-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-store - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - observatorium/tenant: default - name: hashmod-config-template-default - namespace: rhobs -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - name: observatorium-thanos-store-bucket-cache-memcached-default - namespace: rhobs - spec: - maxUnavailable: 1 selector: matchLabels: app.kubernetes.io/component: store-bucket-cache @@ -512,7 +340,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-store-default + name: rhobs-observatorium-thanos-store-default namespace: openshift-customer-monitoring spec: endpoints: @@ -611,7 +439,7 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-store + service_name: thanos-store-default sampler_type: ratelimiting sampler_param: 2 - --store.enable-index-header-lazy-reader @@ -795,14 +623,207 @@ objects: requests: storage: 5Gi storageClassName: gp2 +- apiVersion: apps/v1 + kind: Deployment + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - memcached + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - --conn-limit=3072 + - --max-item-size=5m + - --memory-limit=2048 + - --verbose=true + image: quay.io/app-sre/memcached:1.5 + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + protocol: TCP + resources: + limits: + memory: 3Gi + requests: + cpu: 500m + memory: 2Gi + terminationMessagePolicy: FallbackToLogsOnError + - args: + - --memcached.address=localhost:0 + - --web.listen-address=:9150 + image: quay.io/prometheus/memcached-exporter:v0.13.0 + imagePullPolicy: IfNotPresent + name: memcached-exporter + ports: + - containerPort: 9150 + name: metrics + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 50m + memory: 50Mi + terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: observatorium-thanos-store-index-cache-memcached-default + terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs + spec: + clusterIP: None + ports: + - name: client + port: 11211 + protocol: TCP + targetPort: 11211 + - name: metrics + port: 9150 + protocol: TCP + targetPort: 9150 + selector: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + prometheus: app-sre + name: rhobs-observatorium-thanos-store-index-cache-memcached-default + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: metrics + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default parameters: - name: STORE_CPU_REQUEST - value: "4" + value: "2" - name: STORE_LOG_LEVEL value: warn - name: STORE_MEMORY_LIMIT - value: 80Gi -- name: STORE_MEMORY_REQUEST value: 20Gi +- name: STORE_MEMORY_REQUEST + value: 5Gi - name: STORE_REPLICAS value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-api-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-api-template.yaml index 3486a2603d2..d52a8b67880 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-api-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-api-template.yaml @@ -115,7 +115,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: main prometheus: app-sre - name: avalanche + name: rhobs-avalanche namespace: openshift-customer-monitoring spec: endpoints: @@ -235,6 +235,25 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-api-cache-memcached terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: api-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + name: observatorium-api-cache-memcached + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: api-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: @@ -288,7 +307,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" prometheus: app-sre - name: observatorium-api-cache-memcached + name: rhobs-observatorium-api-cache-memcached namespace: openshift-customer-monitoring spec: endpoints: @@ -368,11 +387,6 @@ objects: - --middleware.rate-limiter.grpc-address=observatorium-gubernator.rhobs.svc.cluster.local:8081 - --rbac.config=/etc/observatorium/rbac/config.yaml - --tenants.config=/etc/observatorium/tenants/config.yaml - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/observatorium/api:main-2023-12-06-62d7703 imagePullPolicy: IfNotPresent livenessProbe: @@ -594,7 +608,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: main-2023-12-06-62d7703 prometheus: app-sre - name: observatorium-api + name: rhobs-observatorium-api namespace: openshift-customer-monitoring spec: endpoints: @@ -822,7 +836,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v2.0.0-rc.36 prometheus: app-sre - name: observatorium-gubernator + name: rhobs-observatorium-gubernator namespace: openshift-customer-monitoring spec: endpoints: @@ -1010,7 +1024,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: 969b895 prometheus: app-sre - name: observatorium-obsctl-reloader + name: rhobs-observatorium-obsctl-reloader namespace: openshift-customer-monitoring spec: endpoints: @@ -1518,6 +1532,217 @@ objects: app.kubernetes.io/version: main-2023-12-06-62d7703 name: observatorium-rbac namespace: rhobs +- apiVersion: apps/v1 + kind: Deployment + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - rules-objstore + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - -log.format=logfmt + - -log.level=warn + - -objstore.config-file=/etc/rules-objstore/objstore/config.yaml + image: quay.io/observatorium/rules-objstore:main-2022-09-21-9df4d2c + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 10 + httpGet: + path: /live + port: 8081 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 8081 + name: internal + protocol: TCP + - containerPort: 8080 + name: public + protocol: TCP + readinessProbe: + failureThreshold: 12 + httpGet: + path: /ready + port: 8081 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: "1" + memory: 400Mi + requests: + cpu: 50m + memory: 200Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/rules-objstore/objstore + name: objstore-config + readOnly: true + initContainers: + - command: + - /bin/sh + - -c + - echo "${OBJSTORE_CONFIG}" > /tmp/config/config.yaml + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhobs-rules-objstore-stage-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhobs-rules-objstore-stage-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: rhobs-rules-objstore-stage-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: rhobs-rules-objstore-stage-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: rhobs-rules-objstore-stage-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/app-sre/ubi8-ubi-minimal:8.9 + imagePullPolicy: IfNotPresent + name: init + resources: {} + volumeMounts: + - mountPath: /tmp/config + name: objstore-config + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: observatorium-rules-objstore + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: objstore-config +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + namespace: rhobs + spec: + ports: + - name: internal + port: 8081 + protocol: TCP + targetPort: 8081 + - name: public + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + namespace: rhobs + spec: + endpoints: + - port: internal + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: {} + selector: + matchLabels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Secret metadata: @@ -1785,7 +2010,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: master-2022-03-24-098c31a prometheus: app-sre - name: observatorium-up-query-frontend + name: rhobs-observatorium-up-query-frontend namespace: openshift-customer-monitoring spec: endpoints: @@ -1913,7 +2138,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: master-2022-03-24-098c31a prometheus: app-sre - name: observatorium-up-query-rule + name: rhobs-observatorium-up-query-rule namespace: openshift-customer-monitoring spec: endpoints: @@ -1927,236 +2152,6 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: observatorium-up app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - data: - config.yaml: | - type: S3 - config: - bucket: $(OBJ_STORE_BUCKET) - endpoint: $(OBJ_STORE_ENDPOINT) - region: $(OBJ_STORE_REGION) - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - namespace: rhobs -- apiVersion: apps/v1 - kind: Deployment - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - namespace: rhobs - spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - strategy: {} - template: - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - namespace: rhobs - spec: - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - - key: app.kubernetes.io/name - operator: In - values: - - rules-objstore - topologyKey: kubernetes.io/hostname - weight: 100 - containers: - - args: - - -log.format=logfmt - - -log.level=warn - - -objstore.config-file=/etc/rules-objstore/objstore/config.yaml - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - key: aws_access_key_id - name: rhobs-rules-objstore-stage-s3 - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - key: aws_secret_access_key - name: rhobs-rules-objstore-stage-s3 - - name: OBJ_STORE_BUCKET - valueFrom: - secretKeyRef: - key: bucket - name: rhobs-rules-objstore-stage-s3 - - name: OBJ_STORE_REGION - valueFrom: - secretKeyRef: - key: aws_region - name: rhobs-rules-objstore-stage-s3 - - name: OBJ_STORE_ENDPOINT - valueFrom: - secretKeyRef: - key: endpoint - name: rhobs-rules-objstore-stage-s3 - image: quay.io/observatorium/rules-objstore:main-2022-09-21-9df4d2c - imagePullPolicy: IfNotPresent - livenessProbe: - failureThreshold: 10 - httpGet: - path: /live - port: 8081 - periodSeconds: 30 - successThreshold: 1 - timeoutSeconds: 1 - name: thanos - ports: - - containerPort: 8081 - name: internal - protocol: TCP - - containerPort: 8080 - name: public - protocol: TCP - readinessProbe: - failureThreshold: 12 - httpGet: - path: /ready - port: 8081 - periodSeconds: 5 - successThreshold: 1 - timeoutSeconds: 1 - resources: - limits: - cpu: "1" - memory: 400Mi - requests: - cpu: 50m - memory: 200Mi - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - mountPath: /etc/rules-objstore/objstore - name: objstore - nodeSelector: - kubernetes.io/os: linux - serviceAccountName: observatorium-rules-objstore - terminationGracePeriodSeconds: 120 - volumes: - - configMap: - name: observatorium-rules-objstore - name: objstore -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - namespace: rhobs - spec: - ports: - - name: internal - port: 8081 - protocol: TCP - targetPort: 8081 - - name: public - port: 8080 - protocol: TCP - targetPort: 8080 - selector: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - kind: ServiceAccount - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - namespace: rhobs - spec: - endpoints: - - port: internal - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: {} - selector: - matchLabels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: api-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - name: observatorium-api-cache-memcached - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: api-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium parameters: - name: AMS_OIDC_CLIENT_ID - name: AMS_OIDC_CLIENT_SECRET diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-alertmanager-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-alertmanager-template.yaml index 203206d6f3c..adf618019a8 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-alertmanager-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-alertmanager-template.yaml @@ -4,6 +4,30 @@ metadata: creationTimestamp: null name: observatorium-alertmanager objects: +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: alertmanager + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.26.0 + name: observatorium-alertmanager-cluster + namespace: rhobs + spec: + clusterIP: None + ports: + - name: cluster-tcp + port: 9094 + protocol: TCP + targetPort: 9094 + selector: + app.kubernetes.io/component: alertmanager + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: observatorium - apiVersion: route.openshift.io/v1 kind: Route metadata: @@ -30,30 +54,6 @@ objects: kind: Service name: observatorium-alertmanager weight: null -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: alertmanager - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: alertmanager - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.26.0 - name: observatorium-alertmanager-cluster - namespace: rhobs - spec: - clusterIP: None - ports: - - name: cluster-tcp - port: 9094 - protocol: TCP - targetPort: 9094 - selector: - app.kubernetes.io/component: alertmanager - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: alertmanager - app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: @@ -106,7 +106,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.26.0 prometheus: app-sre - name: observatorium-alertmanager + name: rhobs-observatorium-alertmanager namespace: openshift-customer-monitoring spec: endpoints: @@ -181,7 +181,7 @@ objects: - --storage.path=/data - --cluster.peer=observatorium-alertmanager-0.observatorium-alertmanager-cluster.rhobs.svc.cluster.local:9094 - --cluster.peer=observatorium-alertmanager-1.observatorium-alertmanager-cluster.rhobs.svc.cluster.local:9094 - - --cluster.reconnect-timeout=5m + - --cluster.reconnect-timeout=5m0s - --log.level=${ALERTMGR_LOG_LEVEL} - --log.format=logfmt image: quay.io/prometheus/alertmanager:v0.26.0 diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml index 35f73ec4135..cf154e06eec 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml @@ -4,12 +4,9 @@ metadata: creationTimestamp: null name: observatorium-thanos-query-frontend objects: -- apiVersion: route.openshift.io/v1 - kind: Route +- apiVersion: apps/v1 + kind: Deployment metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http creationTimestamp: null labels: app.kubernetes.io/component: query-cache @@ -20,46 +17,23 @@ objects: name: observatorium-thanos-query-frontend namespace: rhobs spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-query-frontend - weight: null -- apiVersion: apps/v1 - kind: Deployment - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - name: observatorium-thanos-query-range-cache-memcached - namespace: rhobs - spec: - replicas: 1 + replicas: ${{QFE_REPLICAS}} selector: matchLabels: - app.kubernetes.io/component: query-range-cache + app.kubernetes.io/component: query-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached + app.kubernetes.io/name: thanos-query-frontend app.kubernetes.io/part-of: observatorium strategy: {} template: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: query-range-cache + app.kubernetes.io/component: query-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached + app.kubernetes.io/name: thanos-query-frontend app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" + app.kubernetes.io/version: v0.32.5 namespace: rhobs spec: affinity: @@ -75,125 +49,185 @@ objects: - key: app.kubernetes.io/name operator: In values: - - memcached + - thanos-query-frontend topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - - --conn-limit=3072 - - --max-item-size=5m - - --memory-limit=2048 - - --verbose=true - image: quay.io/app-sre/memcached:1.5 + - query-frontend + - --cache-compression-type=snappy + - --labels.default-time-range=336h0m0s + - --labels.max-retries-per-request=0 + - --labels.split-interval=24h0m0s + - --log.format=logfmt + - --log.level=${QFE_LOG_LEVEL} + - --query-frontend.compress-responses + - --query-frontend.downstream-url=http://observatorium-thanos-query.rhobs.svc.cluster.local:10902 + - --query-frontend.log-queries-longer-than=5s + - --query-range.max-retries-per-request=0 + - | + --query-range.response-cache-config=type: MEMCACHED + config: + addresses: + - dnssrv+_client._tcp.observatorium-thanos-query-range-cache-memcached.rhobs.svc + timeout: 2s + max_idle_connections: 1300 + max_async_concurrency: 200 + max_async_buffer_size: 2000000 + max_get_multi_concurrency: 1000 + max_item_size: 64MiB + max_get_multi_batch_size: 100 + - --query-range.split-interval=24h0m0s + - | + --tracing.config=type: JAEGER + config: + service_name: thanos-query-frontend + sampler_type: ratelimiting + sampler_param: 2 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + image: quay.io/thanos/thanos:v0.32.5 imagePullPolicy: IfNotPresent - name: memcached + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos ports: - - containerPort: 11211 - name: client + - containerPort: 10902 + name: http protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 resources: limits: - memory: 3Gi + memory: ${QFE_MEMORY_LIMIT} requests: - cpu: 500m - memory: 2Gi + cpu: ${QFE_CPU_REQUEST} + memory: ${QFE_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError - args: - - --memcached.address=localhost:0 - - --web.listen-address=:9150 - image: quay.io/prometheus/memcached-exporter:v0.13.0 - imagePullPolicy: IfNotPresent - name: memcached-exporter + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-query-frontend + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:4.15 + name: oauth-proxy ports: - - containerPort: 9150 - name: metrics + - containerPort: 8443 + name: https protocol: TCP resources: limits: cpu: 200m memory: 200Mi requests: - cpu: 50m - memory: 50Mi + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: tls + readOnly: true + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-query-range-cache-memcached + serviceAccountName: observatorium-thanos-query-frontend terminationGracePeriodSeconds: 120 -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - name: observatorium-thanos-query-range-cache-memcached - namespace: rhobs - spec: - clusterIP: None - ports: - - name: client - port: 11211 - protocol: TCP - targetPort: 11211 - - name: metrics - port: 9150 - protocol: TCP - targetPort: 9150 - selector: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount + volumes: + - name: tls + secret: + secretName: query-frontend-tls +- apiVersion: route.openshift.io/v1 + kind: Route metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http creationTimestamp: null labels: - app.kubernetes.io/component: query-range-cache + app.kubernetes.io/component: query-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached + app.kubernetes.io/name: thanos-query-frontend app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - name: observatorium-thanos-query-range-cache-memcached + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-frontend namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - prometheus: app-sre - name: observatorium-thanos-query-range-cache-memcached - namespace: openshift-customer-monitoring spec: - endpoints: - - port: metrics - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs - selector: - matchLabels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query-frontend + weight: null - apiVersion: v1 kind: Service metadata: @@ -250,7 +284,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 prometheus: app-sre - name: observatorium-thanos-query-frontend + name: rhobs-observatorium-thanos-query-frontend namespace: openshift-customer-monitoring spec: endpoints: @@ -276,31 +310,31 @@ objects: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: query-cache + app.kubernetes.io/component: query-range-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query-frontend + app.kubernetes.io/version: "1.5" + name: observatorium-thanos-query-range-cache-memcached namespace: rhobs spec: - replicas: ${{QFE_REPLICAS}} + replicas: 1 selector: matchLabels: - app.kubernetes.io/component: query-cache + app.kubernetes.io/component: query-range-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium strategy: {} template: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: query-cache + app.kubernetes.io/component: query-range-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 + app.kubernetes.io/version: "1.5" namespace: rhobs spec: affinity: @@ -316,159 +350,51 @@ objects: - key: app.kubernetes.io/name operator: In values: - - thanos-query-frontend + - memcached topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - - query-frontend - - --cache-compression-type=snappy - - --labels.default-time-range=336h0m0s - - --labels.max-retries-per-request=0 - - --labels.split-interval=24h0m0s - - --log.format=logfmt - - --log.level=${QFE_LOG_LEVEL} - - --query-frontend.compress-responses - - --query-frontend.downstream-url=http://observatorium-thanos-query.rhobs.svc.cluster.local:10902 - - --query-frontend.log-queries-longer-than=5s - - --query-range.max-retries-per-request=0 - - | - --query-range.response-cache-config=type: MEMCACHED - config: - addresses: - - dnssrv+_client._tcp.observatorium-thanos-query-range-cache-memcached.rhobs.svc - timeout: 2s - max_idle_connections: 1300 - max_async_concurrency: 200 - max_async_buffer_size: 2000000 - max_get_multi_concurrency: 1000 - max_item_size: 64MiB - max_get_multi_batch_size: 100 - - --query-range.split-interval=24h0m0s - - | - --tracing.config=type: JAEGER - config: - service_name: thanos-query-frontend - sampler_type: ratelimiting - sampler_param: 2 - env: - - name: HOST_IP_ADDRESS - valueFrom: - fieldRef: - fieldPath: status.hostIP - image: quay.io/thanos/thanos:v0.32.5 + - --conn-limit=3072 + - --max-item-size=5m + - --memory-limit=2048 + - --verbose=true + image: quay.io/app-sre/memcached:1.5 imagePullPolicy: IfNotPresent - livenessProbe: - failureThreshold: 8 - httpGet: - path: /-/healthy - port: 10902 - periodSeconds: 30 - timeoutSeconds: 1 - name: thanos + name: memcached ports: - - containerPort: 10902 - name: http + - containerPort: 11211 + name: client protocol: TCP - readinessProbe: - failureThreshold: 20 - httpGet: - path: /-/ready - port: 10902 - periodSeconds: 5 resources: limits: - memory: ${QFE_MEMORY_LIMIT} + memory: 3Gi requests: - cpu: ${QFE_CPU_REQUEST} - memory: ${QFE_MEMORY_REQUEST} + cpu: 500m + memory: 2Gi terminationMessagePolicy: FallbackToLogsOnError - args: - - -provider=openshift - - -https-address=:8443 - - -http-address= - - -email-domain=* - - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-query-frontend - - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", - "namespace": "rhobs"}' - - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", - "name": "rhobs", "namespace": "rhobs"}}' - - -tls-cert=/etc/tls/private/tls.crt - - -tls-key=/etc/tls/private/tls.key - - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} - - -openshift-ca=/etc/pki/tls/cert.pem - - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - image: quay.io/openshift/origin-oauth-proxy:4.15 - name: oauth-proxy + - --memcached.address=localhost:0 + - --web.listen-address=:9150 + image: quay.io/prometheus/memcached-exporter:v0.13.0 + imagePullPolicy: IfNotPresent + name: memcached-exporter ports: - - containerPort: 8443 - name: https + - containerPort: 9150 + name: metrics protocol: TCP resources: limits: cpu: 200m memory: 200Mi requests: - cpu: 100m - memory: 100Mi - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - mountPath: /etc/tls/private - name: tls - readOnly: true - - args: - - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 - - --reporter.type=grpc - - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) - env: - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD - valueFrom: - fieldRef: - fieldPath: metadata.name - image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 - livenessProbe: - failureThreshold: 5 - httpGet: - path: / - port: 14271 - name: jaeger-agent - ports: - - containerPort: 5778 - name: configs - protocol: TCP - - containerPort: 6831 - name: jaeger-thrift - protocol: TCP - - containerPort: 14271 - name: metrics - protocol: TCP - readinessProbe: - httpGet: - path: / - port: 14271 - initialDelaySeconds: 1 - resources: - limits: - cpu: 128m - memory: 128Mi - requests: - cpu: 32m - memory: 64Mi + cpu: 50m + memory: 50Mi terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-query-frontend + serviceAccountName: observatorium-thanos-query-range-cache-memcached terminationGracePeriodSeconds: 120 - volumes: - - name: tls - secret: - secretName: query-frontend-tls - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: @@ -488,6 +414,80 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + name: observatorium-thanos-query-range-cache-memcached + namespace: rhobs + spec: + clusterIP: None + ports: + - name: client + port: 11211 + protocol: TCP + targetPort: 11211 + - name: metrics + port: 9150 + protocol: TCP + targetPort: 9150 + selector: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + name: observatorium-thanos-query-range-cache-memcached + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + prometheus: app-sre + name: rhobs-observatorium-thanos-query-range-cache-memcached + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: metrics + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-rule-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-rule-template.yaml index 9d9e61ac4a6..bca1c507eed 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-rule-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-rule-template.yaml @@ -4,113 +4,6 @@ metadata: creationTimestamp: null name: observatorium-thanos-query-rule objects: -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query-rule - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-query-rule - weight: null -- apiVersion: v1 - kind: Service - metadata: - annotations: - service.alpha.openshift.io/serving-cert-secret-name: query-rule-tls - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query-rule - namespace: rhobs - spec: - ports: - - name: http - port: 10902 - protocol: TCP - targetPort: 10902 - - name: grpc - port: 10901 - protocol: TCP - targetPort: 10901 - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 - selector: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount - metadata: - annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query-rule"}}' - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query-rule - namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - prometheus: app-sre - name: observatorium-thanos-query-rule - namespace: openshift-customer-monitoring - spec: - endpoints: - - port: http - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs - selector: - matchLabels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - apiVersion: apps/v1 kind: Deployment metadata: @@ -168,19 +61,19 @@ objects: - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-default.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-rhel.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-telemeter.rhobs.svc.cluster.local - - --endpoint=http://observatorium-thanos-ruler-default.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-rhel.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-telemeter.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-default.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-rhel.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-telemeter.rhobs.svc.cluster.local:10902 - --log.format=logfmt - --log.level=${QUERY_LOG_LEVEL} - --query.auto-downsampling - - --query.lookback-delta=15m + - --query.lookback-delta=15m0s - --query.max-concurrent=10 - --query.promql-engine=prometheus - --query.replica-label=replica - --query.replica-label=prometheus_replica - --query.replica-label=rule_replica - - --query.timeout=15m + - --query.timeout=15m0s - | --tracing.config=type: JAEGER config: @@ -309,6 +202,113 @@ objects: - name: tls secret: secretName: query-rule-tls +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-rule + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query-rule + weight: null +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: query-rule-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-rule + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query-rule"}}' + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-rule + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + prometheus: app-sre + name: rhobs-rhobs-observatorium-thanos-query-rule + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml index 2ef1fb28565..0725a48eb1b 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml @@ -4,113 +4,6 @@ metadata: creationTimestamp: null name: observatorium-thanos-query objects: -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-query - weight: null -- apiVersion: v1 - kind: Service - metadata: - annotations: - service.alpha.openshift.io/serving-cert-secret-name: query-adhoc-tls - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query - namespace: rhobs - spec: - ports: - - name: http - port: 10902 - protocol: TCP - targetPort: 10902 - - name: grpc - port: 10901 - protocol: TCP - targetPort: 10901 - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 - selector: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount - metadata: - annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query"}}' - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - name: observatorium-thanos-query - namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - prometheus: app-sre - name: observatorium-thanos-query - namespace: openshift-customer-monitoring - spec: - endpoints: - - port: http - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs - selector: - matchLabels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - apiVersion: apps/v1 kind: Deployment metadata: @@ -168,13 +61,13 @@ objects: - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-default.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-rhel.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-telemeter.rhobs.svc.cluster.local - - --endpoint=http://observatorium-thanos-ruler-default.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-rhel.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-telemeter.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-default.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-rhel.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-telemeter.rhobs.svc.cluster.local:10902 - --log.format=logfmt - --log.level=${QUERY_LOG_LEVEL} - --query.auto-downsampling - - --query.lookback-delta=15m + - --query.lookback-delta=15m0s - --query.max-concurrent=10 - --query.promql-engine=prometheus - --query.replica-label=replica @@ -193,7 +86,7 @@ objects: - --query.telemetry.request-duration-seconds-quantiles=30 - --query.telemetry.request-duration-seconds-quantiles=60 - --query.telemetry.request-duration-seconds-quantiles=120 - - --query.timeout=15m + - --query.timeout=15m0s - | --tracing.config=type: JAEGER config: @@ -322,6 +215,113 @@ objects: - name: tls secret: secretName: query-adhoc-tls +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query + weight: null +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: query-adhoc-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query"}}' + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + prometheus: app-sre + name: rhobs-rhobs-observatorium-thanos-query + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml index 530906c289a..47accab7629 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml @@ -4,20 +4,6 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-router objects: -- apiVersion: v1 - data: - hashrings.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: kubernetes-controller - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-controller - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2023-09-22-f168dd7 - name: thanos-receive-hashring - namespace: rhobs - apiVersion: apps/v1 kind: Deployment metadata: @@ -286,9 +272,11 @@ objects: terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /etc/thanos/hashring - name: hashring-config + name: hashring + readOnly: true - mountPath: /etc/thanos/receive-limits - name: receive-limits-config + name: receive-limits + readOnly: true - args: - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 - --reporter.type=grpc @@ -339,10 +327,29 @@ objects: volumes: - configMap: name: thanos-receive-hashring-generated - name: hashring-config + name: hashring - configMap: name: observatorium-thanos-receive-router-limits - name: receive-limits-config + name: receive-limits +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-router + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-router + app.kubernetes.io/part-of: observatorium + name: observatorium-thanos-receive-router + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-router + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-router + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: @@ -399,7 +406,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 prometheus: app-sre - name: observatorium-thanos-receive-router + name: rhobs-observatorium-thanos-receive-router namespace: openshift-customer-monitoring spec: endpoints: @@ -420,25 +427,20 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-receive-router app.kubernetes.io/part-of: observatorium -- apiVersion: policy/v1 - kind: PodDisruptionBudget +- apiVersion: v1 + data: + hashrings.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' + kind: ConfigMap metadata: creationTimestamp: null labels: - app.kubernetes.io/component: database-write-hashring-router + app.kubernetes.io/component: kubernetes-controller app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-router + app.kubernetes.io/name: thanos-receive-controller app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-receive-router + app.kubernetes.io/version: main-2023-09-22-f168dd7 + name: thanos-receive-hashring namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-router - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-router - app.kubernetes.io/part-of: observatorium parameters: - name: ROUTER_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml index e7af7e4820c..d8b5cc88ef9 100755 --- a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml @@ -4,6 +4,53 @@ metadata: creationTimestamp: null name: observatorium-thanos-compact-rhel objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-rhel + weight: null - apiVersion: v1 kind: Service metadata: @@ -62,7 +109,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-compact-rhel + name: rhobs-observatorium-thanos-compact-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -276,53 +323,6 @@ objects: requests: storage: 50Gi storageClassName: gp2 -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - name: observatorium-thanos-compact-rhel - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-compact-rhel - weight: null -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - name: observatorium-thanos-compact-rhel - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel parameters: - name: COMPACTOR_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml index ba59851c9f7..d3516638348 100755 --- a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml @@ -4,6 +4,31 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-ingestor-rhel objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: rhel + observatorium/tenant: rhel + name: observatorium-thanos-receive-ingestor-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: rhel + observatorium/tenant: rhel - apiVersion: v1 kind: Service metadata: @@ -64,7 +89,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-receive-ingestor-rhel + name: rhobs-observatorium-thanos-receive-ingestor-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -152,11 +177,15 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-receive-router + service_name: thanos-receive-ingestor-rhel sampler_type: ratelimiting sampler_param: 2 - --tsdb.path=/var/thanos/receive env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -189,10 +218,6 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/thanos/thanos:v0.32.5 imagePullPolicy: IfNotPresent livenessProbe: @@ -296,31 +321,6 @@ objects: requests: storage: 5Gi storageClassName: gp2 -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: rhel - observatorium/tenant: rhel - name: observatorium-thanos-receive-ingestor-rhel - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: rhel - observatorium/tenant: rhel parameters: - name: INGESTOR_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml index eea91dd1e8b..10226579ddd 100755 --- a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel objects: - apiVersion: route.openshift.io/v1 kind: Route @@ -18,7 +18,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs spec: host: "" @@ -29,7 +29,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel weight: null - apiVersion: v1 kind: Service @@ -44,7 +44,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs spec: ports: @@ -76,7 +76,7 @@ objects: kind: ServiceAccount metadata: annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-ruler-rhel"}}' + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-rule-rhel"}}' creationTimestamp: null labels: app.kubernetes.io/component: rule-evaluation-engine @@ -85,7 +85,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -99,7 +99,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-ruler-rhel + name: rhobs-observatorium-thanos-rule-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -140,7 +140,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs spec: replicas: ${{RULER_REPLICAS}} @@ -151,7 +151,7 @@ objects: app.kubernetes.io/name: thanos-rule app.kubernetes.io/part-of: observatorium observatorium/tenant: rhel - serviceName: observatorium-thanos-ruler-rhel + serviceName: observatorium-thanos-rule-rhel template: metadata: creationTimestamp: null @@ -195,10 +195,10 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-rule + service_name: thanos-rule-rhel sampler_type: ratelimiting sampler_param: 2 - - --tsdb.retention=2d + - --tsdb.retention=48h0m0s env: - name: POD_NAME valueFrom: @@ -300,7 +300,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-ruler-rhel + - -openshift-service-account=observatorium-thanos-rule-rhel - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -374,7 +374,7 @@ objects: terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-ruler-rhel + serviceAccountName: observatorium-thanos-rule-rhel terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} diff --git a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml index b6074f3b58e..8f017ba3229 100755 --- a/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml @@ -4,6 +4,32 @@ metadata: creationTimestamp: null name: observatorium-thanos-store-rhel objects: +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n + \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + observatorium/tenant: rhel + name: hashmod-config-template-rhel + namespace: rhobs - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -137,6 +163,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-bucket-cache-memcached-rhel terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-store-bucket-cache-memcached-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel - apiVersion: v1 kind: Service metadata: @@ -194,7 +241,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-store-bucket-cache-memcached-rhel + name: rhobs-observatorium-thanos-store-bucket-cache-memcached-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -309,6 +356,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-index-cache-memcached-rhel terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-store-index-cache-memcached-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel - apiVersion: v1 kind: Service metadata: @@ -366,7 +434,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-store-index-cache-memcached-rhel + name: rhobs-observatorium-thanos-store-index-cache-memcached-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -388,53 +456,6 @@ objects: app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium observatorium/tenant: rhel -- apiVersion: v1 - data: - entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following - convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# - This parameter expansion removes all characters after the last hyphen, capturing - only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport - THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} - -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating - store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} - HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat - </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n - \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- - action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: object-store-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-store - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - observatorium/tenant: rhel - name: hashmod-config-template-rhel - namespace: rhobs -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - name: observatorium-thanos-store-bucket-cache-memcached-rhel - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: @@ -512,7 +533,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-store-rhel + name: rhobs-observatorium-thanos-store-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -611,7 +632,7 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-store + service_name: thanos-store-rhel sampler_type: ratelimiting sampler_param: 2 - --store.enable-index-header-lazy-reader @@ -797,12 +818,12 @@ objects: storageClassName: gp2 parameters: - name: STORE_CPU_REQUEST - value: "4" + value: "2" - name: STORE_LOG_LEVEL value: warn - name: STORE_MEMORY_LIMIT - value: 80Gi -- name: STORE_MEMORY_REQUEST value: 20Gi +- name: STORE_MEMORY_REQUEST + value: 5Gi - name: STORE_REPLICAS value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml index 6206b960728..764d301e82e 100755 --- a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml @@ -4,6 +4,53 @@ metadata: creationTimestamp: null name: observatorium-thanos-compact-telemeter objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-telemeter + weight: null - apiVersion: v1 kind: Service metadata: @@ -62,7 +109,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-compact-telemeter + name: rhobs-observatorium-thanos-compact-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -276,53 +323,6 @@ objects: requests: storage: 50Gi storageClassName: gp2 -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - name: observatorium-thanos-compact-telemeter - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-compact-telemeter - weight: null -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - name: observatorium-thanos-compact-telemeter - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter parameters: - name: COMPACTOR_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml index 15f6b272a8d..ccc1d508535 100755 --- a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml @@ -4,6 +4,31 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-ingestor-telemeter objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: telemeter + observatorium/tenant: telemeter + name: observatorium-thanos-receive-ingestor-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: telemeter + observatorium/tenant: telemeter - apiVersion: v1 kind: Service metadata: @@ -64,7 +89,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-receive-ingestor-telemeter + name: rhobs-observatorium-thanos-receive-ingestor-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -152,11 +177,15 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-receive-router + service_name: thanos-receive-ingestor-telemeter sampler_type: ratelimiting sampler_param: 2 - --tsdb.path=/var/thanos/receive env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -189,10 +218,6 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/thanos/thanos:v0.32.5 imagePullPolicy: IfNotPresent livenessProbe: @@ -296,31 +321,6 @@ objects: requests: storage: 5Gi storageClassName: gp2 -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: telemeter - observatorium/tenant: telemeter - name: observatorium-thanos-receive-ingestor-telemeter - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: telemeter - observatorium/tenant: telemeter parameters: - name: INGESTOR_CPU_REQUEST value: 200m diff --git a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml index 991e6ba9f87..15b128d6efb 100755 --- a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml @@ -2,35 +2,8 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter objects: -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: rule-evaluation-engine - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-ruler-telemeter - weight: null - apiVersion: v1 data: observatorium.yaml: | @@ -163,6 +136,33 @@ objects: observatorium/tenant: telemeter name: observatorium-rules namespace: rhobs +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: rule-evaluation-engine + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + observatorium/tenant: telemeter + name: observatorium-thanos-rule-telemeter + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-rule-telemeter + weight: null - apiVersion: v1 kind: Service metadata: @@ -176,7 +176,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs spec: ports: @@ -208,7 +208,7 @@ objects: kind: ServiceAccount metadata: annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-ruler-telemeter"}}' + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-rule-telemeter"}}' creationTimestamp: null labels: app.kubernetes.io/component: rule-evaluation-engine @@ -217,7 +217,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -231,7 +231,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-ruler-telemeter + name: rhobs-observatorium-thanos-rule-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -272,7 +272,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs spec: replicas: ${{RULER_REPLICAS}} @@ -283,7 +283,7 @@ objects: app.kubernetes.io/name: thanos-rule app.kubernetes.io/part-of: observatorium observatorium/tenant: telemeter - serviceName: observatorium-thanos-ruler-telemeter + serviceName: observatorium-thanos-rule-telemeter template: metadata: creationTimestamp: null @@ -328,10 +328,10 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-rule + service_name: thanos-rule-telemeter sampler_type: ratelimiting sampler_param: 2 - - --tsdb.retention=2d + - --tsdb.retention=48h0m0s env: - name: POD_NAME valueFrom: @@ -435,7 +435,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-ruler-telemeter + - -openshift-service-account=observatorium-thanos-rule-telemeter - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -525,7 +525,7 @@ objects: name: observatorium-rules nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-ruler-telemeter + serviceAccountName: observatorium-thanos-rule-telemeter terminationGracePeriodSeconds: 120 volumes: - configMap: diff --git a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml index 4db83038d43..32a27d5b9e7 100755 --- a/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml @@ -4,6 +4,32 @@ metadata: creationTimestamp: null name: observatorium-thanos-store-telemeter objects: +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n + \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + observatorium/tenant: telemeter + name: hashmod-config-template-telemeter + namespace: rhobs - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -137,6 +163,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-bucket-cache-memcached-telemeter terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-store-bucket-cache-memcached-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: Service metadata: @@ -194,7 +241,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store-bucket-cache-memcached-telemeter + name: rhobs-observatorium-thanos-store-bucket-cache-memcached-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -309,6 +356,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-index-cache-memcached-telemeter terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-store-index-cache-memcached-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: Service metadata: @@ -366,7 +434,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store-index-cache-memcached-telemeter + name: rhobs-observatorium-thanos-store-index-cache-memcached-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -388,53 +456,6 @@ objects: app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium observatorium/tenant: telemeter -- apiVersion: v1 - data: - entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following - convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# - This parameter expansion removes all characters after the last hyphen, capturing - only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport - THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} - -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating - store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} - HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat - </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n - \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- - action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: object-store-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-store - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.5 - observatorium/tenant: telemeter - name: hashmod-config-template-telemeter - namespace: rhobs -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - name: observatorium-thanos-store-bucket-cache-memcached-telemeter - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: @@ -512,7 +533,7 @@ objects: app.kubernetes.io/version: v0.32.5 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store-telemeter + name: rhobs-observatorium-thanos-store-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -611,7 +632,7 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-store + service_name: thanos-store-telemeter sampler_type: ratelimiting sampler_param: 2 - --store.enable-index-header-lazy-reader @@ -797,12 +818,12 @@ objects: storageClassName: gp2 parameters: - name: STORE_CPU_REQUEST - value: "4" + value: "2" - name: STORE_LOG_LEVEL value: warn - name: STORE_MEMORY_LIMIT - value: 80Gi -- name: STORE_MEMORY_REQUEST value: 20Gi +- name: STORE_MEMORY_REQUEST + value: 5Gi - name: STORE_REPLICAS value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-compact-default-template.yaml b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-compact-default-template.yaml index 3b4ad1d62d2..ccc43a74ec7 100755 --- a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-compact-default-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-compact-default-template.yaml @@ -4,6 +4,53 @@ metadata: creationTimestamp: null name: observatorium-thanos-compact-default objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-default + weight: null - apiVersion: v1 kind: Service metadata: @@ -62,7 +109,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-compact-default + name: rhobs-observatorium-thanos-compact-default namespace: openshift-customer-monitoring spec: endpoints: @@ -276,53 +323,6 @@ objects: requests: storage: 50Gi storageClassName: gp2 -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - name: observatorium-thanos-compact-default - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-compact-default - weight: null -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - name: observatorium-thanos-compact-default - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default parameters: - name: COMPACTOR_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml index e4f43a58d82..1480d888dfc 100755 --- a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-receive-ingestor-default-template.yaml @@ -4,6 +4,31 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-ingestor-default objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: default + observatorium/tenant: default + name: observatorium-thanos-receive-ingestor-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: default + observatorium/tenant: default - apiVersion: v1 kind: Service metadata: @@ -64,7 +89,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-receive-ingestor-default + name: rhobs-observatorium-thanos-receive-ingestor-default namespace: openshift-customer-monitoring spec: endpoints: @@ -152,11 +177,15 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-receive-router + service_name: thanos-receive-ingestor-default sampler_type: ratelimiting sampler_param: 2 - --tsdb.path=/var/thanos/receive env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -189,10 +218,6 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: @@ -296,31 +321,6 @@ objects: requests: storage: 5Gi storageClassName: gp2 -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: default - observatorium/tenant: default - name: observatorium-thanos-receive-ingestor-default - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: default - observatorium/tenant: default parameters: - name: INGESTOR_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml index 82f43ae7a9f..a1085bb2138 100755 --- a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-ruler-default-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default objects: - apiVersion: route.openshift.io/v1 kind: Route @@ -18,7 +18,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs spec: host: "" @@ -29,7 +29,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default weight: null - apiVersion: v1 kind: Service @@ -44,7 +44,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs spec: ports: @@ -76,7 +76,7 @@ objects: kind: ServiceAccount metadata: annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-ruler-default"}}' + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-rule-default"}}' creationTimestamp: null labels: app.kubernetes.io/component: rule-evaluation-engine @@ -85,7 +85,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -99,7 +99,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-ruler-default + name: rhobs-observatorium-thanos-rule-default namespace: openshift-customer-monitoring spec: endpoints: @@ -140,7 +140,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: default - name: observatorium-thanos-ruler-default + name: observatorium-thanos-rule-default namespace: rhobs spec: replicas: ${{RULER_REPLICAS}} @@ -151,7 +151,7 @@ objects: app.kubernetes.io/name: thanos-rule app.kubernetes.io/part-of: observatorium observatorium/tenant: default - serviceName: observatorium-thanos-ruler-default + serviceName: observatorium-thanos-rule-default template: metadata: creationTimestamp: null @@ -195,10 +195,10 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-rule + service_name: thanos-rule-default sampler_type: ratelimiting sampler_param: 2 - - --tsdb.retention=2d + - --tsdb.retention=48h0m0s env: - name: POD_NAME valueFrom: @@ -300,7 +300,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-ruler-default + - -openshift-service-account=observatorium-thanos-rule-default - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -374,7 +374,7 @@ objects: terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-ruler-default + serviceAccountName: observatorium-thanos-rule-default terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} diff --git a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-store-default-template.yaml b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-store-default-template.yaml index 54de55d422d..d9d73278ec3 100755 --- a/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-store-default-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/default/observatorium-metrics-store-default-template.yaml @@ -4,6 +4,32 @@ metadata: creationTimestamp: null name: observatorium-thanos-store-default objects: +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n + \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: hashmod-config-template-default + namespace: rhobs - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -137,40 +163,8 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-bucket-cache-memcached-default terminationGracePeriodSeconds: 120 -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - name: observatorium-thanos-store-bucket-cache-memcached-default - namespace: rhobs - spec: - clusterIP: None - ports: - - name: client - port: 11211 - protocol: TCP - targetPort: 11211 - - name: metrics - port: 9150 - protocol: TCP - targetPort: 9150 - selector: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount +- apiVersion: policy/v1 + kind: PodDisruptionBudget metadata: creationTimestamp: null labels: @@ -178,37 +172,11 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" observatorium/tenant: default name: observatorium-thanos-store-bucket-cache-memcached-default namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - prometheus: app-sre - name: observatorium-thanos-store-bucket-cache-memcached-default - namespace: openshift-customer-monitoring spec: - endpoints: - - port: metrics - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs + maxUnavailable: 1 selector: matchLabels: app.kubernetes.io/component: store-bucket-cache @@ -216,111 +184,18 @@ objects: app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium observatorium/tenant: default -- apiVersion: apps/v1 - kind: Deployment - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - name: observatorium-thanos-store-index-cache-memcached-default - namespace: rhobs - spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - strategy: {} - template: - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - observatorium/tenant: default - namespace: rhobs - spec: - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - - key: app.kubernetes.io/name - operator: In - values: - - memcached - topologyKey: kubernetes.io/hostname - weight: 100 - containers: - - args: - - --conn-limit=3072 - - --max-item-size=5m - - --memory-limit=2048 - - --verbose=true - image: quay.io/app-sre/memcached:1.5 - imagePullPolicy: IfNotPresent - name: memcached - ports: - - containerPort: 11211 - name: client - protocol: TCP - resources: - limits: - memory: 3Gi - requests: - cpu: 500m - memory: 2Gi - terminationMessagePolicy: FallbackToLogsOnError - - args: - - --memcached.address=localhost:0 - - --web.listen-address=:9150 - image: quay.io/prometheus/memcached-exporter:v0.13.0 - imagePullPolicy: IfNotPresent - name: memcached-exporter - ports: - - containerPort: 9150 - name: metrics - protocol: TCP - resources: - limits: - cpu: 200m - memory: 200Mi - requests: - cpu: 50m - memory: 50Mi - terminationMessagePolicy: FallbackToLogsOnError - nodeSelector: - kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-store-index-cache-memcached-default - terminationGracePeriodSeconds: 120 - apiVersion: v1 kind: Service metadata: creationTimestamp: null labels: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" observatorium/tenant: default - name: observatorium-thanos-store-index-cache-memcached-default + name: observatorium-thanos-store-bucket-cache-memcached-default namespace: rhobs spec: clusterIP: None @@ -334,7 +209,7 @@ objects: protocol: TCP targetPort: 9150 selector: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium @@ -346,27 +221,27 @@ objects: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" observatorium/tenant: default - name: observatorium-thanos-store-index-cache-memcached-default + name: observatorium-thanos-store-bucket-cache-memcached-default namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: creationTimestamp: null labels: - app.kubernetes.io/component: store-index-cache + app.kubernetes.io/component: store-bucket-cache app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-store-index-cache-memcached-default + name: rhobs-observatorium-thanos-store-bucket-cache-memcached-default namespace: openshift-customer-monitoring spec: endpoints: @@ -381,53 +256,6 @@ objects: namespaceSelector: matchNames: - rhobs - selector: - matchLabels: - app.kubernetes.io/component: store-index-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default -- apiVersion: v1 - data: - entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following - convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# - This parameter expansion removes all characters after the last hyphen, capturing - only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport - THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} - -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating - store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} - HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat - </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n - \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- - action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: object-store-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-store - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - observatorium/tenant: default - name: hashmod-config-template-default - namespace: rhobs -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: default - name: observatorium-thanos-store-bucket-cache-memcached-default - namespace: rhobs - spec: - maxUnavailable: 1 selector: matchLabels: app.kubernetes.io/component: store-bucket-cache @@ -512,7 +340,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: default prometheus: app-sre - name: observatorium-thanos-store-default + name: rhobs-observatorium-thanos-store-default namespace: openshift-customer-monitoring spec: endpoints: @@ -611,7 +439,7 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-store + service_name: thanos-store-default sampler_type: ratelimiting sampler_param: 2 - --store.enable-index-header-lazy-reader @@ -795,14 +623,207 @@ objects: requests: storage: 5Gi storageClassName: gp2 +- apiVersion: apps/v1 + kind: Deployment + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - memcached + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - --conn-limit=3072 + - --max-item-size=5m + - --memory-limit=2048 + - --verbose=true + image: quay.io/app-sre/memcached:1.5 + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + protocol: TCP + resources: + limits: + memory: 3Gi + requests: + cpu: 500m + memory: 2Gi + terminationMessagePolicy: FallbackToLogsOnError + - args: + - --memcached.address=localhost:0 + - --web.listen-address=:9150 + image: quay.io/prometheus/memcached-exporter:v0.13.0 + imagePullPolicy: IfNotPresent + name: memcached-exporter + ports: + - containerPort: 9150 + name: metrics + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 50m + memory: 50Mi + terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: observatorium-thanos-store-index-cache-memcached-default + terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs + spec: + clusterIP: None + ports: + - name: client + port: 11211 + protocol: TCP + targetPort: 11211 + - name: metrics + port: 9150 + protocol: TCP + targetPort: 9150 + selector: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + name: observatorium-thanos-store-index-cache-memcached-default + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + observatorium/tenant: default + prometheus: app-sre + name: rhobs-observatorium-thanos-store-index-cache-memcached-default + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: metrics + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default parameters: - name: STORE_CPU_REQUEST - value: "4" + value: "2" - name: STORE_LOG_LEVEL value: warn - name: STORE_MEMORY_LIMIT - value: 80Gi -- name: STORE_MEMORY_REQUEST value: 20Gi +- name: STORE_MEMORY_REQUEST + value: 5Gi - name: STORE_REPLICAS value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-api-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-api-template.yaml index 12b32ad8ec1..5f66b2653f5 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-api-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-api-template.yaml @@ -110,7 +110,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: main prometheus: app-sre - name: avalanche + name: -avalanche namespace: openshift-customer-monitoring spec: endpoints: @@ -227,6 +227,24 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-api-cache-memcached terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: api-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + name: observatorium-api-cache-memcached + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: api-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: @@ -278,7 +296,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: "1.5" prometheus: app-sre - name: observatorium-api-cache-memcached + name: -observatorium-api-cache-memcached namespace: openshift-customer-monitoring spec: endpoints: @@ -355,11 +373,6 @@ objects: - --metrics.write.endpoint=http://observatorium-thanos-receive-router..svc.cluster.local:19291 - --middleware.rate-limiter.grpc-address=observatorium-gubernator..svc.cluster.local:8081 - --tenants.config=/etc/observatorium/tenants/config.yaml - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/observatorium/api:main-2023-12-06-62d7703 imagePullPolicy: IfNotPresent livenessProbe: @@ -572,7 +585,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: main-2023-12-06-62d7703 prometheus: app-sre - name: observatorium-api + name: -observatorium-api namespace: openshift-customer-monitoring spec: endpoints: @@ -793,7 +806,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v2.0.0-rc.36 prometheus: app-sre - name: observatorium-gubernator + name: -observatorium-gubernator namespace: openshift-customer-monitoring spec: endpoints: @@ -974,7 +987,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: 969b895 prometheus: app-sre - name: observatorium-obsctl-reloader + name: -observatorium-obsctl-reloader namespace: openshift-customer-monitoring spec: endpoints: @@ -988,6 +1001,207 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: rules-obsctl-reloader app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: Deployment + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - rules-objstore + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - -log.format=logfmt + - -log.level=warn + - -objstore.config-file=/etc/rules-objstore/objstore/config.yaml + image: quay.io/observatorium/rules-objstore:main-2022-09-21-9df4d2c + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 10 + httpGet: + path: /live + port: 8081 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 8081 + name: internal + protocol: TCP + - containerPort: 8080 + name: public + protocol: TCP + readinessProbe: + failureThreshold: 12 + httpGet: + path: /ready + port: 8081 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: "1" + memory: 400Mi + requests: + cpu: 50m + memory: 200Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/rules-objstore/objstore + name: objstore-config + readOnly: true + initContainers: + - command: + - /bin/sh + - -c + - echo "${OBJSTORE_CONFIG}" > /tmp/config/config.yaml + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/app-sre/ubi8-ubi-minimal:8.9 + imagePullPolicy: IfNotPresent + name: init + resources: {} + volumeMounts: + - mountPath: /tmp/config + name: objstore-config + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: observatorium-rules-objstore + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: objstore-config +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + spec: + ports: + - name: internal + port: 8081 + protocol: TCP + targetPort: 8081 + - name: public + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: main-2022-09-21-9df4d2c + name: observatorium-rules-objstore + spec: + endpoints: + - port: internal + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: {} + selector: + matchLabels: + app.kubernetes.io/component: rules-storage + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: rules-objstore + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Secret metadata: @@ -1134,7 +1348,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: master-2022-03-24-098c31a prometheus: app-sre - name: observatorium-up-query-frontend + name: -observatorium-up-query-frontend namespace: openshift-customer-monitoring spec: endpoints: @@ -1258,7 +1472,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: master-2022-03-24-098c31a prometheus: app-sre - name: observatorium-up-query-rule + name: -observatorium-up-query-rule namespace: openshift-customer-monitoring spec: endpoints: @@ -1272,224 +1486,6 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: observatorium-up app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - data: - config.yaml: | - type: S3 - config: - bucket: $(OBJ_STORE_BUCKET) - endpoint: $(OBJ_STORE_ENDPOINT) - region: $(OBJ_STORE_REGION) - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore -- apiVersion: apps/v1 - kind: Deployment - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - strategy: {} - template: - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - spec: - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - - key: app.kubernetes.io/name - operator: In - values: - - rules-objstore - topologyKey: kubernetes.io/hostname - weight: 100 - containers: - - args: - - -log.format=logfmt - - -log.level=warn - - -objstore.config-file=/etc/rules-objstore/objstore/config.yaml - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - key: aws_access_key_id - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - key: aws_secret_access_key - - name: OBJ_STORE_BUCKET - valueFrom: - secretKeyRef: - key: bucket - - name: OBJ_STORE_REGION - valueFrom: - secretKeyRef: - key: aws_region - - name: OBJ_STORE_ENDPOINT - valueFrom: - secretKeyRef: - key: endpoint - image: quay.io/observatorium/rules-objstore:main-2022-09-21-9df4d2c - imagePullPolicy: IfNotPresent - livenessProbe: - failureThreshold: 10 - httpGet: - path: /live - port: 8081 - periodSeconds: 30 - successThreshold: 1 - timeoutSeconds: 1 - name: thanos - ports: - - containerPort: 8081 - name: internal - protocol: TCP - - containerPort: 8080 - name: public - protocol: TCP - readinessProbe: - failureThreshold: 12 - httpGet: - path: /ready - port: 8081 - periodSeconds: 5 - successThreshold: 1 - timeoutSeconds: 1 - resources: - limits: - cpu: "1" - memory: 400Mi - requests: - cpu: 50m - memory: 200Mi - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - mountPath: /etc/rules-objstore/objstore - name: objstore - nodeSelector: - kubernetes.io/os: linux - serviceAccountName: observatorium-rules-objstore - terminationGracePeriodSeconds: 120 - volumes: - - configMap: - name: observatorium-rules-objstore - name: objstore -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - spec: - ports: - - name: internal - port: 8081 - protocol: TCP - targetPort: 8081 - - name: public - port: 8080 - protocol: TCP - targetPort: 8080 - selector: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - kind: ServiceAccount - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2022-09-21-9df4d2c - name: observatorium-rules-objstore - spec: - endpoints: - - port: internal - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: {} - selector: - matchLabels: - app.kubernetes.io/component: rules-storage - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: rules-objstore - app.kubernetes.io/part-of: observatorium -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: api-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - name: observatorium-api-cache-memcached - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: api-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium parameters: - name: AMS_OIDC_CLIENT_ID - name: AMS_OIDC_CLIENT_SECRET diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-alertmanager-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-alertmanager-template.yaml index 203206d6f3c..adf618019a8 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-alertmanager-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-alertmanager-template.yaml @@ -4,6 +4,30 @@ metadata: creationTimestamp: null name: observatorium-alertmanager objects: +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: alertmanager + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.26.0 + name: observatorium-alertmanager-cluster + namespace: rhobs + spec: + clusterIP: None + ports: + - name: cluster-tcp + port: 9094 + protocol: TCP + targetPort: 9094 + selector: + app.kubernetes.io/component: alertmanager + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: observatorium - apiVersion: route.openshift.io/v1 kind: Route metadata: @@ -30,30 +54,6 @@ objects: kind: Service name: observatorium-alertmanager weight: null -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: alertmanager - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: alertmanager - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.26.0 - name: observatorium-alertmanager-cluster - namespace: rhobs - spec: - clusterIP: None - ports: - - name: cluster-tcp - port: 9094 - protocol: TCP - targetPort: 9094 - selector: - app.kubernetes.io/component: alertmanager - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: alertmanager - app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: @@ -106,7 +106,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.26.0 prometheus: app-sre - name: observatorium-alertmanager + name: rhobs-observatorium-alertmanager namespace: openshift-customer-monitoring spec: endpoints: @@ -181,7 +181,7 @@ objects: - --storage.path=/data - --cluster.peer=observatorium-alertmanager-0.observatorium-alertmanager-cluster.rhobs.svc.cluster.local:9094 - --cluster.peer=observatorium-alertmanager-1.observatorium-alertmanager-cluster.rhobs.svc.cluster.local:9094 - - --cluster.reconnect-timeout=5m + - --cluster.reconnect-timeout=5m0s - --log.level=${ALERTMGR_LOG_LEVEL} - --log.format=logfmt image: quay.io/prometheus/alertmanager:v0.26.0 diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml index d7297a6dc50..0d9abdaabaf 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml @@ -4,12 +4,9 @@ metadata: creationTimestamp: null name: observatorium-thanos-query-frontend objects: -- apiVersion: route.openshift.io/v1 - kind: Route +- apiVersion: apps/v1 + kind: Deployment metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http creationTimestamp: null labels: app.kubernetes.io/component: query-cache @@ -20,46 +17,23 @@ objects: name: observatorium-thanos-query-frontend namespace: rhobs spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-query-frontend - weight: null -- apiVersion: apps/v1 - kind: Deployment - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - name: observatorium-thanos-query-range-cache-memcached - namespace: rhobs - spec: - replicas: 1 + replicas: ${{QFE_REPLICAS}} selector: matchLabels: - app.kubernetes.io/component: query-range-cache + app.kubernetes.io/component: query-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached + app.kubernetes.io/name: thanos-query-frontend app.kubernetes.io/part-of: observatorium strategy: {} template: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: query-range-cache + app.kubernetes.io/component: query-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached + app.kubernetes.io/name: thanos-query-frontend app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" + app.kubernetes.io/version: v0.32.4 namespace: rhobs spec: affinity: @@ -75,125 +49,185 @@ objects: - key: app.kubernetes.io/name operator: In values: - - memcached + - thanos-query-frontend topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - - --conn-limit=3072 - - --max-item-size=5m - - --memory-limit=2048 - - --verbose=true - image: quay.io/app-sre/memcached:1.5 + - query-frontend + - --cache-compression-type=snappy + - --labels.default-time-range=336h0m0s + - --labels.max-retries-per-request=0 + - --labels.split-interval=24h0m0s + - --log.format=logfmt + - --log.level=${QFE_LOG_LEVEL} + - --query-frontend.compress-responses + - --query-frontend.downstream-url=http://observatorium-thanos-query.rhobs.svc.cluster.local:10902 + - --query-frontend.log-queries-longer-than=5s + - --query-range.max-retries-per-request=0 + - | + --query-range.response-cache-config=type: MEMCACHED + config: + addresses: + - dnssrv+_client._tcp.observatorium-thanos-query-range-cache-memcached.rhobs.svc + timeout: 2s + max_idle_connections: 1300 + max_async_concurrency: 200 + max_async_buffer_size: 2000000 + max_get_multi_concurrency: 1000 + max_item_size: 64MiB + max_get_multi_batch_size: 100 + - --query-range.split-interval=24h0m0s + - | + --tracing.config=type: JAEGER + config: + service_name: thanos-query-frontend + sampler_type: ratelimiting + sampler_param: 2 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent - name: memcached + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos ports: - - containerPort: 11211 - name: client + - containerPort: 10902 + name: http protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 resources: limits: - memory: 3Gi + memory: ${QFE_MEMORY_LIMIT} requests: - cpu: 500m - memory: 2Gi + cpu: ${QFE_CPU_REQUEST} + memory: ${QFE_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError - args: - - --memcached.address=localhost:0 - - --web.listen-address=:9150 - image: quay.io/prometheus/memcached-exporter:v0.13.0 - imagePullPolicy: IfNotPresent - name: memcached-exporter + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-query-frontend + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:4.15 + name: oauth-proxy ports: - - containerPort: 9150 - name: metrics + - containerPort: 8443 + name: https protocol: TCP resources: limits: cpu: 200m memory: 200Mi requests: - cpu: 50m - memory: 50Mi + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: tls + readOnly: true + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-query-range-cache-memcached + serviceAccountName: observatorium-thanos-query-frontend terminationGracePeriodSeconds: 120 -- apiVersion: v1 - kind: Service - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - name: observatorium-thanos-query-range-cache-memcached - namespace: rhobs - spec: - clusterIP: None - ports: - - name: client - port: 11211 - protocol: TCP - targetPort: 11211 - - name: metrics - port: 9150 - protocol: TCP - targetPort: 9150 - selector: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount + volumes: + - name: tls + secret: + secretName: query-frontend-tls +- apiVersion: route.openshift.io/v1 + kind: Route metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http creationTimestamp: null labels: - app.kubernetes.io/component: query-range-cache + app.kubernetes.io/component: query-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached + app.kubernetes.io/name: thanos-query-frontend app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - name: observatorium-thanos-query-range-cache-memcached + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-frontend namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: "1.5" - prometheus: app-sre - name: observatorium-thanos-query-range-cache-memcached - namespace: openshift-customer-monitoring spec: - endpoints: - - port: metrics - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs - selector: - matchLabels: - app.kubernetes.io/component: query-range-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query-frontend + weight: null - apiVersion: v1 kind: Service metadata: @@ -250,7 +284,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 prometheus: app-sre - name: observatorium-thanos-query-frontend + name: rhobs-observatorium-thanos-query-frontend namespace: openshift-customer-monitoring spec: endpoints: @@ -276,31 +310,31 @@ objects: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: query-cache + app.kubernetes.io/component: query-range-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query-frontend + app.kubernetes.io/version: "1.5" + name: observatorium-thanos-query-range-cache-memcached namespace: rhobs spec: - replicas: ${{QFE_REPLICAS}} + replicas: 1 selector: matchLabels: - app.kubernetes.io/component: query-cache + app.kubernetes.io/component: query-range-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium strategy: {} template: metadata: creationTimestamp: null labels: - app.kubernetes.io/component: query-cache + app.kubernetes.io/component: query-range-cache app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 + app.kubernetes.io/version: "1.5" namespace: rhobs spec: affinity: @@ -316,159 +350,51 @@ objects: - key: app.kubernetes.io/name operator: In values: - - thanos-query-frontend + - memcached topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - - query-frontend - - --cache-compression-type=snappy - - --labels.default-time-range=336h0m0s - - --labels.max-retries-per-request=0 - - --labels.split-interval=24h0m0s - - --log.format=logfmt - - --log.level=${QFE_LOG_LEVEL} - - --query-frontend.compress-responses - - --query-frontend.downstream-url=http://observatorium-thanos-query.rhobs.svc.cluster.local:10902 - - --query-frontend.log-queries-longer-than=5s - - --query-range.max-retries-per-request=0 - - | - --query-range.response-cache-config=type: MEMCACHED - config: - addresses: - - dnssrv+_client._tcp.observatorium-thanos-query-range-cache-memcached.rhobs.svc - timeout: 2s - max_idle_connections: 1300 - max_async_concurrency: 200 - max_async_buffer_size: 2000000 - max_get_multi_concurrency: 1000 - max_item_size: 64MiB - max_get_multi_batch_size: 100 - - --query-range.split-interval=24h0m0s - - | - --tracing.config=type: JAEGER - config: - service_name: thanos-query-frontend - sampler_type: ratelimiting - sampler_param: 2 - env: - - name: HOST_IP_ADDRESS - valueFrom: - fieldRef: - fieldPath: status.hostIP - image: quay.io/thanos/thanos:v0.32.4 + - --conn-limit=3072 + - --max-item-size=5m + - --memory-limit=2048 + - --verbose=true + image: quay.io/app-sre/memcached:1.5 imagePullPolicy: IfNotPresent - livenessProbe: - failureThreshold: 8 - httpGet: - path: /-/healthy - port: 10902 - periodSeconds: 30 - timeoutSeconds: 1 - name: thanos + name: memcached ports: - - containerPort: 10902 - name: http + - containerPort: 11211 + name: client protocol: TCP - readinessProbe: - failureThreshold: 20 - httpGet: - path: /-/ready - port: 10902 - periodSeconds: 5 resources: limits: - memory: ${QFE_MEMORY_LIMIT} + memory: 3Gi requests: - cpu: ${QFE_CPU_REQUEST} - memory: ${QFE_MEMORY_REQUEST} + cpu: 500m + memory: 2Gi terminationMessagePolicy: FallbackToLogsOnError - args: - - -provider=openshift - - -https-address=:8443 - - -http-address= - - -email-domain=* - - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-query-frontend - - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", - "namespace": "rhobs"}' - - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", - "name": "rhobs", "namespace": "rhobs"}}' - - -tls-cert=/etc/tls/private/tls.crt - - -tls-key=/etc/tls/private/tls.key - - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} - - -openshift-ca=/etc/pki/tls/cert.pem - - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - image: quay.io/openshift/origin-oauth-proxy:4.15 - name: oauth-proxy + - --memcached.address=localhost:0 + - --web.listen-address=:9150 + image: quay.io/prometheus/memcached-exporter:v0.13.0 + imagePullPolicy: IfNotPresent + name: memcached-exporter ports: - - containerPort: 8443 - name: https + - containerPort: 9150 + name: metrics protocol: TCP resources: limits: cpu: 200m memory: 200Mi requests: - cpu: 100m - memory: 100Mi - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - mountPath: /etc/tls/private - name: tls - readOnly: true - - args: - - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 - - --reporter.type=grpc - - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) - env: - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD - valueFrom: - fieldRef: - fieldPath: metadata.name - image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 - livenessProbe: - failureThreshold: 5 - httpGet: - path: / - port: 14271 - name: jaeger-agent - ports: - - containerPort: 5778 - name: configs - protocol: TCP - - containerPort: 6831 - name: jaeger-thrift - protocol: TCP - - containerPort: 14271 - name: metrics - protocol: TCP - readinessProbe: - httpGet: - path: / - port: 14271 - initialDelaySeconds: 1 - resources: - limits: - cpu: 128m - memory: 128Mi - requests: - cpu: 32m - memory: 64Mi + cpu: 50m + memory: 50Mi terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-query-frontend + serviceAccountName: observatorium-thanos-query-range-cache-memcached terminationGracePeriodSeconds: 120 - volumes: - - name: tls - secret: - secretName: query-frontend-tls - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: @@ -488,6 +414,80 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + name: observatorium-thanos-query-range-cache-memcached + namespace: rhobs + spec: + clusterIP: None + ports: + - name: client + port: 11211 + protocol: TCP + targetPort: 11211 + - name: metrics + port: 9150 + protocol: TCP + targetPort: 9150 + selector: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + name: observatorium-thanos-query-range-cache-memcached + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: "1.5" + prometheus: app-sre + name: rhobs-observatorium-thanos-query-range-cache-memcached + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: metrics + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-range-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-rule-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-rule-template.yaml index 58872421e68..8deade2f0dc 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-rule-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-rule-template.yaml @@ -4,113 +4,6 @@ metadata: creationTimestamp: null name: observatorium-thanos-query-rule objects: -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query-rule - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-query-rule - weight: null -- apiVersion: v1 - kind: Service - metadata: - annotations: - service.alpha.openshift.io/serving-cert-secret-name: query-rule-tls - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query-rule - namespace: rhobs - spec: - ports: - - name: http - port: 10902 - protocol: TCP - targetPort: 10902 - - name: grpc - port: 10901 - protocol: TCP - targetPort: 10901 - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 - selector: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount - metadata: - annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query-rule"}}' - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query-rule - namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - prometheus: app-sre - name: observatorium-thanos-query-rule - namespace: openshift-customer-monitoring - spec: - endpoints: - - port: http - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs - selector: - matchLabels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query-rule - app.kubernetes.io/part-of: observatorium - apiVersion: apps/v1 kind: Deployment metadata: @@ -168,19 +61,19 @@ objects: - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-default.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-rhel.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-telemeter.rhobs.svc.cluster.local - - --endpoint=http://observatorium-thanos-ruler-default.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-rhel.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-telemeter.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-default.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-rhel.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-telemeter.rhobs.svc.cluster.local:10902 - --log.format=logfmt - --log.level=${QUERY_LOG_LEVEL} - --query.auto-downsampling - - --query.lookback-delta=15m + - --query.lookback-delta=15m0s - --query.max-concurrent=10 - --query.promql-engine=prometheus - --query.replica-label=replica - --query.replica-label=prometheus_replica - --query.replica-label=rule_replica - - --query.timeout=15m + - --query.timeout=15m0s - | --tracing.config=type: JAEGER config: @@ -309,6 +202,113 @@ objects: - name: tls secret: secretName: query-rule-tls +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-rule + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query-rule + weight: null +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: query-rule-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-rule + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query-rule"}}' + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-rule + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + prometheus: app-sre + name: rhobs-rhobs-observatorium-thanos-query-rule + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-rule + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml index bc5f4d52425..e8dddf24107 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml @@ -4,113 +4,6 @@ metadata: creationTimestamp: null name: observatorium-thanos-query objects: -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-query - weight: null -- apiVersion: v1 - kind: Service - metadata: - annotations: - service.alpha.openshift.io/serving-cert-secret-name: query-adhoc-tls - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query - namespace: rhobs - spec: - ports: - - name: http - port: 10902 - protocol: TCP - targetPort: 10902 - - name: grpc - port: 10901 - protocol: TCP - targetPort: 10901 - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 - selector: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - imagePullSecrets: - - name: quay.io - kind: ServiceAccount - metadata: - annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query"}}' - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - name: observatorium-thanos-query - namespace: rhobs -- apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - prometheus: app-sre - name: observatorium-thanos-query - namespace: openshift-customer-monitoring - spec: - endpoints: - - port: http - relabelings: - - action: replace - separator: / - sourceLabels: - - namespace - - pod - targetLabel: instance - namespaceSelector: - matchNames: - - rhobs - selector: - matchLabels: - app.kubernetes.io/component: query-layer - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-query - app.kubernetes.io/part-of: observatorium - apiVersion: apps/v1 kind: Deployment metadata: @@ -168,13 +61,13 @@ objects: - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-default.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-rhel.rhobs.svc.cluster.local - --endpoint=dnssrv+_grpc._tcp.observatorium-thanos-store-telemeter.rhobs.svc.cluster.local - - --endpoint=http://observatorium-thanos-ruler-default.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-rhel.rhobs.svc.cluster.local:10902 - - --endpoint=http://observatorium-thanos-ruler-telemeter.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-default.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-rhel.rhobs.svc.cluster.local:10902 + - --endpoint=http://observatorium-thanos-rule-telemeter.rhobs.svc.cluster.local:10902 - --log.format=logfmt - --log.level=${QUERY_LOG_LEVEL} - --query.auto-downsampling - - --query.lookback-delta=15m + - --query.lookback-delta=15m0s - --query.max-concurrent=10 - --query.promql-engine=prometheus - --query.replica-label=replica @@ -193,7 +86,7 @@ objects: - --query.telemetry.request-duration-seconds-quantiles=30 - --query.telemetry.request-duration-seconds-quantiles=60 - --query.telemetry.request-duration-seconds-quantiles=120 - - --query.timeout=15m + - --query.timeout=15m0s - | --tracing.config=type: JAEGER config: @@ -322,6 +215,113 @@ objects: - name: tls secret: secretName: query-adhoc-tls +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query + weight: null +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: query-adhoc-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query"}}' + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + prometheus: app-sre + name: rhobs-rhobs-observatorium-thanos-query + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-layer + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml index 638cfbd29aa..d20fc60e758 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml @@ -4,20 +4,6 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-router objects: -- apiVersion: v1 - data: - hashrings.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: kubernetes-controller - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-controller - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: main-2023-09-22-f168dd7 - name: thanos-receive-hashring - namespace: rhobs - apiVersion: apps/v1 kind: Deployment metadata: @@ -281,9 +267,11 @@ objects: terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /etc/thanos/hashring - name: hashring-config + name: hashring + readOnly: true - mountPath: /etc/thanos/receive-limits - name: receive-limits-config + name: receive-limits + readOnly: true - args: - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 - --reporter.type=grpc @@ -334,10 +322,29 @@ objects: volumes: - configMap: name: thanos-receive-hashring-generated - name: hashring-config + name: hashring - configMap: name: observatorium-thanos-receive-router-limits - name: receive-limits-config + name: receive-limits +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-router + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-router + app.kubernetes.io/part-of: observatorium + name: observatorium-thanos-receive-router + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-router + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-router + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: @@ -394,7 +401,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 prometheus: app-sre - name: observatorium-thanos-receive-router + name: rhobs-observatorium-thanos-receive-router namespace: openshift-customer-monitoring spec: endpoints: @@ -415,25 +422,20 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-receive-router app.kubernetes.io/part-of: observatorium -- apiVersion: policy/v1 - kind: PodDisruptionBudget +- apiVersion: v1 + data: + hashrings.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' + kind: ConfigMap metadata: creationTimestamp: null labels: - app.kubernetes.io/component: database-write-hashring-router + app.kubernetes.io/component: kubernetes-controller app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-router + app.kubernetes.io/name: thanos-receive-controller app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-receive-router + app.kubernetes.io/version: main-2023-09-22-f168dd7 + name: thanos-receive-hashring namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-router - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-router - app.kubernetes.io/part-of: observatorium parameters: - name: ROUTER_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml index 15ef41a44a4..079015f5aa2 100755 --- a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-compact-rhel-template.yaml @@ -4,6 +4,53 @@ metadata: creationTimestamp: null name: observatorium-thanos-compact-rhel objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-rhel + weight: null - apiVersion: v1 kind: Service metadata: @@ -62,7 +109,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-compact-rhel + name: rhobs-observatorium-thanos-compact-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -276,53 +323,6 @@ objects: requests: storage: 50Gi storageClassName: gp2 -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - name: observatorium-thanos-compact-rhel - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-compact-rhel - weight: null -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - name: observatorium-thanos-compact-rhel - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel parameters: - name: COMPACTOR_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml index 0e41bf67c62..ee9e914f26a 100755 --- a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-receive-ingestor-rhel-template.yaml @@ -4,6 +4,31 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-ingestor-rhel objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: rhel + observatorium/tenant: rhel + name: observatorium-thanos-receive-ingestor-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: rhel + observatorium/tenant: rhel - apiVersion: v1 kind: Service metadata: @@ -64,7 +89,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-receive-ingestor-rhel + name: rhobs-observatorium-thanos-receive-ingestor-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -152,11 +177,15 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-receive-router + service_name: thanos-receive-ingestor-rhel sampler_type: ratelimiting sampler_param: 2 - --tsdb.path=/var/thanos/receive env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -189,10 +218,6 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: @@ -296,31 +321,6 @@ objects: requests: storage: 5Gi storageClassName: gp2 -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: rhel - observatorium/tenant: rhel - name: observatorium-thanos-receive-ingestor-rhel - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: rhel - observatorium/tenant: rhel parameters: - name: INGESTOR_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml index dc742672d8b..c53368d226d 100755 --- a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-ruler-rhel-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel objects: - apiVersion: route.openshift.io/v1 kind: Route @@ -18,7 +18,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs spec: host: "" @@ -29,7 +29,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel weight: null - apiVersion: v1 kind: Service @@ -44,7 +44,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs spec: ports: @@ -76,7 +76,7 @@ objects: kind: ServiceAccount metadata: annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-ruler-rhel"}}' + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-rule-rhel"}}' creationTimestamp: null labels: app.kubernetes.io/component: rule-evaluation-engine @@ -85,7 +85,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -99,7 +99,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-ruler-rhel + name: rhobs-observatorium-thanos-rule-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -140,7 +140,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel - name: observatorium-thanos-ruler-rhel + name: observatorium-thanos-rule-rhel namespace: rhobs spec: replicas: ${{RULER_REPLICAS}} @@ -151,7 +151,7 @@ objects: app.kubernetes.io/name: thanos-rule app.kubernetes.io/part-of: observatorium observatorium/tenant: rhel - serviceName: observatorium-thanos-ruler-rhel + serviceName: observatorium-thanos-rule-rhel template: metadata: creationTimestamp: null @@ -195,10 +195,10 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-rule + service_name: thanos-rule-rhel sampler_type: ratelimiting sampler_param: 2 - - --tsdb.retention=2d + - --tsdb.retention=48h0m0s env: - name: POD_NAME valueFrom: @@ -300,7 +300,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-ruler-rhel + - -openshift-service-account=observatorium-thanos-rule-rhel - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -374,7 +374,7 @@ objects: terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-ruler-rhel + serviceAccountName: observatorium-thanos-rule-rhel terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} diff --git a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml index a47cb86e37a..6050c64061d 100755 --- a/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/rhel/observatorium-metrics-store-rhel-template.yaml @@ -4,6 +4,32 @@ metadata: creationTimestamp: null name: observatorium-thanos-store-rhel objects: +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n + \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: hashmod-config-template-rhel + namespace: rhobs - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -137,6 +163,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-bucket-cache-memcached-rhel terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-store-bucket-cache-memcached-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel - apiVersion: v1 kind: Service metadata: @@ -194,7 +241,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-store-bucket-cache-memcached-rhel + name: rhobs-observatorium-thanos-store-bucket-cache-memcached-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -309,6 +356,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-index-cache-memcached-rhel terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-store-index-cache-memcached-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel - apiVersion: v1 kind: Service metadata: @@ -366,7 +434,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-store-index-cache-memcached-rhel + name: rhobs-observatorium-thanos-store-index-cache-memcached-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -388,53 +456,6 @@ objects: app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium observatorium/tenant: rhel -- apiVersion: v1 - data: - entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following - convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# - This parameter expansion removes all characters after the last hyphen, capturing - only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport - THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} - -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating - store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} - HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat - </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n - \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- - action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: object-store-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-store - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - observatorium/tenant: rhel - name: hashmod-config-template-rhel - namespace: rhobs -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - name: observatorium-thanos-store-bucket-cache-memcached-rhel - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: rhel - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: @@ -512,7 +533,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: rhel prometheus: app-sre - name: observatorium-thanos-store-rhel + name: rhobs-observatorium-thanos-store-rhel namespace: openshift-customer-monitoring spec: endpoints: @@ -611,7 +632,7 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-store + service_name: thanos-store-rhel sampler_type: ratelimiting sampler_param: 2 - --store.enable-index-header-lazy-reader @@ -797,12 +818,12 @@ objects: storageClassName: gp2 parameters: - name: STORE_CPU_REQUEST - value: "4" + value: "2" - name: STORE_LOG_LEVEL value: warn - name: STORE_MEMORY_LIMIT - value: 80Gi -- name: STORE_MEMORY_REQUEST value: 20Gi +- name: STORE_MEMORY_REQUEST + value: 5Gi - name: STORE_REPLICAS value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml index d0c7b74bf9a..250b57ee30c 100755 --- a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-compact-telemeter-template.yaml @@ -4,6 +4,53 @@ metadata: creationTimestamp: null name: observatorium-thanos-compact-telemeter objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-telemeter + weight: null - apiVersion: v1 kind: Service metadata: @@ -62,7 +109,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-compact-telemeter + name: rhobs-observatorium-thanos-compact-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -276,53 +323,6 @@ objects: requests: storage: 50Gi storageClassName: gp2 -- apiVersion: route.openshift.io/v1 - kind: Route - metadata: - annotations: - cert-manager.io/issuer-kind: ClusterIssuer - cert-manager.io/issuer-name: letsencrypt-prod-http - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - name: observatorium-thanos-compact-telemeter - namespace: rhobs - spec: - host: "" - port: - targetPort: https - tls: - insecureEdgeTerminationPolicy: Redirect - termination: reencrypt - to: - kind: Service - name: observatorium-thanos-compact-telemeter - weight: null -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - name: observatorium-thanos-compact-telemeter - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter parameters: - name: COMPACTOR_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml index a1c3d8b0776..bf4a2c8adef 100755 --- a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-receive-ingestor-telemeter-template.yaml @@ -4,6 +4,31 @@ metadata: creationTimestamp: null name: observatorium-thanos-receive-ingestor-telemeter objects: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: telemeter + observatorium/tenant: telemeter + name: observatorium-thanos-receive-ingestor-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-write-hashring-ingestor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-receive-ingestor + app.kubernetes.io/part-of: observatorium + controller.receive.thanos.io: thanos-receive-controller + controller.receive.thanos.io/hashring: telemeter + observatorium/tenant: telemeter - apiVersion: v1 kind: Service metadata: @@ -64,7 +89,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-receive-ingestor-telemeter + name: rhobs-observatorium-thanos-receive-ingestor-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -152,11 +177,15 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-receive-router + service_name: thanos-receive-ingestor-telemeter sampler_type: ratelimiting sampler_param: 2 - --tsdb.path=/var/thanos/receive env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -189,10 +218,6 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: @@ -296,31 +321,6 @@ objects: requests: storage: 5Gi storageClassName: gp2 -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: telemeter - observatorium/tenant: telemeter - name: observatorium-thanos-receive-ingestor-telemeter - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: database-write-hashring-ingestor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-receive-ingestor - app.kubernetes.io/part-of: observatorium - controller.receive.thanos.io: thanos-receive-controller - controller.receive.thanos.io/hashring: telemeter - observatorium/tenant: telemeter parameters: - name: INGESTOR_CPU_REQUEST value: 200m diff --git a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml index 795a22c8ed3..c2ca3c6bb08 100755 --- a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-ruler-telemeter-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter objects: - apiVersion: route.openshift.io/v1 kind: Route @@ -18,7 +18,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs spec: host: "" @@ -29,7 +29,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter weight: null - apiVersion: v1 kind: Service @@ -44,7 +44,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs spec: ports: @@ -76,7 +76,7 @@ objects: kind: ServiceAccount metadata: annotations: - serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-ruler-telemeter"}}' + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-rule-telemeter"}}' creationTimestamp: null labels: app.kubernetes.io/component: rule-evaluation-engine @@ -85,7 +85,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -99,7 +99,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-ruler-telemeter + name: rhobs-observatorium-thanos-rule-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -140,7 +140,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter - name: observatorium-thanos-ruler-telemeter + name: observatorium-thanos-rule-telemeter namespace: rhobs spec: replicas: ${{RULER_REPLICAS}} @@ -151,7 +151,7 @@ objects: app.kubernetes.io/name: thanos-rule app.kubernetes.io/part-of: observatorium observatorium/tenant: telemeter - serviceName: observatorium-thanos-ruler-telemeter + serviceName: observatorium-thanos-rule-telemeter template: metadata: creationTimestamp: null @@ -195,10 +195,10 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-rule + service_name: thanos-rule-telemeter sampler_type: ratelimiting sampler_param: 2 - - --tsdb.retention=2d + - --tsdb.retention=48h0m0s env: - name: POD_NAME valueFrom: @@ -300,7 +300,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-ruler-telemeter + - -openshift-service-account=observatorium-thanos-rule-telemeter - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -374,7 +374,7 @@ objects: terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux - serviceAccountName: observatorium-thanos-ruler-telemeter + serviceAccountName: observatorium-thanos-rule-telemeter terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} diff --git a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml index 8bdc303d588..e66a44ca1a5 100755 --- a/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/telemeter/observatorium-metrics-store-telemeter-template.yaml @@ -4,6 +4,32 @@ metadata: creationTimestamp: null name: observatorium-thanos-store-telemeter objects: +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n + \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: hashmod-config-template-telemeter + namespace: rhobs - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -137,6 +163,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-bucket-cache-memcached-telemeter terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-store-bucket-cache-memcached-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-bucket-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: Service metadata: @@ -194,7 +241,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store-bucket-cache-memcached-telemeter + name: rhobs-observatorium-thanos-store-bucket-cache-memcached-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -309,6 +356,27 @@ objects: kubernetes.io/os: linux serviceAccountName: observatorium-thanos-store-index-cache-memcached-telemeter terminationGracePeriodSeconds: 120 +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter + name: observatorium-thanos-store-index-cache-memcached-telemeter + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: store-index-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: memcached + app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: Service metadata: @@ -366,7 +434,7 @@ objects: app.kubernetes.io/version: "1.5" observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store-index-cache-memcached-telemeter + name: rhobs-observatorium-thanos-store-index-cache-memcached-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -388,53 +456,6 @@ objects: app.kubernetes.io/name: memcached app.kubernetes.io/part-of: observatorium observatorium/tenant: telemeter -- apiVersion: v1 - data: - entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following - convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# - This parameter expansion removes all characters after the last hyphen, capturing - only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport - THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} - -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating - store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} - HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat - </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels:\n - \ - __block_id\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- - action: keep\n source_labels:\n - shard\n regex: ${ORDINAL_INDEX}\nEOF\n" - kind: ConfigMap - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: object-store-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-store - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.4 - observatorium/tenant: telemeter - name: hashmod-config-template-telemeter - namespace: rhobs -- apiVersion: policy/v1 - kind: PodDisruptionBudget - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - name: observatorium-thanos-store-bucket-cache-memcached-telemeter - namespace: rhobs - spec: - maxUnavailable: 1 - selector: - matchLabels: - app.kubernetes.io/component: store-bucket-cache - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: memcached - app.kubernetes.io/part-of: observatorium - observatorium/tenant: telemeter - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: @@ -512,7 +533,7 @@ objects: app.kubernetes.io/version: v0.32.4 observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store-telemeter + name: rhobs-observatorium-thanos-store-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -611,7 +632,7 @@ objects: - | --tracing.config=type: JAEGER config: - service_name: thanos-store + service_name: thanos-store-telemeter sampler_type: ratelimiting sampler_param: 2 - --store.enable-index-header-lazy-reader @@ -797,12 +818,12 @@ objects: storageClassName: gp2 parameters: - name: STORE_CPU_REQUEST - value: "4" + value: "2" - name: STORE_LOG_LEVEL value: warn - name: STORE_MEMORY_LIMIT - value: 80Gi -- name: STORE_MEMORY_REQUEST value: 20Gi +- name: STORE_MEMORY_REQUEST + value: 5Gi - name: STORE_REPLICAS value: "1" diff --git a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go index 750fdb7f84f..224301d388c 100644 --- a/services_go/instances/rhobs/rhobs.go +++ b/services_go/instances/rhobs/rhobs.go @@ -13,7 +13,6 @@ import ( "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" templatev1 "github.com/openshift/api/template/v1" - "github.com/prometheus/common/model" cfgobservatorium "github.com/rhobs/configuration/configuration/observatorium" "github.com/rhobs/configuration/services_go/observatorium" "gopkg.in/yaml.v3" @@ -114,7 +113,7 @@ func stageConfig() observatorium.Observatorium { { Endpoint: "/api/metrics/v1/rhel/api/v1/receive", Limit: 10000, - Window: model.Duration(30 * time.Second), + Window: time.Duration(30 * time.Second), }, }, }, @@ -130,7 +129,7 @@ func stageConfig() observatorium.Observatorium { { Endpoint: "/api/metrics/v1/.+/api/v1/receive", Limit: 10000, - Window: model.Duration(30 * time.Second), + Window: time.Duration(30 * time.Second), }, }, }, @@ -231,15 +230,17 @@ func stageConfig() observatorium.Observatorium { StorePreManifestsHook: func(store *store.StoreStatefulSet) { store.VolumeSize = "5Gi" }, - RulerPreManifestsHook: func(rulerSs *ruler.RulerStatefulSet) { - rulerSs.ConfigMaps["observatorium-rules"] = map[string]string{ - "observatorium.yaml": getTelemeterRules(), - } - rulerSs.Options.RuleFile = append(rulerSs.Options.RuleFile, ruler.RuleFileOption{ + RulerOpts: func(opts *ruler.RulerOptions) { + opts.RuleFile = append(opts.RuleFile, ruler.RuleFileOption{ FileName: "observatorium.yaml", ConfigMapName: "observatorium-rules", ParentDir: "telemeter-rules", }) + }, + RulerPreManifestsHook: func(rulerSs *ruler.RulerStatefulSet) { + rulerSs.ConfigMaps["observatorium-rules"] = map[string]string{ + "observatorium.yaml": getTelemeterRules(), + } rulerSs.Sidecars = append(rulerSs.Sidecars, &k8sutil.Container{ Name: "configmap-reloader", Image: "quay.io/openshift/origin-configmap-reloader", diff --git a/services_go/observatorium/api.go b/services_go/observatorium/api.go index 0fa3e008089..3f2448571c2 100644 --- a/services_go/observatorium/api.go +++ b/services_go/observatorium/api.go @@ -17,16 +17,13 @@ import ( "github.com/observatorium/observatorium/configuration_go/k8sutil" "github.com/observatorium/observatorium/configuration_go/openshift" "github.com/observatorium/observatorium/configuration_go/schemas/log" - "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore" - objstoreS3 "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore/s3" upoptions "github.com/observatorium/up/pkg/options" templatev1 "github.com/openshift/api/template/v1" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "github.com/prometheus/common/model" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" ) const ( @@ -41,6 +38,7 @@ const ( obsctlReloaderImage = "quay.io/app-sre/obsctl-reloader" obsctlReloaderTag = "969b895" rulesObjstoreName = "observatorium-rules-objstore" + rulesObjstoreTag = "main-2022-09-21-9df4d2c" ) type ObservatoriumAPI struct { @@ -89,14 +87,14 @@ func (o *ObservatoriumAPI) makeAPI() encoding.Encoder { } if o.RBAC != "" { - opts.RbacConfig = observatoriumapi.NewRbacConfig(&o.RBAC) + opts.RbacConfig = observatoriumapi.NewRbacConfig(nil).WithValue(o.RBAC) } // K8s config obsapi := observatoriumapi.NewObservatoriumAPI(opts, o.Namespace, obsApiTag) obsapi.Image = obsApiImage obsapi.Replicas = 1 - delete(obsapi.PodResources.Limits, corev1.ResourceCPU) + delete(obsapi.ContainerResources.Limits, corev1.ResourceCPU) opaAmsCache := "observatorium-api-cache-memcached" cacheURL := fmt.Sprintf("%s.%s.svc.cluster.local:11211", opaAmsCache, o.Namespace) @@ -166,23 +164,54 @@ func (o *ObservatoriumAPI) makeAPI() encoding.Encoder { } func (o *ObservatoriumAPI) makeRulesObjstore() k8sutil.ObjectMap { - rulesObjstore := ruler.NewRulesObjstore() - rulesObjstore.ImageTag = "main-2022-09-21-9df4d2c" - rulesObjstore.Namespace = o.Namespace + opts := ruler.NewRulesObjstoreDefaultOptions() + // Not using the opts.ObjstoreConfigFile because it doesn't support setting the resource from an init container + // Doing some manifest post processing instead + opts.LogLevel = string(log.LogLevelWarn) + opts.LogFormat = string(log.LogFormatLogfmt) + + rulesObjstore := ruler.NewRulesObjstore(opts, o.Namespace, rulesObjstoreTag) rulesObjstore.Name = rulesObjstoreName - rulesObjstore.Env = append(rulesObjstore.Env, objStoreEnvVars(o.RuleObjStoreSecret)...) - rulesObjstore.Env = deleteObjStoreEnv(rulesObjstore.Env) - rulesObjstore.Options.ObjstoreConfigFile = ruler.NewObjstoreConfigFile("observatorium-rules-objstore", objstore.BucketConfig{ - Type: objstore.S3, - Config: objstoreS3.Config{ - Bucket: "$(OBJ_STORE_BUCKET)", - Endpoint: "$(OBJ_STORE_ENDPOINT)", - Region: "$(OBJ_STORE_REGION)", + + // Rules objstore expects a file with the objstore config. + // We generate the file from the env vars using an init container + // that writes the file to a shared volume. + initContainer := corev1.Container{ + Name: "init", + Image: "quay.io/app-sre/ubi8-ubi-minimal:8.9", + ImagePullPolicy: corev1.PullIfNotPresent, + Command: []string{ + "/bin/sh", + "-c", + "echo \"${OBJSTORE_CONFIG}\" > /tmp/config/config.yaml", + }, + Env: objStoreEnvVars(o.RuleObjStoreSecret), + VolumeMounts: []corev1.VolumeMount{ + { + Name: "objstore-config", + MountPath: "/tmp/config", + }, + }, + } + + manifests := rulesObjstore.Manifests() + deployment := k8sutil.GetObject[*appsv1.Deployment](manifests, "") + deployment.Spec.Template.Spec.InitContainers = []corev1.Container{initContainer} + deployment.Spec.Template.Spec.Volumes = append(deployment.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: "objstore-config", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, }, }) - rulesObjstore.Options.LogLevel = string(log.LogLevelWarn) - rulesObjstore.Options.LogFormat = string(log.LogFormatLogfmt) - return rulesObjstore.Manifests() + mainContainer := &deployment.Spec.Template.Spec.Containers[0] + mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, corev1.VolumeMount{ + Name: "objstore-config", + MountPath: "/etc/rules-objstore/objstore", + ReadOnly: true, + }) + mainContainer.Args = append(mainContainer.Args, "-objstore.config-file=/etc/rules-objstore/objstore/config.yaml") + + return manifests } func (o *ObservatoriumAPI) makeOpaAms(amsURL, memcachedUrl, clientSecretName string) *k8sutil.Container { @@ -231,7 +260,7 @@ func (o *ObservatoriumAPI) makeUp(name, endpoint string) k8sutil.ObjectMap { opts.LogLevel = log.LogLevelInfo opts.EndpointType = observatoriumup.EndpointTypeMetrics opts.EndpointRead = fmt.Sprintf("http://observatorium-thanos-query-frontend.%s.svc.cluster.local:9090", o.Namespace) - zeroDur := model.Duration(0) + zeroDur := time.Duration(0) opts.Duration = &zeroDur opts.QueriesFile = observatoriumup.NewQueriesFileOption(&observatoriumup.QueriesFile{ Queries: []upoptions.QuerySpec{ @@ -306,7 +335,7 @@ func (o *ObservatoriumAPI) makeObsCtlReloader(obsApiName string) k8sutil.ObjectM k8sutil.ComponentLabel: "rules-obsctl-reloader", k8sutil.VersionLabel: obsctlReloaderTag, }, - PodResources: k8sutil.NewResourcesRequirements("50m", "", "500Mi", "2Gi"), + ContainerResources: k8sutil.NewResourcesRequirements("50m", "", "500Mi", "2Gi"), TerminationGracePeriodSeconds: 30, } @@ -335,32 +364,52 @@ func (o *ObservatoriumAPI) makeObsCtlReloader(obsApiName string) k8sutil.ObjectM container.MonitorPorts = []monv1.Endpoint{{Port: "http"}} manifests := k8sutil.ObjectMap{} - manifests.AddAll(depl.GenerateObjects(container)) + manifests.AddAll(depl.GenerateObjectsDeployment(container)) postProcessServiceMonitor(k8sutil.GetObject[*monv1.ServiceMonitor](manifests, ""), depl.Namespace) addQuayPullSecret(k8sutil.GetObject[*corev1.ServiceAccount](manifests, depl.Name)) - rbacRules := []rbacv1.PolicyRule{ - { - APIGroups: []string{"monitoring.coreos.com"}, - Resources: []string{"prometheusrules"}, - Verbs: []string{"list", "watch", "get"}, - }, - { - APIGroups: []string{"loki.grafana.com"}, - Resources: []string{"alertingrules", "recordingrules"}, - Verbs: []string{"list", "watch", "get"}, - }, - { - APIGroups: []string{""}, - Resources: []string{"secrets"}, - Verbs: []string{"list", "watch", "get"}, + rbacRole := &rbacv1.Role{ + TypeMeta: k8sutil.RoleMeta, + ObjectMeta: depl.ObjectMeta().MakeMeta(), + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{"monitoring.coreos.com"}, + Resources: []string{"prometheusrules"}, + Verbs: []string{"list", "watch", "get"}, + }, + { + APIGroups: []string{"loki.grafana.com"}, + Resources: []string{"alertingrules", "recordingrules"}, + Verbs: []string{"list", "watch", "get"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"secrets"}, + Verbs: []string{"list", "watch", "get"}, + }, }, } - rbacRole := depl.RBACRole(rbacRules) manifests.Add(rbacRole) + sa := k8sutil.GetObject[*corev1.ServiceAccount](manifests, depl.Name) - manifests.Add(depl.RBACRoleBinding([]runtime.Object{sa}, rbacRole)) + roleBinding := &rbacv1.RoleBinding{ + TypeMeta: k8sutil.RoleBindingMeta, + ObjectMeta: depl.ObjectMeta().MakeMeta(), + Subjects: []rbacv1.Subject{ + { + Kind: sa.GetObjectKind().GroupVersionKind().Kind, + Name: sa.GetName(), + Namespace: sa.GetNamespace(), + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: rbacRole.GetObjectKind().GroupVersionKind().Kind, + APIGroup: rbacRole.GetObjectKind().GroupVersionKind().Group, + Name: rbacRole.GetName(), + }, + } + manifests.Add(roleBinding) return manifests } diff --git a/services_go/observatorium/cache.go b/services_go/observatorium/cache.go index bf26579f0f3..97c2db89023 100644 --- a/services_go/observatorium/cache.go +++ b/services_go/observatorium/cache.go @@ -22,11 +22,11 @@ func makeMemcached(name, namespace string, preManifestHook func(*memcached.Memca memcachedDeployment.ImageTag = "1.5" memcachedDeployment.Namespace = namespace memcachedDeployment.Replicas = 1 - delete(memcachedDeployment.PodResources.Limits, corev1.ResourceCPU) + delete(memcachedDeployment.ContainerResources.Limits, corev1.ResourceCPU) memcachedDeployment.SecurityContext = nil - memcachedDeployment.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("500m") - memcachedDeployment.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("2Gi") - memcachedDeployment.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("3Gi") + memcachedDeployment.ContainerResources.Requests[corev1.ResourceCPU] = resource.MustParse("500m") + memcachedDeployment.ContainerResources.Requests[corev1.ResourceMemory] = resource.MustParse("2Gi") + memcachedDeployment.ContainerResources.Limits[corev1.ResourceMemory] = resource.MustParse("3Gi") memcachedDeployment.ExporterImage = "quay.io/prometheus/memcached-exporter" memcachedDeployment.ExporterImageTag = "v0.13.0" @@ -47,7 +47,7 @@ func makeMemcached(name, namespace string, preManifestHook func(*memcached.Memca // Add pod disruption budget labels := maps.Clone(k8sutil.GetObject[*appsv1.Deployment](manifests, "").ObjectMeta.Labels) delete(labels, k8sutil.VersionLabel) - manifests["store-index-cache-pdb"] = &policyv1.PodDisruptionBudget{ + manifests.Add(&policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ Kind: "PodDisruptionBudget", APIVersion: policyv1.SchemeGroupVersion.String(), @@ -67,7 +67,7 @@ func makeMemcached(name, namespace string, preManifestHook func(*memcached.Memca MatchLabels: labels, }, }, - } + }) return manifests } diff --git a/services_go/observatorium/helpers.go b/services_go/observatorium/helpers.go index 37042ccd05d..4df3ae66ec6 100644 --- a/services_go/observatorium/helpers.go +++ b/services_go/observatorium/helpers.go @@ -17,6 +17,8 @@ func postProcessServiceMonitor(serviceMonitor *monv1.ServiceMonitor, namespaceSe serviceMonitor.ObjectMeta.Namespace = monitoringNamespace serviceMonitor.Spec.NamespaceSelector.MatchNames = []string{namespaceSelector} serviceMonitor.ObjectMeta.Labels["prometheus"] = "app-sre" + // Prefix the service monitor name with the namespace to avoid conflicts. + serviceMonitor.ObjectMeta.Name = namespaceSelector + "-" + serviceMonitor.ObjectMeta.Name } // deleteObjStoreEnv deletes the objstore env var from the list of env vars. diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index fae64dcf322..5cf4683cb26 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -6,6 +6,7 @@ import ( "maps" "net" "sort" + "strings" "time" "github.com/bwplotka/mimic" @@ -20,21 +21,19 @@ import ( "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" "github.com/observatorium/observatorium/configuration_go/openshift" + "github.com/observatorium/observatorium/configuration_go/schemas/log" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache" memcachedclientcfg "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache/memcached" - "github.com/observatorium/observatorium/configuration_go/schemas/thanos/log" thanostime "github.com/observatorium/observatorium/configuration_go/schemas/thanos/time" trclient "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" routev1 "github.com/openshift/api/route/v1" templatev1 "github.com/openshift/api/template/v1" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "github.com/prometheus/common/model" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -93,6 +92,7 @@ type ObservatoriumMetricsInstance struct { CompactorPreManifestsHook func(*compactor.CompactorStatefulSet) ReceiveIngestorPreManifestsHook func(*receive.Ingestor) RulerPreManifestsHook func(*ruler.RulerStatefulSet) + RulerOpts func(opts *ruler.RulerOptions) } // Tenants contains the configuration for a tenant in a metrics instance. @@ -131,7 +131,7 @@ func (o *ObservatoriumMetrics) makeAlertManager() encoding.Encoder { // Alertmanager config opts := alertmanager.NewDefaultOptions() opts.ConfigFile = alertmanager.NewConfigFile(nil).WithExistingResource("alertmanager-config", "alertmanager.yaml").AsSecret() - opts.ClusterReconnectTimeout = model.Duration(5 * time.Minute) + opts.ClusterReconnectTimeout = time.Duration(5 * time.Minute) executeIfNotNil(o.AlertManagerOpts, opts) // K8s config @@ -140,10 +140,7 @@ func (o *ObservatoriumMetrics) makeAlertManager() encoding.Encoder { alertmanSts.Replicas = 2 alertmanSts.Name = alertManagerName alertmanSts.VolumeType = "gp2" - delete(alertmanSts.PodResources.Limits, corev1.ResourceCPU) - alertmanSts.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("100m") - alertmanSts.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("256Mi") - alertmanSts.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("1Gi") + alertmanSts.ContainerResources = k8sutil.NewResourcesRequirements("100m", "", "256Mi", "1Gi") tlsSecret := "alertmanager-tls" alertmanSts.Sidecars = []k8sutil.ContainerProvider{ makeOauthProxy(9093, o.Namespace, alertmanSts.Name, tlsSecret), @@ -171,7 +168,7 @@ func (o *ObservatoriumMetrics) makeAlertManager() encoding.Encoder { serviceAccount.Annotations["serviceaccounts.openshift.io/oauth-redirectreference.application"] = fmt.Sprintf(`{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"%s"}}`, alertmanSts.Name) // Add route for oauth-proxy - manifests["oauth-proxy-route"] = &routev1.Route{ + manifests.Add(&routev1.Route{ TypeMeta: metav1.TypeMeta{ Kind: "Route", APIVersion: routev1.SchemeGroupVersion.String(), @@ -198,7 +195,7 @@ func (o *ObservatoriumMetrics) makeAlertManager() encoding.Encoder { Name: alertmanSts.Name, }, }, - } + }) // Set encoders and template params params := []templatev1.Parameter{} @@ -217,22 +214,48 @@ func (o *ObservatoriumMetrics) makeAlertManager() encoding.Encoder { } func (o *ObservatoriumMetrics) makeRuler(instanceCfg *ObservatoriumMetricsInstance) encoding.Encoder { - rulerStatefulset := ruler.NewRuler() + name := "observatorium-thanos-rule-" + instanceCfg.InstanceName + + // Ruler config + opts := ruler.NewDefaultOptions() + opts.LogLevel = log.LogLevelWarn + opts.LogFormat = log.LogFormatLogfmt + opts.Label = []ruler.Label{ + {Key: "rule_replica", Value: "\"$(NAME)\""}, + } + opts.TracingConfig = &trclient.TracingConfig{ + Type: trclient.Jaeger, + Config: jaeger.Config{ + SamplerParam: 2, + SamplerType: jaeger.SamplerTypeRateLimiting, + ServiceName: strings.TrimPrefix(name, "observatorium-"), + }, + } + opts.AlertLabelDrop = []string{"rule_replica"} + opts.TsdbRetention = time.Duration(2 * 24 * time.Hour) + opts.Query = []string{ + fmt.Sprintf("http://%s.%s.svc.cluster.local:10902", queryRuleName, o.Namespace), + } + opts.AlertmanagersUrl = []string{ + fmt.Sprintf("http://%s.%s.svc.cluster.local:9093", alertManagerName, o.Namespace), + } + opts.RuleFile = append(opts.RuleFile, ruler.RuleFileOption{ // Keep in sync with the syncer sidecar config + FileName: "observatorium.yaml", + VolumeName: "rule-syncer", + ParentDir: "synced-rules", + }) + executeIfNotNil(instanceCfg.RulerOpts, opts) // K8s config - rulerStatefulset.Name = fmt.Sprintf("%s-%s", rulerStatefulset.Name, instanceCfg.InstanceName) + rulerStatefulset := ruler.NewRuler(opts, o.Namespace, o.ThanosImageTag) + rulerStatefulset.Name = name rulerStatefulset.CommonLabels[observatoriumInstanceLabel] = instanceCfg.InstanceName rulerStatefulset.Image = thanosImage - rulerStatefulset.ImageTag = o.ThanosImageTag - rulerStatefulset.Namespace = o.Namespace rulerStatefulset.Replicas = 1 rulerStatefulset.VolumeType = "gp2" rulerStatefulset.VolumeSize = "10Gi" - delete(rulerStatefulset.PodResources.Limits, corev1.ResourceCPU) - rulerStatefulset.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("100m") - rulerStatefulset.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("256Mi") - rulerStatefulset.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("1Gi") - tlsSecret := "ruler-tls" + rulerStatefulset.ContainerResources = k8sutil.NewResourcesRequirements("100m", "", "256Mi", "1Gi") + rulesSyncer := ruler.NewRulesSyncerContainer(&ruler.RulesSyncerOptions{ File: "/etc/thanos-rule-syncer/observatorium.yaml", Interval: 60, @@ -245,11 +268,7 @@ func (o *ObservatoriumMetrics) makeRuler(instanceCfg *ObservatoriumMetricsInstan rulesSyncer.Image = "quay.io/observatorium/thanos-rule-syncer" rulesSyncer.ImageTag = "main-2022-09-14-338f9ec" - rulerStatefulset.Options.RuleFile = append(rulerStatefulset.Options.RuleFile, ruler.RuleFileOption{ - FileName: "observatorium.yaml", - VolumeName: "rule-syncer", - ParentDir: "synced-rules", - }) + tlsSecret := "ruler-tls" rulerStatefulset.Sidecars = []k8sutil.ContainerProvider{ rulesSyncer, makeOauthProxy(10902, o.Namespace, rulerStatefulset.Name, tlsSecret), @@ -257,29 +276,6 @@ func (o *ObservatoriumMetrics) makeRuler(instanceCfg *ObservatoriumMetricsInstan } rulerStatefulset.Env = append(rulerStatefulset.Env, objStoreEnvVars(instanceCfg.ObjStoreSecret)...) - // Ruler config - rulerStatefulset.Options.LogLevel = log.LogLevelWarn - rulerStatefulset.Options.LogFormat = log.LogFormatLogfmt - rulerStatefulset.Options.Label = []ruler.Label{ - {Key: "rule_replica", Value: "\"$(NAME)\""}, - } - rulerStatefulset.Options.TracingConfig = &trclient.TracingConfig{ - Type: trclient.Jaeger, - Config: jaeger.Config{ - SamplerParam: 2, - SamplerType: jaeger.SamplerTypeRateLimiting, - ServiceName: rulerStatefulset.CommonLabels[k8sutil.NameLabel], - }, - } - rulerStatefulset.Options.AlertLabelDrop = []string{"rule_replica"} - rulerStatefulset.Options.TsdbRetention = model.Duration(2 * 24 * time.Hour) - rulerStatefulset.Options.Query = []string{ - fmt.Sprintf("http://%s.%s.svc.cluster.local:10902", queryRuleName, o.Namespace), - } - rulerStatefulset.Options.AlertmanagersUrl = []string{ - fmt.Sprintf("http://%s.%s.svc.cluster.local:9093", alertManagerName, o.Namespace), - } - // Register the store api o.storesRegister = append(o.storesRegister, fmt.Sprintf("http://%s.%s.svc.cluster.local:10902", rulerStatefulset.Name, rulerStatefulset.Namespace)) @@ -299,7 +295,7 @@ func (o *ObservatoriumMetrics) makeRuler(instanceCfg *ObservatoriumMetricsInstan serviceAccount.Annotations["serviceaccounts.openshift.io/oauth-redirectreference.application"] = fmt.Sprintf(`{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"%s"}}`, rulerStatefulset.Name) // Add route for oauth-proxy - manifests["oauth-proxy-route"] = &routev1.Route{ + manifests.Add(&routev1.Route{ TypeMeta: metav1.TypeMeta{ Kind: "Route", APIVersion: routev1.SchemeGroupVersion.String(), @@ -326,7 +322,7 @@ func (o *ObservatoriumMetrics) makeRuler(instanceCfg *ObservatoriumMetricsInstan Name: rulerStatefulset.Name, }, }, - } + }) // Set encoders and template params params := []templatev1.Parameter{} @@ -345,60 +341,56 @@ func (o *ObservatoriumMetrics) makeRuler(instanceCfg *ObservatoriumMetricsInstan } func (o *ObservatoriumMetrics) makeQueryFrontend() encoding.Encoder { - queryFrontend := queryfrontend.NewQueryFrontend() + // Query-frontend config + cacheName := "observatorium-thanos-query-range-cache-memcached" + zero := 0 + opts := &queryfrontend.QueryFrontendOptions{ + LogLevel: log.LogLevelWarn, + LogFormat: log.LogFormatLogfmt, + QueryFrontendCompressResponses: true, + QueryFrontendDownstreamURL: o.queryAdhocURL, + QueryFrontendLogQueriesLongerThan: time.Duration(5 * time.Second), + TracingConfig: &trclient.TracingConfig{ + Type: trclient.Jaeger, + Config: jaeger.Config{ + SamplerParam: 2, + SamplerType: jaeger.SamplerTypeRateLimiting, + ServiceName: strings.TrimPrefix(obsQueryFrontendName, "observatorium-"), + }, + }, + QueryRangeSplitInterval: time.Duration(24 * time.Hour), + LabelsSplitInterval: time.Duration(24 * time.Hour), + QueryRangeMaxRetriesPerRequest: &zero, + LabelsMaxRetriesPerRequest: &zero, + LabelsDefaultTimeRange: time.Duration(14 * 24 * time.Hour), + CacheCompressionType: queryfrontend.CacheCompressionTypeSnappy, + QueryRangeResponseCacheConfig: cache.NewResponseCacheConfig(memcachedclientcfg.MemcachedClientConfig{ + Addresses: []string{ + fmt.Sprintf("dnssrv+_client._tcp.%s.%s.svc", cacheName, o.Namespace), + }, + MaxAsyncBufferSize: 2 * 10e5, + MaxAsyncConcurrency: 200, + MaxGetMultiBatchSize: 100, + MaxGetMultiConcurrency: 1000, + MaxIdleConnections: 1300, + MaxItemSize: "64MiB", + Timeout: 2 * time.Second, + }), + } + + queryFrontend := queryfrontend.NewQueryFrontend(opts, o.Namespace, o.ThanosImageTag) // K8s config queryFrontend.Name = obsQueryFrontendName queryFrontend.Image = thanosImage - queryFrontend.ImageTag = o.ThanosImageTag - queryFrontend.Namespace = o.Namespace queryFrontend.Replicas = 1 - delete(queryFrontend.PodResources.Limits, corev1.ResourceCPU) - queryFrontend.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("100m") - queryFrontend.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("256Mi") - queryFrontend.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("1Gi") + queryFrontend.ContainerResources = k8sutil.NewResourcesRequirements("100m", "", "256Mi", "1Gi") tlsSecret := "query-frontend-tls" queryFrontend.Sidecars = []k8sutil.ContainerProvider{ makeOauthProxy(10902, o.Namespace, queryFrontend.Name, tlsSecret), makeJaegerAgent("observatorium-tools"), } - // Query-fe config - queryFrontend.Options.LogLevel = log.LogLevelWarn - queryFrontend.Options.LogFormat = log.LogFormatLogfmt - queryFrontend.Options.QueryFrontendCompressResponses = true - queryFrontend.Options.QueryFrontendDownstreamURL = o.queryAdhocURL - queryFrontend.Options.QueryFrontendLogQueriesLongerThan = time.Duration(5 * time.Second) - // Add memcached config - queryFrontend.Options.TracingConfig = &trclient.TracingConfig{ - Type: trclient.Jaeger, - Config: jaeger.Config{ - SamplerParam: 2, - SamplerType: jaeger.SamplerTypeRateLimiting, - ServiceName: queryFrontend.CommonLabels[k8sutil.NameLabel], - }, - } - queryFrontend.Options.QueryRangeSplitInterval = time.Duration(24 * time.Hour) - queryFrontend.Options.LabelsSplitInterval = time.Duration(24 * time.Hour) - zero := 0 - queryFrontend.Options.QueryRangeMaxRetriesPerRequest = &zero - queryFrontend.Options.LabelsMaxRetriesPerRequest = &zero - queryFrontend.Options.LabelsDefaultTimeRange = time.Duration(14 * 24 * time.Hour) - queryFrontend.Options.CacheCompressionType = queryfrontend.CacheCompressionTypeSnappy - cacheName := "observatorium-thanos-query-range-cache-memcached" - queryFrontend.Options.QueryRangeResponseCacheConfig = cache.NewResponseCacheConfig(memcachedclientcfg.MemcachedClientConfig{ - Addresses: []string{ - fmt.Sprintf("dnssrv+_client._tcp.%s.%s.svc", cacheName, o.Namespace), - }, - MaxAsyncBufferSize: 2 * 10e5, - MaxAsyncConcurrency: 200, - MaxGetMultiBatchSize: 100, - MaxGetMultiConcurrency: 1000, - MaxIdleConnections: 1300, - MaxItemSize: "64MiB", - Timeout: 2 * time.Second, - }) - executeIfNotNil(o.QueryFrontendPreManifestsHook, queryFrontend) // Post process @@ -415,7 +407,7 @@ func (o *ObservatoriumMetrics) makeQueryFrontend() encoding.Encoder { serviceAccount.Annotations["serviceaccounts.openshift.io/oauth-redirectreference.application"] = fmt.Sprintf(`{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"%s"}}`, queryFrontend.Name) // Add route for oauth-proxy - manifests["oauth-proxy-route"] = &routev1.Route{ + manifests.Add(&routev1.Route{ TypeMeta: metav1.TypeMeta{ Kind: "Route", APIVersion: routev1.SchemeGroupVersion.String(), @@ -442,7 +434,7 @@ func (o *ObservatoriumMetrics) makeQueryFrontend() encoding.Encoder { Name: queryFrontend.Name, }, }, - } + }) // Add cache rangeCache := "observatorium-thanos-query-range-cache-memcached" @@ -469,9 +461,41 @@ func (o *ObservatoriumMetrics) makeQueryFrontend() encoding.Encoder { } func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook func(*query.QueryDeployment)) encoding.Encoder { - queryDplt := query.NewQuery() + name := "observatorium-thanos-query" + if isRuleQuery { + name = queryRuleName + } + + // Query config + opts := &query.QueryOptions{ + LogLevel: log.LogLevelWarn, + LogFormat: log.LogFormatLogfmt, + QueryReplicaLabel: []string{"replica", "prometheus_replica", "rule_replica"}, + QueryTimeout: time.Duration(15 * time.Minute), + QueryLookbackDelta: time.Duration(15 * time.Minute), + WebPrefixHeader: "X-Forwarded-Prefix", + TracingConfig: &trclient.TracingConfig{ + Type: trclient.Jaeger, + Config: jaeger.Config{ + SamplerParam: 2, + SamplerType: jaeger.SamplerTypeRateLimiting, + ServiceName: strings.TrimPrefix(name, "observatorium-"), + }, + }, + QueryAutoDownsampling: true, + QueryPromQLEngine: "prometheus", + QueryMaxConcurrent: 10, + } + opts.Endpoint = append(opts.Endpoint, o.storesRegister...) + sort.Strings(opts.Endpoint) // sort to make the output deterministic and avoid noisy diffs + + if !isRuleQuery { + opts.QueryTelemetryRequestDurationSecondsQuantiles = []float64{0.1, 0.25, 0.75, 1.25, 1.75, 2.5, 3, 5, 10, 15, 30, 60, 120} + } // K8s config + queryDplt := query.NewQuery(opts, o.Namespace, o.ThanosImageTag) + if isRuleQuery { queryDplt.Name = queryRuleName queryDplt.CommonLabels[k8sutil.NameLabel] = queryDplt.CommonLabels[k8sutil.NameLabel] + "-rule" @@ -482,13 +506,10 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook }) } queryDplt.Image = thanosImage - queryDplt.ImageTag = o.ThanosImageTag - queryDplt.Namespace = o.Namespace + queryDplt.Name = name queryDplt.Replicas = 1 - delete(queryDplt.PodResources.Limits, corev1.ResourceCPU) - queryDplt.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("250m") - queryDplt.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("2Gi") - queryDplt.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("8Gi") + queryDplt.ContainerResources = k8sutil.NewResourcesRequirements("250m", "", "2Gi", "8Gi") + var tlsSecret string if isRuleQuery { tlsSecret = "query-rule-tls" @@ -500,29 +521,7 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook makeOauthProxy(10902, o.Namespace, queryDplt.Name, tlsSecret), } - // Query config - queryDplt.Options.LogLevel = log.LogLevelWarn - queryDplt.Options.LogFormat = log.LogFormatLogfmt - queryDplt.Options.QueryReplicaLabel = []string{"replica", "prometheus_replica", "rule_replica"} - queryDplt.Options.Endpoint = append(queryDplt.Options.Endpoint, o.storesRegister...) - sort.Strings(queryDplt.Options.Endpoint) // sort to make the output deterministic and avoid noisy diffs - queryDplt.Options.QueryTimeout = model.Duration(15 * time.Minute) - queryDplt.Options.QueryLookbackDelta = model.Duration(15 * time.Minute) - queryDplt.Options.WebPrefixHeader = "X-Forwarded-Prefix" - queryDplt.Options.TracingConfig = &trclient.TracingConfig{ - Type: trclient.Jaeger, - Config: jaeger.Config{ - SamplerParam: 2, - SamplerType: jaeger.SamplerTypeRateLimiting, - ServiceName: queryDplt.CommonLabels[k8sutil.NameLabel], - }, - } - queryDplt.Options.QueryAutoDownsampling = true - queryDplt.Options.QueryPromQLEngine = "prometheus" - queryDplt.Options.QueryMaxConcurrent = 10 - if !isRuleQuery { - queryDplt.Options.QueryTelemetryRequestDurationSecondsQuantiles = []float64{0.1, 0.25, 0.75, 1.25, 1.75, 2.5, 3, 5, 10, 15, 30, 60, 120} - } + executeIfNotNil(preManifestHook, queryDplt) ruleUrl := fmt.Sprintf("http://%s.%s.svc.cluster.local:10902", queryDplt.Name, queryDplt.Namespace) if isRuleQuery { @@ -531,8 +530,6 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook o.queryAdhocURL = ruleUrl } - executeIfNotNil(preManifestHook, queryDplt) - // Post process manifests := queryDplt.Manifests() postProcessServiceMonitor(k8sutil.GetObject[*monv1.ServiceMonitor](manifests, ""), queryDplt.Namespace) @@ -548,7 +545,7 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook serviceAccount.Annotations["serviceaccounts.openshift.io/oauth-redirectreference.application"] = fmt.Sprintf(`{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"%s"}}`, queryDplt.Name) // Add route for oauth-proxy - manifests["oauth-proxy-route"] = &routev1.Route{ + manifests.Add(&routev1.Route{ TypeMeta: metav1.TypeMeta{ Kind: "Route", APIVersion: routev1.SchemeGroupVersion.String(), @@ -575,7 +572,7 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook Name: queryDplt.Name, }, }, - } + }) // Set encoders and template params params := []templatev1.Parameter{} @@ -596,42 +593,30 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook // makeReceiveRouter creates a base receive router component that can be derived from using the preManifestsHook // for each tenant instance of the observatorium metrics. func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { - router := receive.NewRouter() - - // K8s config - router.Name = receiveRouterName - router.Image = thanosImage - router.ImageTag = o.ThanosImageTag - router.Namespace = o.Namespace - router.Replicas = 1 - delete(router.PodResources.Limits, corev1.ResourceCPU) - router.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") - router.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("3Gi") - router.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("10Gi") - router.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} - - // Router config - router.Options.LogLevel = log.LogLevelWarn - router.Options.LogFormat = log.LogFormatLogfmt - router.Options.TracingConfig = &trclient.TracingConfig{ + // Receive router config + opts := receive.NewDefaultRouterOptions() + opts.TracingConfig = &trclient.TracingConfig{ Type: trclient.Jaeger, Config: jaeger.Config{ SamplerParam: 2, SamplerType: jaeger.SamplerTypeRateLimiting, - ServiceName: "thanos-receive-router", + ServiceName: strings.TrimPrefix(receiveRouterName, "observatorium-"), }, } - router.Options.Label = []receive.Label{ + opts.Label = []receive.Label{ { Key: "receive", Value: "\"true\"", }, } - receiveLimits := receive.NewReceiveLimitsConfig() - receiveLimits.WriteLimits.DefaultLimits = o.ReceiveLimitsDefault - receiveLimits.WriteLimits.GlobalLimits = o.ReceiveLimitsGlobal - receiveLimits.WriteLimits.TenantsLimits = map[string]receive.WriteLimitConfig{} + receiveLimits := &receive.ReceiveLimitsConfig{ + WriteLimits: receive.WriteLimitsConfig{ + DefaultLimits: o.ReceiveLimitsDefault, + GlobalLimits: o.ReceiveLimitsGlobal, + TenantsLimits: map[string]receive.WriteLimitConfig{}, + }, + } for _, instanceCfg := range o.Instances { for _, tenant := range instanceCfg.Tenants { if tenant.ReceiveLimits == nil { @@ -641,11 +626,20 @@ func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { receiveLimits.WriteLimits.TenantsLimits[tenant.ID] = *tenant.ReceiveLimits } } - router.Options.ReceiveLimitsConfigFile = receive.NewReceiveLimitsConfigFile(router.Name+"-limits", receiveLimits) + opts.ReceiveLimitsConfigFile = receive.NewReceiveLimitsConfigFile(receiveLimits).WithResourceName("observatorium-thanos-receive-router-limits") generatedHashringCm := "thanos-receive-hashring-generated" // Leave the config map empty, it is generated by the controller - router.Options.ReceiveHashringsFile = receive.NewReceiveHashringConfigFile(generatedHashringCm, receive.HashRingsConfig{}) + opts.ReceiveHashringsFile = receive.NewReceiveHashringConfigFile(nil).WithResourceName(generatedHashringCm) + + router := receive.NewRouter(opts, o.Namespace, o.ThanosImageTag) + + // K8s config + router.Name = receiveRouterName + router.Image = thanosImage + router.Replicas = 1 + router.ContainerResources = k8sutil.NewResourcesRequirements("200m", "", "3Gi", "10Gi") + router.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} executeIfNotNil(o.ReceiveRouterPreManifestsHook, router) @@ -658,7 +652,7 @@ func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { // Add pod disruption budget labels := maps.Clone(k8sutil.GetObject[*appsv1.Deployment](manifests, "").ObjectMeta.Labels) delete(labels, k8sutil.VersionLabel) - manifests["router-pdb"] = &policyv1.PodDisruptionBudget{ + manifests.Add(&policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ Kind: "PodDisruptionBudget", APIVersion: policyv1.SchemeGroupVersion.String(), @@ -678,14 +672,20 @@ func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { MatchLabels: labels, }, }, - } + }) // Add thanos-receive-controller - controller := receive.NewController() + hashringFileName := "hashrings.json" + ctrlOpts := &receive.ControllerOptions{ + ConfigMapName: baseHashringCm, + ConfigMapGeneratedName: generatedHashringCm, + Namespace: o.Namespace, + FileName: hashringFileName, + } + // Controller k8s config + controller := receive.NewController(ctrlOpts, o.Namespace, o.ReceiveControllerImageTag) controller.Image = thanosReceiveControllerImage - controller.ImageTag = o.ReceiveControllerImageTag - controller.Namespace = o.Namespace controller.Replicas = 1 var baseHashring receive.HashRingsConfig = []receive.HashringConfig{} @@ -701,21 +701,11 @@ func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { baseHashring = append(baseHashring, newHashring) } - hashringFileName := "hashrings.json" controller.ConfigMaps[baseHashringCm] = map[string]string{ hashringFileName: baseHashring.String(), } - // Controller config - controller.Options.ConfigMapName = baseHashringCm - controller.Options.ConfigMapGeneratedName = generatedHashringCm - controller.Options.Namespace = o.Namespace - controller.Options.FileName = hashringFileName - - controllerManifests := controller.Manifests() - for k, v := range controllerManifests { - manifests[k] = v - } + maps.Copy(manifests, controller.Manifests()) // Set encoders and template params params := []templatev1.Parameter{} @@ -730,41 +720,37 @@ func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { // makeReceiveIngestor creates a base receive ingestor component that can be derived from using the preManifestsHook func (o *ObservatoriumMetrics) makeTenantReceiveIngestor(instanceCfg *ObservatoriumMetricsInstance) encoding.Encoder { - ingestor := receive.NewIngestor() - ingestor.Name = fmt.Sprintf("%s-%s", ingestor.Name, instanceCfg.InstanceName) - ingestor.CommonLabels[observatoriumInstanceLabel] = instanceCfg.InstanceName - ingestor.Image = thanosImage - ingestor.ImageTag = o.ThanosImageTag - ingestor.Namespace = o.Namespace - ingestor.Replicas = 1 - ingestor.VolumeType = "gp2" - ingestor.VolumeSize = "50Gi" - delete(ingestor.PodResources.Limits, corev1.ResourceCPU) - ingestor.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") - ingestor.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("3Gi") - ingestor.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("10Gi") - ingestor.Env = deleteObjStoreEnv(ingestor.Env) // delete the default objstore env vars - ingestor.Env = append(ingestor.Env, objStoreEnvVars(instanceCfg.ObjStoreSecret)...) - ingestor.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} - + name := "observatorium-thanos-receive-ingestor-" + instanceCfg.InstanceName // Router config - ingestor.Options.LogLevel = log.LogLevelWarn - ingestor.Options.LogFormat = log.LogFormatLogfmt - ingestor.Options.TracingConfig = &trclient.TracingConfig{ + opts := receive.NewDefaultIngestorOptions() + opts.TracingConfig = &trclient.TracingConfig{ Type: trclient.Jaeger, Config: jaeger.Config{ SamplerParam: 2, SamplerType: jaeger.SamplerTypeRateLimiting, - ServiceName: "thanos-receive-router", + ServiceName: strings.TrimPrefix(name, "observatorium-"), }, } - ingestor.Options.Label = []receive.Label{ + opts.Label = []receive.Label{ { Key: "replica", Value: "\"$(POD_NAME)\"", }, } + // K8s config + ingestor := receive.NewIngestor(opts, o.Namespace, o.ThanosImageTag) + ingestor.Name = name + ingestor.CommonLabels[observatoriumInstanceLabel] = instanceCfg.InstanceName + ingestor.Image = thanosImage + ingestor.Replicas = 1 + ingestor.VolumeType = "gp2" + ingestor.VolumeSize = "50Gi" + ingestor.ContainerResources = k8sutil.NewResourcesRequirements("200m", "", "3Gi", "10Gi") + ingestor.Env = deleteObjStoreEnv(ingestor.Env) // delete the default objstore env vars + ingestor.Env = append(ingestor.Env, objStoreEnvVars(instanceCfg.ObjStoreSecret)...) + ingestor.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} + executeIfNotNil(instanceCfg.ReceiveIngestorPreManifestsHook, ingestor) // Register the store for the query component @@ -781,7 +767,7 @@ func (o *ObservatoriumMetrics) makeTenantReceiveIngestor(instanceCfg *Observator // Add pod disruption budget labels := maps.Clone(k8sutil.GetObject[*appsv1.StatefulSet](manifests, "").ObjectMeta.Labels) delete(labels, k8sutil.VersionLabel) - manifests["store-pdb"] = &policyv1.PodDisruptionBudget{ + manifests.Add(&policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ Kind: "PodDisruptionBudget", APIVersion: policyv1.SchemeGroupVersion.String(), @@ -801,7 +787,7 @@ func (o *ObservatoriumMetrics) makeTenantReceiveIngestor(instanceCfg *Observator MatchLabels: labels, }, }, - } + }) // Set encoders and template params params := []templatev1.Parameter{} @@ -816,18 +802,25 @@ func (o *ObservatoriumMetrics) makeTenantReceiveIngestor(instanceCfg *Observator // makeCompactor creates a base compactor component that can be derived from using the preManifestsHook. func (o *ObservatoriumMetrics) makeCompactor(instanceCfg *ObservatoriumMetricsInstance) encoding.Encoder { + // Compactor config + opts := compactor.NewDefaultOptions() + opts.LogLevel = log.LogLevelWarn + opts.RetentionResolutionRaw = 0 + opts.RetentionResolution5m = 0 + opts.RetentionResolution1h = 0 + opts.DeleteDelay = 24 * time.Hour + opts.CompactConcurrency = 1 + opts.DownsampleConcurrency = 1 + opts.DeduplicationReplicaLabel = "replica" + opts.AddExtraOpts("--debug.max-compaction-level=3") + // K8s config - compactorSatefulset := compactor.NewCompactor() + compactorSatefulset := compactor.NewCompactor(opts, o.Namespace, o.ThanosImageTag) compactorSatefulset.Name = fmt.Sprintf("%s-%s", compactorSatefulset.Name, instanceCfg.InstanceName) compactorSatefulset.CommonLabels[observatoriumInstanceLabel] = instanceCfg.InstanceName compactorSatefulset.Image = thanosImage - compactorSatefulset.ImageTag = o.ThanosImageTag - compactorSatefulset.Namespace = o.Namespace compactorSatefulset.Replicas = 1 - delete(compactorSatefulset.PodResources.Limits, corev1.ResourceCPU) - compactorSatefulset.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") - compactorSatefulset.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("1Gi") - compactorSatefulset.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("5Gi") + compactorSatefulset.ContainerResources = k8sutil.NewResourcesRequirements("200m", "", "1Gi", "5Gi") compactorSatefulset.VolumeType = "gp2" compactorSatefulset.VolumeSize = "50Gi" compactorSatefulset.Env = deleteObjStoreEnv(compactorSatefulset.Env) // delete the default objstore env vars @@ -835,17 +828,6 @@ func (o *ObservatoriumMetrics) makeCompactor(instanceCfg *ObservatoriumMetricsIn tlsSecret := "compact-tls-" + instanceCfg.InstanceName compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, o.Namespace, compactorSatefulset.Name, tlsSecret)} - // Compactor config - compactorSatefulset.Options.LogLevel = log.LogLevelWarn - compactorSatefulset.Options.RetentionResolutionRaw = 0 - compactorSatefulset.Options.RetentionResolution5m = 0 - compactorSatefulset.Options.RetentionResolution1h = 0 - compactorSatefulset.Options.DeleteDelay = 24 * time.Hour - compactorSatefulset.Options.CompactConcurrency = 1 - compactorSatefulset.Options.DownsampleConcurrency = 1 - compactorSatefulset.Options.DeduplicationReplicaLabel = "replica" - compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") - executeIfNotNil(instanceCfg.CompactorPreManifestsHook, compactorSatefulset) // Post process @@ -863,7 +845,7 @@ func (o *ObservatoriumMetrics) makeCompactor(instanceCfg *ObservatoriumMetricsIn // Add pod disruption budget labels := maps.Clone(k8sutil.GetObject[*appsv1.StatefulSet](manifests, "").ObjectMeta.Labels) delete(labels, k8sutil.VersionLabel) - manifests["store-pdb"] = &policyv1.PodDisruptionBudget{ + manifests.Add(&policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ Kind: "PodDisruptionBudget", APIVersion: policyv1.SchemeGroupVersion.String(), @@ -883,10 +865,10 @@ func (o *ObservatoriumMetrics) makeCompactor(instanceCfg *ObservatoriumMetricsIn MatchLabels: labels, }, }, - } + }) // Add route for oauth-proxy - manifests["oauth-proxy-route"] = &routev1.Route{ + manifests.Add(&routev1.Route{ TypeMeta: metav1.TypeMeta{ Kind: "Route", APIVersion: routev1.SchemeGroupVersion.String(), @@ -913,7 +895,7 @@ func (o *ObservatoriumMetrics) makeCompactor(instanceCfg *ObservatoriumMetricsIn Name: compactorSatefulset.Name, }, }, - } + }) // Set encoders and template params params := []templatev1.Parameter{} @@ -933,18 +915,73 @@ func (o *ObservatoriumMetrics) makeCompactor(instanceCfg *ObservatoriumMetricsIn // makeStore creates a base store component that can be derived from using the preManifestsHook. func (o *ObservatoriumMetrics) makeStore(instanceCfg *ObservatoriumMetricsInstance) encoding.Encoder { + name := "observatorium-thanos-store-" + instanceCfg.InstanceName + + // Store config + maxTimeDur := time.Duration(-22) * time.Hour + hasmodConfigPath := "/etc/thanos/hashmod" + opts := &store.StoreOptions{ + LogFormat: log.LogFormatLogfmt, + LogLevel: log.LogLevelWarn, + IgnoreDeletionMarksDelay: 24 * time.Hour, + DataDir: "/var/thanos/store", + ObjstoreConfig: "$(OBJSTORE_CONFIG)", + MaxTime: &thanostime.TimeOrDurationValue{Dur: &maxTimeDur}, + SelectorRelabelConfigFile: fmt.Sprintf("%s/hashmod-config.yaml", hasmodConfigPath), + TracingConfig: &trclient.TracingConfig{ + Type: trclient.Jaeger, + Config: jaeger.Config{ + SamplerParam: 2, + SamplerType: jaeger.SamplerTypeRateLimiting, + ServiceName: strings.TrimPrefix(name, "observatorium-"), + }, + }, + } + opts.AddExtraOpts("--store.enable-index-header-lazy-reader") + + indexCacheName := fmt.Sprintf("observatorium-thanos-store-index-cache-memcached-%s", instanceCfg.InstanceName) + bucketCacheName := fmt.Sprintf("observatorium-thanos-store-bucket-cache-memcached-%s", instanceCfg.InstanceName) + opts.IndexCacheConfig = cache.NewIndexCacheConfig(memcachedclientcfg.MemcachedClientConfig{ + Addresses: []string{ + fmt.Sprintf("dnssrv+_client._tcp.%s.%s.svc", indexCacheName, o.Namespace), + }, + DNSProviderUpdateInterval: 10 * time.Second, + MaxAsyncBufferSize: 2500000, + MaxAsyncConcurrency: 1000, + MaxGetMultiBatchSize: 100000, + MaxGetMultiConcurrency: 1000, + MaxIdleConnections: 2500, + MaxItemSize: "5MiB", + Timeout: 2 * time.Second, + }) + memCache := cache.NewBucketCacheConfig(memcachedclientcfg.MemcachedClientConfig{ + Addresses: []string{ + fmt.Sprintf("dnssrv+_client._tcp.%s.%s.svc", indexCacheName, o.Namespace), + }, + DNSProviderUpdateInterval: 10 * time.Second, + MaxAsyncBufferSize: 2500000, + MaxAsyncConcurrency: 1000, + MaxGetMultiBatchSize: 100000, + MaxGetMultiConcurrency: 1000, + MaxIdleConnections: 2500, + MaxItemSize: "1MiB", + Timeout: 2 * time.Second, + }) + memCache.MaxChunksGetRangeRequests = 3 + memCache.MetafileMaxSize = "1MiB" + memCache.MetafileExistsTTL = 2 * time.Hour + memCache.MetafileDoesntExistTTL = 15 * time.Minute + memCache.MetafileContentTTL = 24 * time.Hour + + opts.AddExtraOpts(fmt.Sprintf("--store.caching-bucket.config=%s", memCache.String())) + // K8s config - storeStatefulSet := store.NewStore() - storeStatefulSet.Name = fmt.Sprintf("%s-%s", storeStatefulSet.Name, instanceCfg.InstanceName) + storeStatefulSet := store.NewStore(opts, o.Namespace, o.ThanosImageTag) + storeStatefulSet.Name = name storeStatefulSet.CommonLabels[observatoriumInstanceLabel] = instanceCfg.InstanceName storeStatefulSet.Image = thanosImage - storeStatefulSet.ImageTag = o.ThanosImageTag - storeStatefulSet.Namespace = o.Namespace storeStatefulSet.Replicas = 1 - delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) - storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("4") - storeStatefulSet.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("20Gi") - storeStatefulSet.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("80Gi") + storeStatefulSet.ContainerResources = k8sutil.NewResourcesRequirements("2", "", "5Gi", "20Gi") storeStatefulSet.VolumeType = "gp2" storeStatefulSet.VolumeSize = "50Gi" storeStatefulSet.Env = deleteObjStoreEnv(storeStatefulSet.Env) // delete the default objstore env vars @@ -989,60 +1026,6 @@ func (o *ObservatoriumMetrics) makeStore(instanceCfg *ObservatoriumMetricsInstan }, } - // Store config - storeStatefulSet.Options.LogLevel = log.LogLevelWarn - storeStatefulSet.Options.LogFormat = log.LogFormatLogfmt - storeStatefulSet.Options.IgnoreDeletionMarksDelay = 24 * time.Hour - maxTimeDur := time.Duration(-22) * time.Hour - storeStatefulSet.Options.MaxTime = &thanostime.TimeOrDurationValue{Dur: &maxTimeDur} - hasmodConfigPath := "/etc/thanos/hashmod" - storeStatefulSet.Options.SelectorRelabelConfigFile = fmt.Sprintf("%s/hashmod-config.yaml", hasmodConfigPath) - storeStatefulSet.Options.TracingConfig = &trclient.TracingConfig{ - Type: trclient.Jaeger, - Config: jaeger.Config{ - SamplerParam: 2, - SamplerType: jaeger.SamplerTypeRateLimiting, - ServiceName: "thanos-store", - }, - } - // storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. - storeStatefulSet.Options.AddExtraOpts("--store.enable-index-header-lazy-reader") - indexCacheName := fmt.Sprintf("observatorium-thanos-store-index-cache-memcached-%s", instanceCfg.InstanceName) - bucketCacheName := fmt.Sprintf("observatorium-thanos-store-bucket-cache-memcached-%s", instanceCfg.InstanceName) - storeStatefulSet.Options.IndexCacheConfig = cache.NewIndexCacheConfig(memcachedclientcfg.MemcachedClientConfig{ - Addresses: []string{ - fmt.Sprintf("dnssrv+_client._tcp.%s.%s.svc", indexCacheName, o.Namespace), - }, - DNSProviderUpdateInterval: 10 * time.Second, - MaxAsyncBufferSize: 2500000, - MaxAsyncConcurrency: 1000, - MaxGetMultiBatchSize: 100000, - MaxGetMultiConcurrency: 1000, - MaxIdleConnections: 2500, - MaxItemSize: "5MiB", - Timeout: 2 * time.Second, - }) - memCache := cache.NewBucketCacheConfig(memcachedclientcfg.MemcachedClientConfig{ - Addresses: []string{ - fmt.Sprintf("dnssrv+_client._tcp.%s.%s.svc", indexCacheName, o.Namespace), - }, - DNSProviderUpdateInterval: 10 * time.Second, - MaxAsyncBufferSize: 2500000, - MaxAsyncConcurrency: 1000, - MaxGetMultiBatchSize: 100000, - MaxGetMultiConcurrency: 1000, - MaxIdleConnections: 2500, - MaxItemSize: "1MiB", - Timeout: 2 * time.Second, - }) - memCache.MaxChunksGetRangeRequests = 3 - memCache.MetafileMaxSize = "1MiB" - memCache.MetafileExistsTTL = 2 * time.Hour - memCache.MetafileDoesntExistTTL = 15 * time.Minute - memCache.MetafileContentTTL = 24 * time.Hour - - storeStatefulSet.Options.AddExtraOpts(fmt.Sprintf("--store.caching-bucket.config=%s", memCache.String())) - executeIfNotNil(instanceCfg.StorePreManifestsHook, storeStatefulSet) // Register the store for the query component @@ -1099,10 +1082,9 @@ func (o *ObservatoriumMetrics) makeStore(instanceCfg *ObservatoriumMetricsInstan }, }, } + manifests.Add(listPodsRole) - manifests["list-pods-rbac"] = listPodsRole - - manifests["list-pods-rbac-binding"] = &rbacv1.RoleBinding{ + roleBinding := &rbacv1.RoleBinding{ TypeMeta: metav1.TypeMeta{ Kind: "RoleBinding", APIVersion: rbacv1.SchemeGroupVersion.String(), @@ -1126,9 +1108,10 @@ func (o *ObservatoriumMetrics) makeStore(instanceCfg *ObservatoriumMetricsInstan APIGroup: "rbac.authorization.k8s.io", }, } + manifests.Add(roleBinding) // Add pod disruption budget - manifests["store-pdb"] = &policyv1.PodDisruptionBudget{ + pdb := &policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ Kind: "PodDisruptionBudget", APIVersion: policyv1.SchemeGroupVersion.String(), @@ -1149,6 +1132,7 @@ func (o *ObservatoriumMetrics) makeStore(instanceCfg *ObservatoriumMetricsInstan }, }, } + manifests.Add(pdb) // Add index cache cachePreManHook := func(memdep *memcached.MemcachedDeployment) {