From dfddd124deed859d9e12aa0c4a007d3cd6fe3560 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Mon, 4 Dec 2023 16:44:27 +0100 Subject: [PATCH] add query fe Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 +- ...orium-metrics-query-frontend-template.yaml | 309 ++++++++++++++++++ .../observatorium-metrics-query-template.yaml | 18 +- ...orium-metrics-receive-router-template.yaml | 2 +- ...orium-metrics-query-frontend-template.yaml | 309 ++++++++++++++++++ .../observatorium-metrics-query-template.yaml | 18 +- ...orium-metrics-receive-router-template.yaml | 2 +- services_go/observatorium/metrics.go | 124 ++++++- 9 files changed, 763 insertions(+), 25 deletions(-) create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml diff --git a/go.mod b/go.mod index af9ba8ed9a..cf76f51f86 100644 --- a/go.mod +++ b/go.mod @@ -83,4 +83,4 @@ require ( ) // Delete when https://github.com/observatorium/observatorium/pull/543 is merged to main branch -replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20231123172357-0705fe74fcd5 +replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20231124164749-d47b398822ba diff --git a/go.sum b/go.sum index efd159a27f..d13e8c857a 100644 --- a/go.sum +++ b/go.sum @@ -1253,8 +1253,8 @@ github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= -github.com/thibaultmg/observatorium v0.0.0-20231123172357-0705fe74fcd5 h1:EQ3Qb3hsVxrhlWKl3DWettTcHwKGbGyJp4t6j0q4W3E= -github.com/thibaultmg/observatorium v0.0.0-20231123172357-0705fe74fcd5/go.mod h1:P+7t9O8AitkuZjUhXC4LHw4iwAzTpIrs0tHz8X3xTvM= +github.com/thibaultmg/observatorium v0.0.0-20231124164749-d47b398822ba h1:C0lH8h/Dh6SCVG6FCNBsn1Rg075gQd3TPBVzjcdCIsk= +github.com/thibaultmg/observatorium v0.0.0-20231124164749-d47b398822ba/go.mod h1:P+7t9O8AitkuZjUhXC4LHw4iwAzTpIrs0tHz8X3xTvM= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tinylib/msgp v1.0.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml new file mode 100755 index 0000000000..4337b384b9 --- /dev/null +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-frontend-template.yaml @@ -0,0 +1,309 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-query-frontend +objects: +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-frontend + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query-frontend + weight: null +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: query-frontend-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-frontend + namespace: rhobs + spec: + ports: + - name: http + port: 9090 + protocol: TCP + targetPort: 9090 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query-frontend"}}' + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-frontend + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + prometheus: app-sre + name: observatorium-thanos-query-frontend + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: Deployment + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + name: observatorium-thanos-query-frontend + namespace: rhobs + spec: + replicas: ${{REPLICAS}} + selector: + matchLabels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.5 + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-query-frontend + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - query-frontend + - --cache-compression-type=snappy + - --labels.default-time-range=2w + - --labels.max-retries-per-request=0 + - --labels.split-interval=1d + - --log.format=logfmt + - --log.level=${LOG_LEVEL} + - --query-frontend.compress-responses + - --query-frontend.downstream-url=http://observatorium-thanos-query.rhobs.svc.cluster.local:10902 + - --query-frontend.log-queries-longer-than=5s + - --query-range.max-retries-per-request=0 + - --query-range.split-interval=1d + - | + --tracing.config=type: JAEGER + config: + service_name: thanos-query-frontend + sampler_type: ratelimiting + sampler_param: 2 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + image: quay.io/thanos/thanos:v0.32.5 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 9090 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 9090 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 9090 + periodSeconds: 5 + resources: + limits: + memory: ${MEMORY_LIMIT} + requests: + cpu: ${CPU_REQUEST} + memory: ${MEMORY_REQUEST} + terminationMessagePolicy: FallbackToLogsOnError + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-query-frontend + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:4.15 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: tls + readOnly: true + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: observatorium-thanos-query-frontend + terminationGracePeriodSeconds: 120 + volumes: + - name: tls + secret: + secretName: query-frontend-tls +parameters: +- name: LOG_LEVEL + value: warn +- name: REPLICAS + value: "1" +- name: CPU_REQUEST + value: 100m +- name: MEMORY_LIMIT + value: 1Gi +- name: MEMORY_REQUEST + value: 256Mi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml index 3b6c0897c3..35be9bacaf 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-query-template.yaml @@ -177,19 +177,19 @@ objects: - --query.replica-label=replica - --query.replica-label=prometheus_replica - --query.replica-label=rule_replica - - --query.telemetry.request-duration-seconds-quantiles=0.10 + - --query.telemetry.request-duration-seconds-quantiles=0.1 - --query.telemetry.request-duration-seconds-quantiles=0.25 - --query.telemetry.request-duration-seconds-quantiles=0.75 - --query.telemetry.request-duration-seconds-quantiles=1.25 - --query.telemetry.request-duration-seconds-quantiles=1.75 - - --query.telemetry.request-duration-seconds-quantiles=2.50 - - --query.telemetry.request-duration-seconds-quantiles=3.00 - - --query.telemetry.request-duration-seconds-quantiles=5.00 - - --query.telemetry.request-duration-seconds-quantiles=10.00 - - --query.telemetry.request-duration-seconds-quantiles=15.00 - - --query.telemetry.request-duration-seconds-quantiles=30.00 - - --query.telemetry.request-duration-seconds-quantiles=60.00 - - --query.telemetry.request-duration-seconds-quantiles=120.00 + - --query.telemetry.request-duration-seconds-quantiles=2.5 + - --query.telemetry.request-duration-seconds-quantiles=3 + - --query.telemetry.request-duration-seconds-quantiles=5 + - --query.telemetry.request-duration-seconds-quantiles=10 + - --query.telemetry.request-duration-seconds-quantiles=15 + - --query.telemetry.request-duration-seconds-quantiles=30 + - --query.telemetry.request-duration-seconds-quantiles=60 + - --query.telemetry.request-duration-seconds-quantiles=120 - --query.timeout=15m - | --tracing.config=type: JAEGER diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml index 89dbbb516d..2720807468 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-receive-router-template.yaml @@ -6,7 +6,7 @@ metadata: objects: - apiVersion: v1 data: - hashring.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' + hashrings.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' kind: ConfigMap metadata: creationTimestamp: null diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml new file mode 100755 index 0000000000..86ecef1eb1 --- /dev/null +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-frontend-template.yaml @@ -0,0 +1,309 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-query-frontend +objects: +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-frontend + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-query-frontend + weight: null +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: query-frontend-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-frontend + namespace: rhobs + spec: + ports: + - name: http + port: 9090 + protocol: TCP + targetPort: 9090 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium +- apiVersion: v1 + imagePullSecrets: + - name: quay.io + kind: ServiceAccount + metadata: + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.application: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"observatorium-thanos-query-frontend"}}' + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-frontend + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + prometheus: app-sre + name: observatorium-thanos-query-frontend + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: Deployment + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + name: observatorium-thanos-query-frontend + namespace: rhobs + spec: + replicas: ${{REPLICAS}} + selector: + matchLabels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: query-cache + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-query-frontend + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-query-frontend + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - query-frontend + - --cache-compression-type=snappy + - --labels.default-time-range=2w + - --labels.max-retries-per-request=0 + - --labels.split-interval=1d + - --log.format=logfmt + - --log.level=${LOG_LEVEL} + - --query-frontend.compress-responses + - --query-frontend.downstream-url=http://observatorium-thanos-query.rhobs.svc.cluster.local:10902 + - --query-frontend.log-queries-longer-than=5s + - --query-range.max-retries-per-request=0 + - --query-range.split-interval=1d + - | + --tracing.config=type: JAEGER + config: + service_name: thanos-query-frontend + sampler_type: ratelimiting + sampler_param: 2 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 9090 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 9090 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 9090 + periodSeconds: 5 + resources: + limits: + memory: ${MEMORY_LIMIT} + requests: + cpu: ${CPU_REQUEST} + memory: ${MEMORY_REQUEST} + terminationMessagePolicy: FallbackToLogsOnError + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-query-frontend + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:4.15 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: tls + readOnly: true + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: observatorium-thanos-query-frontend + terminationGracePeriodSeconds: 120 + volumes: + - name: tls + secret: + secretName: query-frontend-tls +parameters: +- name: LOG_LEVEL + value: warn +- name: REPLICAS + value: "1" +- name: CPU_REQUEST + value: 100m +- name: MEMORY_LIMIT + value: 1Gi +- name: MEMORY_REQUEST + value: 256Mi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml index 8f7e40163d..528212d27a 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-query-template.yaml @@ -177,19 +177,19 @@ objects: - --query.replica-label=replica - --query.replica-label=prometheus_replica - --query.replica-label=rule_replica - - --query.telemetry.request-duration-seconds-quantiles=0.10 + - --query.telemetry.request-duration-seconds-quantiles=0.1 - --query.telemetry.request-duration-seconds-quantiles=0.25 - --query.telemetry.request-duration-seconds-quantiles=0.75 - --query.telemetry.request-duration-seconds-quantiles=1.25 - --query.telemetry.request-duration-seconds-quantiles=1.75 - - --query.telemetry.request-duration-seconds-quantiles=2.50 - - --query.telemetry.request-duration-seconds-quantiles=3.00 - - --query.telemetry.request-duration-seconds-quantiles=5.00 - - --query.telemetry.request-duration-seconds-quantiles=10.00 - - --query.telemetry.request-duration-seconds-quantiles=15.00 - - --query.telemetry.request-duration-seconds-quantiles=30.00 - - --query.telemetry.request-duration-seconds-quantiles=60.00 - - --query.telemetry.request-duration-seconds-quantiles=120.00 + - --query.telemetry.request-duration-seconds-quantiles=2.5 + - --query.telemetry.request-duration-seconds-quantiles=3 + - --query.telemetry.request-duration-seconds-quantiles=5 + - --query.telemetry.request-duration-seconds-quantiles=10 + - --query.telemetry.request-duration-seconds-quantiles=15 + - --query.telemetry.request-duration-seconds-quantiles=30 + - --query.telemetry.request-duration-seconds-quantiles=60 + - --query.telemetry.request-duration-seconds-quantiles=120 - --query.timeout=15m - | --tracing.config=type: JAEGER diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml index d68703f37f..9ede871d5f 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-receive-router-template.yaml @@ -6,7 +6,7 @@ metadata: objects: - apiVersion: v1 data: - hashring.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' + hashrings.json: '[{"hashring":"default","tenants":["3833951d-bede-4a53-85e5-f73f4913973f","9ca26972-4328-4fe3-92db-31302013d03f","AC879303-C60F-4D0D-A6D5-A485CFD638B8","99c885bc-2d64-4c4d-b55e-8bf30d98c657","770c1124-6ae8-4324-a9d4-9ce08590094b","37b8fd3f-56ff-4b64-8272-917c9b0d1623","d17ea8ce-d4c6-42ef-b259-7d10c9227e93","1b9b6e43-9128-4bbf-bfff-3c120bbe6f11","0fc2b00e-201b-4c17-b9f2-19d91adc4fd2","8ace13a2-1c72-4559-b43d-ab43e32a255a","0031e8d6-e50a-47ea-aecb-c7e0bd84b3f1"],"algorithm":"ketama"},{"hashring":"rhel","tenants":["72e6f641-b2e2-47eb-bbc2-fee3c8fbda26"],"algorithm":"ketama"},{"hashring":"telemeter","tenants":["FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"],"algorithm":"ketama"}]' kind: ConfigMap metadata: creationTimestamp: null diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index a734b59020..e0a8b0cf09 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -12,6 +12,7 @@ import ( "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/memcached" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/query" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/queryfrontend" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/receive" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" @@ -64,7 +65,9 @@ type ObservatoriumMetrics struct { ReceiveRouterPreManifestsHook func(*receive.Router) QueryRulePreManifestsHook func(*query.QueryDeployment) QueryAdhocPreManifestsHook func(*query.QueryDeployment) + QueryFrontendPreManifestsHook func(*queryfrontend.QueryFrontendDeployment) storesRegister []string + queryAdhocURL string } // ObservatoriumMetricsInstance contains the configuration for a metrics instance in an observatorium instance. @@ -107,6 +110,118 @@ func (o *ObservatoriumMetrics) Manifests(generator *mimic.Generator) { generator.Add("observatorium-metrics-receive-router-template.yaml", withStatusRemove(o.makeReceiveRouter())) generator.Add("observatorium-metrics-query-rule-template.yaml", withStatusRemove(o.makeQueryConfig(true, o.QueryRulePreManifestsHook))) generator.Add("observatorium-metrics-query-template.yaml", withStatusRemove(o.makeQueryConfig(false, o.QueryAdhocPreManifestsHook))) + generator.Add("observatorium-metrics-query-frontend-template.yaml", withStatusRemove(o.makeQueryFrontend())) +} + +func (o *ObservatoriumMetrics) makeQueryFrontend() encoding.Encoder { + queryFrontend := queryfrontend.NewQueryFrontend() + + // K8s config + queryFrontend.Image = thanosImage + queryFrontend.ImageTag = o.ThanosImageTag + queryFrontend.Namespace = o.Namespace + queryFrontend.Replicas = 1 + delete(queryFrontend.PodResources.Limits, corev1.ResourceCPU) + queryFrontend.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("100m") + queryFrontend.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("256Mi") + queryFrontend.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("1Gi") + tlsSecret := "query-frontend-tls" + queryFrontend.Sidecars = []k8sutil.ContainerProvider{ + makeOauthProxy(10902, o.Namespace, queryFrontend.Name, tlsSecret), + makeJaegerAgent("observatorium-tools"), + } + + // Query-fe config + queryFrontend.Options.LogLevel = log.LogLevelWarn + queryFrontend.Options.LogFormat = log.LogFormatLogfmt + queryFrontend.Options.QueryFrontendCompressResponses = true + queryFrontend.Options.QueryFrontendDownstreamURL = o.queryAdhocURL + queryFrontend.Options.QueryFrontendLogQueriesLongerThan = model.Duration(5 * time.Second) + // Add memcached config + queryFrontend.Options.TracingConfig = &trclient.TracingConfig{ + Type: trclient.Jaeger, + Config: jaeger.Config{ + SamplerParam: 2, + SamplerType: jaeger.SamplerTypeRateLimiting, + ServiceName: queryFrontend.CommonLabels[k8sutil.NameLabel], + }, + } + queryFrontend.Options.QueryRangeSplitInterval = model.Duration(24 * time.Hour) + queryFrontend.Options.LabelsSplitInterval = model.Duration(24 * time.Hour) + zero := 0 + queryFrontend.Options.QueryRangeMaxRetriesPerRequest = &zero + queryFrontend.Options.LabelsMaxRetriesPerRequest = &zero + queryFrontend.Options.LabelsDefaultTimeRange = model.Duration(14 * 24 * time.Hour) + queryFrontend.Options.CacheCompressionType = queryfrontend.CacheCompressionTypeSnappy + + // Execute preManifestsHook + if o.QueryFrontendPreManifestsHook != nil { + o.QueryFrontendPreManifestsHook(queryFrontend) + } + + // Post process + manifests := queryFrontend.Manifests() + postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests), queryFrontend.Namespace) + addQuayPullSecret(getObject[*corev1.ServiceAccount](manifests)) + service := getObject[*corev1.Service](manifests) + service.ObjectMeta.Annotations[servingCertSecretNameAnnotation] = tlsSecret + // Add annotations for openshift oauth so that the route to access the query ui works + serviceAccount := getObject[*corev1.ServiceAccount](manifests) + if serviceAccount.Annotations == nil { + serviceAccount.Annotations = map[string]string{} + } + serviceAccount.Annotations["serviceaccounts.openshift.io/oauth-redirectreference.application"] = fmt.Sprintf(`{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"%s"}}`, queryFrontend.Name) + + // Add route for oauth-proxy + manifests["oauth-proxy-route"] = &routev1.Route{ + TypeMeta: metav1.TypeMeta{ + Kind: "Route", + APIVersion: routev1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: queryFrontend.Name, + Namespace: o.Namespace, + Labels: maps.Clone(getObject[*appsv1.Deployment](manifests).ObjectMeta.Labels), + Annotations: map[string]string{ + "cert-manager.io/issuer-kind": "ClusterIssuer", + "cert-manager.io/issuer-name": "letsencrypt-prod-http", + }, + }, + Spec: routev1.RouteSpec{ + Port: &routev1.RoutePort{ + TargetPort: intstr.FromString("https"), + }, + TLS: &routev1.TLSConfig{ + Termination: routev1.TLSTerminationReencrypt, + InsecureEdgeTerminationPolicy: routev1.InsecureEdgeTerminationPolicyRedirect, + }, + To: routev1.RouteTargetReference{ + Kind: "Service", + Name: queryFrontend.Name, + }, + }, + } + + // Wrap in template, add parameters + defaultParams := defaultTemplateParams(defaultTemplateParamsConfig{ + LogLevel: string(queryFrontend.Options.LogLevel), + Replicas: queryFrontend.Replicas, + CPURequest: queryFrontend.PodResources.Requests[corev1.ResourceCPU], + MemoryLimit: queryFrontend.PodResources.Limits[corev1.ResourceMemory], + MemoryRequest: queryFrontend.PodResources.Requests[corev1.ResourceMemory], + }) + template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ + Name: queryFrontend.Name, + }, append(defaultParams, []templatev1.Parameter{ + { + Name: "OAUTH_PROXY_COOKIE_SECRET", + Generate: "expression", + From: "[a-zA-Z0-9]{40}", + }, + }...)) + + // Adding a special encoder wrapper to replace the templated values in the template with their corresponding template parameter. + return NewDefaultTemplateYAML(encoding.GhodssYAML(template[""]), queryFrontend.Name) } func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook func(*query.QueryDeployment)) encoding.Encoder { @@ -165,6 +280,10 @@ func (o *ObservatoriumMetrics) makeQueryConfig(isRuleQuery bool, preManifestHook queryDplt.Options.QueryTelemetryRequestDurationSecondsQuantiles = []float64{0.1, 0.25, 0.75, 1.25, 1.75, 2.5, 3, 5, 10, 15, 30, 60, 120} } + if !isRuleQuery { + o.queryAdhocURL = fmt.Sprintf("http://%s.%s.svc.cluster.local:10902", queryDplt.Name, queryDplt.Namespace) + } + // Execute preManifestsHook if preManifestHook != nil { preManifestHook(queryDplt) @@ -346,15 +465,16 @@ func (o *ObservatoriumMetrics) makeReceiveRouter() encoding.Encoder { baseHashring = append(baseHashring, newHashring) } + hashringFileName := "hashrings.json" controller.ConfigMaps[baseHashringCm] = map[string]string{ - "hashring.json": baseHashring.String(), + hashringFileName: baseHashring.String(), } // Controller config controller.Options.ConfigMapName = baseHashringCm controller.Options.ConfigMapGeneratedName = generatedHashringCm controller.Options.Namespace = o.Namespace - controller.Options.FileName = "hashrings.json" + controller.Options.FileName = hashringFileName controllerManifests := controller.Manifests() for k, v := range controllerManifests {