From c9f914e2d89fbd54cfd12cba8b0fcbd4393a46f8 Mon Sep 17 00:00:00 2001 From: Charlie Le <3375195+CharlieTLe@users.noreply.github.com> Date: Wed, 10 Jul 2024 00:54:15 -0700 Subject: [PATCH] docs: Getting Started Improvements (#6046) * docs: Getting Started Improvements - Enable auth for multi-tenancy. - Load the Cortex dashboards from the cortex-jsonnet repo for the getting-started tutorial. This should allow newcomers to get a better feel for what Cortex is doing. - Configure recording rules, alerting rules, and alertmanager with cortextool. - Add Grafana datasources to showcase multi-tenancy. Signed-off-by: Charlie Le * Update tenant names to be generic Signed-off-by: Charlie Le * Add Makefile to update dashboards Signed-off-by: Charlie Le * Update datasource names in Grafana Signed-off-by: Charlie Le * Use mixin.libsonnet instead of config.libsonnet Signed-off-by: Friedrich Gonzalez --------- Signed-off-by: Charlie Le Signed-off-by: Friedrich Gonzalez Co-authored-by: Friedrich Gonzalez --- docs/getting-started/.gitignore | 1 + docs/getting-started/Makefile | 20 + docs/getting-started/_index.md | 37 +- docs/getting-started/alertmanager-config.yaml | 4 + docs/getting-started/alerts.yaml | 650 +++ docs/getting-started/cortex-config.yaml | 35 +- .../dashboards/alertmanager.json | 2695 ++++++++++ .../cortex-compactor-resources.json | 782 +++ .../dashboards/cortex-compactor.json | 1771 +++++++ .../dashboards/cortex-config.json | 284 ++ .../dashboards/cortex-object-store.json | 1032 ++++ .../dashboards/cortex-queries.json | 3150 ++++++++++++ .../dashboards/cortex-reads.json | 4518 +++++++++++++++++ .../dashboards/cortex-rollout-progress.json | 1390 +++++ .../dashboards/cortex-scaling.json | 310 ++ .../dashboards/cortex-slow-queries.json | 287 ++ .../dashboards/cortex-writes.json | 2352 +++++++++ docs/getting-started/dashboards/ruler.json | 2849 +++++++++++ docs/getting-started/docker-compose.yaml | 2 + docs/getting-started/grafana-dashboard.yaml | 12 + docs/getting-started/grafana-datasource.yaml | 60 + docs/getting-started/mixin.libsonnet | 10 + docs/getting-started/prometheus-config.yaml | 19 +- docs/getting-started/rules.yaml | 560 ++ 24 files changed, 22820 insertions(+), 10 deletions(-) create mode 100644 docs/getting-started/.gitignore create mode 100644 docs/getting-started/Makefile create mode 100644 docs/getting-started/alertmanager-config.yaml create mode 100644 docs/getting-started/alerts.yaml create mode 100644 docs/getting-started/dashboards/alertmanager.json create mode 100644 docs/getting-started/dashboards/cortex-compactor-resources.json create mode 100644 docs/getting-started/dashboards/cortex-compactor.json create mode 100644 docs/getting-started/dashboards/cortex-config.json create mode 100644 docs/getting-started/dashboards/cortex-object-store.json create mode 100644 docs/getting-started/dashboards/cortex-queries.json create mode 100644 docs/getting-started/dashboards/cortex-reads.json create mode 100644 docs/getting-started/dashboards/cortex-rollout-progress.json create mode 100644 docs/getting-started/dashboards/cortex-scaling.json create mode 100644 docs/getting-started/dashboards/cortex-slow-queries.json create mode 100644 docs/getting-started/dashboards/cortex-writes.json create mode 100644 docs/getting-started/dashboards/ruler.json create mode 100644 docs/getting-started/grafana-dashboard.yaml create mode 100644 docs/getting-started/mixin.libsonnet create mode 100644 docs/getting-started/rules.yaml diff --git a/docs/getting-started/.gitignore b/docs/getting-started/.gitignore new file mode 100644 index 0000000000..011cb58368 --- /dev/null +++ b/docs/getting-started/.gitignore @@ -0,0 +1 @@ +cortex-jsonnet diff --git a/docs/getting-started/Makefile b/docs/getting-started/Makefile new file mode 100644 index 0000000000..242205acd9 --- /dev/null +++ b/docs/getting-started/Makefile @@ -0,0 +1,20 @@ +# Description: This Makefile is used to generate the dashboards, alerts, and rules files from the cortex-jsonnet repo. +# It uses a customized mixin.libsonnet file to generate the files. + +all: + # Clone the cortex-jsonnet repository + git clone https://github.com/cortexproject/cortex-jsonnet --depth 1 + # Show the diff between the mixin.libsonnet file and the cortex-jsonnet/cortex-mixin/mixin.libsonnet file + git --no-pager diff --no-index cortex-jsonnet/cortex-mixin/mixin.libsonnet mixin.libsonnet || true + # Copy the mixin.libsonnet file to the cortex-jsonnet directory + cp mixin.libsonnet cortex-jsonnet/cortex-mixin + # Run the build-mixin target + docker run -v $(shell pwd)/cortex-jsonnet:/workspace -w /workspace -it quay.io/cortexproject/cortex-jsonnet-build-image:fbe4726 make build-mixin + # Remove the dashboards, alerts.yaml, and rules.yaml files + rm -rf dashboards alerts.yaml rules.yaml + # Copy the files from the cortex-jsonnet/cortex-mixin/out directory to the current directory + mv cortex-jsonnet/cortex-mixin/out/* . + +clean: + # Remove the cortex-jsonnet directory + rm -rf cortex-jsonnet \ No newline at end of file diff --git a/docs/getting-started/_index.md b/docs/getting-started/_index.md index ac04274d3a..19d744317e 100644 --- a/docs/getting-started/_index.md +++ b/docs/getting-started/_index.md @@ -54,13 +54,42 @@ how this is configured. #### Configure SeaweedFS (S3) ```sh -# Create a bucket in SeaweedFS -curl --aws-sigv4 "aws:amz:local:seaweedfs" --user "any:any" -X PUT http://localhost:8333/cortex-bucket +# Create buckets in SeaweedFS +$ curl --aws-sigv4 "aws:amz:local:seaweedfs" --user "any:any" -X PUT http://localhost:8333/cortex-blocks +$ curl --aws-sigv4 "aws:amz:local:seaweedfs" --user "any:any" -X PUT http://localhost:8333/cortex-ruler +$ curl --aws-sigv4 "aws:amz:local:seaweedfs" --user "any:any" -X PUT http://localhost:8333/cortex-alertmanager ``` +#### Configure Cortex Recording Rules and Alerting Rules + +We can configure Cortex with [cortextool](https://github.com/cortexproject/cortex-tools/) to load [recording rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) and [alerting rules](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/). This is optional, but it is helpful to see how Cortex can be configured to manage rules and alerts. + +```sh +# Configure recording rules for the cortex tenant (optional) +$ docker run --network host -v $(pwd):/workspace -w /workspace quay.io/cortexproject/cortex-tools:v0.17.0 rules sync rules.yaml alerts.yaml --id cortex --address http://localhost:9009 +``` + +#### Configure Cortex Alertmanager + +Cortex also comes with a multi-tenant Alertmanager. Let's load configuration for it to be able to view them in Grafana. + +```sh +# Configure alertmanager for the cortex tenant +$ docker run --network host -v $(pwd):/workspace -w /workspace quay.io/cortexproject/cortex-tools:v0.17.0 alertmanager load alertmanager-config.yaml --id cortex --address http://localhost:9009 +``` + +You can configure Alertmanager in [Grafana as well](http://localhost:3000/alerting/notifications?search=&alertmanager=Cortex%20Alertmanager). + +There's a list of recording rules and alerts that should be visible in Grafana [here](http://localhost:3000/alerting/list?view=list&search=datasource:Cortex). + #### Explore -Grafana is configured to use Cortex as a data source. You can explore the data source in Grafana and query metrics. For example, this [explore](http://localhost:3000/explore?schemaVersion=1&panes=%7B%22au0%22:%7B%22datasource%22:%22P6693426190CB2316%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22rate%28prometheus_remote_storage_samples_total%5B$__rate_interval%5D%29%22,%22range%22:true,%22instant%22:true,%22datasource%22:%7B%22type%22:%22prometheus%22,%22uid%22:%22P6693426190CB2316%22%7D,%22editorMode%22:%22builder%22,%22legendFormat%22:%22__auto%22,%22useBackend%22:false,%22disableTextWrap%22:false,%22fullMetaSearch%22:false,%22includeNullMetadata%22:false%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D%7D&orgId=1) page is showing the rate of samples being sent to Cortex. +Grafana is configured to use Cortex as a data source. Grafana is also configured with [Cortex Dashboards](http://localhost:3000/dashboards?tag=cortex) to understand the state of the Cortex instance. The dashboards are generated from the cortex-jsonnet repository. There is a Makefile in the repository that can be used to update the dashboards. + +```sh +# Update the dashboards (optional) +$ make +``` If everything is working correctly, then the metrics seen in Grafana were successfully sent from Prometheus to Cortex via `remote_write`! @@ -68,7 +97,7 @@ via `remote_write`! Other things to explore: - [Cortex](http://localhost:9009) - Administrative interface for Cortex - - Try shutting down the [ingester](http://localhost:9009/ingester/shutdown) and see how it affects metric ingestion. + - Try shutting down the ingester, and see how it affects metric ingestion. - Restart Cortex to bring the ingester back online, and see how Prometheus catches up. - Does it affect the querying of metrics in Grafana? - [Prometheus](http://localhost:9090) - Prometheus instance that is sending metrics to Cortex diff --git a/docs/getting-started/alertmanager-config.yaml b/docs/getting-started/alertmanager-config.yaml new file mode 100644 index 0000000000..a8f6e03043 --- /dev/null +++ b/docs/getting-started/alertmanager-config.yaml @@ -0,0 +1,4 @@ +receivers: + - name: default-receiver +route: + receiver: default-receiver \ No newline at end of file diff --git a/docs/getting-started/alerts.yaml b/docs/getting-started/alerts.yaml new file mode 100644 index 0000000000..cddd1f57da --- /dev/null +++ b/docs/getting-started/alerts.yaml @@ -0,0 +1,650 @@ +groups: +- name: cortex_alerts + rules: + - alert: CortexIngesterUnhealthy + annotations: + message: | + Cortex cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ printf "%f" $value }} unhealthy ingester(s). + expr: | + min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 + for: 15m + labels: + severity: critical + - alert: CortexRequestErrors + annotations: + message: | + The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. + expr: | + 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready"}[1m])) + / + sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready"}[1m])) + > 1 + for: 15m + labels: + severity: critical + - alert: CortexRequestLatency + annotations: + message: | + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + expr: | + cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop"} + > + 2.5 + for: 15m + labels: + severity: warning + - alert: CortexQueriesIncorrect + annotations: + message: | + The Cortex cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. + expr: | + 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) + / + sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 + for: 15m + labels: + severity: warning + - alert: CortexInconsistentRuntimeConfig + annotations: + message: | + An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. + expr: | + count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 + for: 1h + labels: + severity: critical + - alert: CortexBadRuntimeConfig + annotations: + message: | + {{ $labels.job }} failed to reload runtime config. + expr: | + # The metric value is reset to 0 on error while reloading the config at runtime. + cortex_runtime_config_last_reload_successful == 0 + for: 5m + labels: + severity: critical + - alert: CortexFrontendQueriesStuck + annotations: + message: | + There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} query-frontend. + expr: | + sum by (cluster, namespace) (cortex_query_frontend_queue_length) > 1 + for: 5m + labels: + severity: critical + - alert: CortexSchedulerQueriesStuck + annotations: + message: | + There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} query-scheduler. + expr: | + sum by (cluster, namespace) (cortex_query_scheduler_queue_length) > 1 + for: 5m + labels: + severity: critical + - alert: CortexMemcachedRequestErrors + annotations: + message: | + Memcached {{ $labels.name }} used by Cortex {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. + expr: | + ( + sum by(cluster, namespace, name, operation) (rate(thanos_memcached_operation_failures_total[1m])) / + sum by(cluster, namespace, name, operation) (rate(thanos_memcached_operations_total[1m])) + ) * 100 > 5 + for: 5m + labels: + severity: warning + - alert: CortexIngesterRestarts + annotations: + message: | + {{ $labels.job }}/{{ $labels.instance }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. + expr: | + changes(process_start_time_seconds{job=~".+(cortex|ingester.*)"}[30m]) >= 2 + labels: + severity: warning + - alert: CortexKVStoreFailure + annotations: + message: | + Cortex {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. + expr: | + ( + sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) + / + sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) + ) + # We want to get alerted only in case there's a constant failure. + == 1 + for: 5m + labels: + severity: warning + - alert: CortexMemoryMapAreasTooHigh + annotations: + message: | + {{ $labels.job }}/{{ $labels.instance }} has a number of mmap-ed areas close to the limit. + expr: | + process_memory_map_areas{job=~".+(cortex|ingester.*|store-gateway)"} / process_memory_map_areas_limit{job=~".+(cortex|ingester.*|store-gateway)"} > 0.8 + for: 5m + labels: + severity: critical +- name: cortex_ingester_instance_alerts + rules: + - alert: CortexIngesterReachingSeriesLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.instance }} has reached {{ $value | humanizePercentage }} of its series limit. + expr: | + ( + (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_series"} > 0) + ) > 0.8 + for: 3h + labels: + severity: warning + - alert: CortexIngesterReachingSeriesLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.instance }} has reached {{ $value | humanizePercentage }} of its series limit. + expr: | + ( + (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_series"} > 0) + ) > 0.9 + for: 5m + labels: + severity: critical + - alert: CortexIngesterReachingTenantsLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.instance }} has reached {{ $value | humanizePercentage }} of its tenant limit. + expr: | + ( + (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_tenants"} > 0) + ) > 0.7 + for: 5m + labels: + severity: warning + - alert: CortexIngesterReachingTenantsLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.instance }} has reached {{ $value | humanizePercentage }} of its tenant limit. + expr: | + ( + (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_tenants"} > 0) + ) > 0.8 + for: 5m + labels: + severity: critical + - alert: CortexDistributorReachingInflightPushRequestLimit + annotations: + message: | + Distributor {{ $labels.job }}/{{ $labels.instance }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. + expr: | + ( + (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) + and ignoring (limit) + (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) + ) > 0.8 + for: 5m + labels: + severity: critical +- name: cortex-rollout-alerts + rules: + - alert: CortexRolloutStuck + annotations: + message: | + The {{ $labels.statefulset }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. + expr: | + ( + max without (revision) ( + kube_statefulset_status_current_revision + unless + kube_statefulset_status_update_revision + ) + * + ( + kube_statefulset_replicas + != + kube_statefulset_status_replicas_updated + ) + ) and ( + changes(kube_statefulset_status_replicas_updated[15m]) + == + 0 + ) + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + for: 15m + labels: + severity: warning + - alert: CortexRolloutStuck + annotations: + message: | + The {{ $labels.deployment }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. + expr: | + ( + kube_deployment_spec_replicas + != + kube_deployment_status_replicas_updated + ) and ( + changes(kube_deployment_status_replicas_updated[15m]) + == + 0 + ) + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + for: 15m + labels: + severity: warning +- name: cortex-provisioning + rules: + - alert: CortexProvisioningTooManyActiveSeries + annotations: + message: | + The number of in-memory series per ingester in {{ $labels.cluster }}/{{ $labels.namespace }} is too high. + expr: | + avg by (cluster, namespace) (cortex_ingester_memory_series) > 1.6e6 + for: 2h + labels: + severity: warning + - alert: CortexProvisioningTooManyWrites + annotations: + message: | + Ingesters in {{ $labels.cluster }}/{{ $labels.namespace }} ingest too many samples per second. + expr: | + avg by (cluster, namespace) (rate(cortex_ingester_ingested_samples_total[1m])) > 80e3 + for: 15m + labels: + severity: warning + - alert: CortexAllocatingTooMuchMemory + annotations: + message: | + Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. + expr: | + ( + container_memory_working_set_bytes{container="ingester"} + / + container_spec_memory_limit_bytes{container="ingester"} + ) > 0.65 + for: 15m + labels: + severity: warning + - alert: CortexAllocatingTooMuchMemory + annotations: + message: | + Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. + expr: | + ( + container_memory_working_set_bytes{container="ingester"} + / + container_spec_memory_limit_bytes{container="ingester"} + ) > 0.8 + for: 15m + labels: + severity: critical +- name: ruler_alerts + rules: + - alert: CortexRulerTooManyFailedPushes + annotations: + message: | + Cortex Ruler {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. + expr: | + 100 * ( + sum by (cluster, namespace, instance) (rate(cortex_ruler_write_requests_failed_total[1m])) + / + sum by (cluster, namespace, instance) (rate(cortex_ruler_write_requests_total[1m])) + ) > 1 + for: 5m + labels: + severity: critical + - alert: CortexRulerTooManyFailedQueries + annotations: + message: | + Cortex Ruler {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. + expr: | + 100 * ( + sum by (cluster, namespace, instance) (rate(cortex_ruler_queries_failed_total[1m])) + / + sum by (cluster, namespace, instance) (rate(cortex_ruler_queries_total[1m])) + ) > 1 + for: 5m + labels: + severity: warning + - alert: CortexRulerMissedEvaluations + annotations: + message: | + Cortex Ruler {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. + expr: | + sum by (cluster, namespace, instance, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) + / + sum by (cluster, namespace, instance, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) + > 0.01 + for: 5m + labels: + severity: warning + - alert: CortexRulerFailedRingCheck + annotations: + message: | + Cortex Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. + expr: | + sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) + > 0 + for: 5m + labels: + severity: critical +- name: gossip_alerts + rules: + - alert: CortexGossipMembersMismatch + annotations: + message: | + Cortex instance {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} sees incorrect number of gossip members. + expr: | + memberlist_client_cluster_members_count + != on (cluster, namespace) group_left + sum by (cluster, namespace) (up{job=~".+/(compactor|distributor|ingester.*|querier.*|ruler|store-gateway|cortex)"}) + for: 5m + labels: + severity: warning +- name: etcd_alerts + rules: + - alert: EtcdAllocatingTooMuchMemory + annotations: + message: | + Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. + expr: | + ( + container_memory_working_set_bytes{container="etcd"} + / + container_spec_memory_limit_bytes{container="etcd"} + ) > 0.65 + for: 15m + labels: + severity: warning + - alert: EtcdAllocatingTooMuchMemory + annotations: + message: | + Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. + expr: | + ( + container_memory_working_set_bytes{container="etcd"} + / + container_spec_memory_limit_bytes{container="etcd"} + ) > 0.8 + for: 15m + labels: + severity: critical +- name: alertmanager_alerts + rules: + - alert: CortexAlertmanagerSyncConfigsFailing + annotations: + message: | + Cortex Alertmanager {{ $labels.job }}/{{ $labels.instance }} is failing to read tenant configurations from storage. + expr: | + rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 + for: 30m + labels: + severity: critical + - alert: CortexAlertmanagerRingCheckFailing + annotations: + message: | + Cortex Alertmanager {{ $labels.job }}/{{ $labels.instance }} is unable to check tenants ownership via the ring. + expr: | + rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 + for: 10m + labels: + severity: critical + - alert: CortexAlertmanagerPartialStateMergeFailing + annotations: + message: | + Cortex Alertmanager {{ $labels.job }}/{{ $labels.instance }} is failing to merge partial state changes received from a replica. + expr: | + rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 + for: 10m + labels: + severity: critical + - alert: CortexAlertmanagerReplicationFailing + annotations: + message: | + Cortex Alertmanager {{ $labels.job }}/{{ $labels.instance }} is failing to replicating partial state to its replicas. + expr: | + rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 + for: 10m + labels: + severity: critical + - alert: CortexAlertmanagerPersistStateFailing + annotations: + message: | + Cortex Alertmanager {{ $labels.job }}/{{ $labels.instance }} is unable to persist full state snaphots to remote storage. + expr: | + rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 + for: 1h + labels: + severity: critical + - alert: CortexAlertmanagerInitialSyncFailed + annotations: + message: | + Cortex Alertmanager {{ $labels.job }}/{{ $labels.instance }} was unable to obtain some initial state when starting up. + expr: | + increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 + labels: + severity: critical +- name: cortex_blocks_alerts + rules: + - alert: CortexIngesterHasNotShippedBlocks + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not shipped any block in the last 4 hours. + expr: | + (min by(cluster, namespace, instance) (time() - thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester.*"}) > 60 * 60 * 4) + and + (max by(cluster, namespace, instance) (thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester.*"}) > 0) + and + # Only if the ingester has ingested samples over the last 4h. + (max by(cluster, namespace, instance) (rate(cortex_ingester_ingested_samples_total[4h])) > 0) + and + # Only if the ingester was ingesting samples 4h ago. This protects from the case the ingester instance + # had ingested samples in the past, then no traffic was received for a long period and then it starts + # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving + # samples, while the a block shipping is expected within the next 4h. + (max by(cluster, namespace, instance) (rate(cortex_ingester_ingested_samples_total[1h] offset 4h)) > 0) + for: 15m + labels: + severity: critical + - alert: CortexIngesterHasNotShippedBlocksSinceStart + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not shipped any block in the last 4 hours. + expr: | + (max by(cluster, namespace, instance) (thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester.*"}) == 0) + and + (max by(cluster, namespace, instance) (rate(cortex_ingester_ingested_samples_total[4h])) > 0) + for: 4h + labels: + severity: critical + - alert: CortexIngesterHasUnshippedBlocks + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't been successfully uploaded to the storage yet. + expr: | + (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) + and + (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) + for: 15m + labels: + severity: critical + - alert: CortexIngesterTSDBHeadCompactionFailed + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to compact TSDB head. + expr: | + rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 + for: 15m + labels: + severity: critical + - alert: CortexIngesterTSDBHeadTruncationFailed + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to truncate TSDB head. + expr: | + rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 + labels: + severity: critical + - alert: CortexIngesterTSDBCheckpointCreationFailed + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to create TSDB checkpoint. + expr: | + rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 + labels: + severity: critical + - alert: CortexIngesterTSDBCheckpointDeletionFailed + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to delete TSDB checkpoint. + expr: | + rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 + labels: + severity: critical + - alert: CortexIngesterTSDBWALTruncationFailed + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to truncate TSDB WAL. + expr: | + rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 + labels: + severity: warning + - alert: CortexIngesterTSDBWALCorrupted + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} got a corrupted TSDB WAL. + expr: | + rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0 + labels: + severity: critical + - alert: CortexIngesterTSDBWALWritesFailed + annotations: + message: | + Cortex Ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to write to TSDB WAL. + expr: | + rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 + for: 3m + labels: + severity: critical + - alert: CortexQuerierHasNotScanTheBucket + annotations: + message: | + Cortex Querier {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not successfully scanned the bucket since {{ $value | humanizeDuration }}. + expr: | + (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30) + and + cortex_querier_blocks_last_successful_scan_timestamp_seconds > 0 + for: 5m + labels: + severity: critical + - alert: CortexQuerierHighRefetchRate + annotations: + message: | + Cortex Queries in {{ $labels.cluster }}/{{ $labels.namespace }} are refetching series from different store-gateways (because of missing blocks) for the {{ printf "%.0f" $value }}% of queries. + expr: | + 100 * ( + ( + sum by(cluster, namespace) (rate(cortex_querier_storegateway_refetches_per_query_count[5m])) + - + sum by(cluster, namespace) (rate(cortex_querier_storegateway_refetches_per_query_bucket{le="0.0"}[5m])) + ) + / + sum by(cluster, namespace) (rate(cortex_querier_storegateway_refetches_per_query_count[5m])) + ) + > 1 + for: 10m + labels: + severity: warning + - alert: CortexStoreGatewayHasNotSyncTheBucket + annotations: + message: | + Cortex Store Gateway {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not successfully synched the bucket since {{ $value | humanizeDuration }}. + expr: | + (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) + and + cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 + for: 5m + labels: + severity: critical + - alert: CortexBucketIndexNotUpdated + annotations: + message: | + Cortex bucket index for tenant {{ $labels.user }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration }}. + expr: | + min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 + labels: + severity: critical + - alert: CortexTenantHasPartialBlocks + annotations: + message: | + Cortex tenant {{ $labels.user }} in {{ $labels.cluster }}/{{ $labels.namespace }} has {{ $value }} partial blocks. + expr: | + max by(cluster, namespace, user) (cortex_bucket_blocks_partials_count) > 0 + for: 6h + labels: + severity: warning +- name: cortex_compactor_alerts + rules: + - alert: CortexCompactorHasNotSuccessfullyCleanedUpBlocks + annotations: + message: | + Cortex Compactor {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not successfully cleaned up blocks in the last 6 hours. + expr: | + (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) + for: 1h + labels: + severity: critical + - alert: CortexCompactorHasNotSuccessfullyRunCompaction + annotations: + message: | + Cortex Compactor {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not run compaction in the last 24 hours. + expr: | + (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) + and + (cortex_compactor_last_successful_run_timestamp_seconds > 0) + for: 1h + labels: + severity: critical + - alert: CortexCompactorHasNotSuccessfullyRunCompaction + annotations: + message: | + Cortex Compactor {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not run compaction in the last 24 hours. + expr: | + cortex_compactor_last_successful_run_timestamp_seconds == 0 + for: 24h + labels: + severity: critical + - alert: CortexCompactorHasNotSuccessfullyRunCompaction + annotations: + message: | + Cortex Compactor {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} failed to run 2 consecutive compactions. + expr: | + increase(cortex_compactor_runs_failed_total[2h]) >= 2 + labels: + severity: critical + - alert: CortexCompactorHasNotUploadedBlocks + annotations: + message: | + Cortex Compactor {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not uploaded any block in the last 24 hours. + expr: | + (time() - thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor.*"} > 60 * 60 * 24) + and + (thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor.*"} > 0) + for: 15m + labels: + severity: critical + - alert: CortexCompactorHasNotUploadedBlocks + annotations: + message: | + Cortex Compactor {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not uploaded any block in the last 24 hours. + expr: | + thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor.*"} == 0 + for: 24h + labels: + severity: critical diff --git a/docs/getting-started/cortex-config.yaml b/docs/getting-started/cortex-config.yaml index d11b626ded..403f12d22d 100644 --- a/docs/getting-started/cortex-config.yaml +++ b/docs/getting-started/cortex-config.yaml @@ -1,14 +1,15 @@ - # Configuration for running Cortex in single-process mode. # This should not be used in production. It is only for getting started # and development. -target: all,compactor +# https://cortexmetrics.io/docs/configuration/configuration-file/#supported-contents-and-default-values-of-the-config-file +target: all,compactor,alertmanager # Disable the requirement that every request to Cortex has an # X-Scope-OrgID header. `fake` will be substituted in instead. -auth_enabled: false +auth_enabled: true +# https://cortexmetrics.io/docs/configuration/configuration-file/#server_config server: http_listen_port: 9009 @@ -17,11 +18,13 @@ server: grpc_server_max_send_msg_size: 104857600 grpc_server_max_concurrent_streams: 1000 +# https://cortexmetrics.io/docs/configuration/configuration-file/#distributor_config distributor: shard_by_all_labels: true pool: health_check_ingesters: true +# https://cortexmetrics.io/docs/configuration/configuration-file/#ingester_client_config ingester_client: grpc_client_config: # Configure the client to allow messages up to 100MB. @@ -29,6 +32,7 @@ ingester_client: max_send_msg_size: 104857600 grpc_compression: gzip +# https://cortexmetrics.io/docs/configuration/configuration-file/#ingester_config ingester: lifecycler: # The address to advertise for this ingester. Will be autodiscovered by @@ -46,11 +50,13 @@ ingester: store: inmemory replication_factor: 1 +# https://cortexmetrics.io/docs/configuration/configuration-file/#blocks_storage_config blocks_storage: + backend: s3 s3: &s3 endpoint: seaweedfs:8333 region: local - bucket_name: cortex-bucket + bucket_name: cortex-blocks access_key_id: any secret_access_key: any insecure: true @@ -61,18 +67,37 @@ blocks_storage: bucket_index: enabled: true +# https://cortexmetrics.io/docs/configuration/configuration-file/#compactor_config compactor: data_dir: /tmp/cortex/compactor sharding_ring: kvstore: store: inmemory +# https://cortexmetrics.io/docs/configuration/configuration-file/#frontend_worker_config frontend_worker: match_max_concurrent: true +# https://cortexmetrics.io/docs/configuration/configuration-file/#ruler_config ruler: enable_api: true +# https://cortexmetrics.io/docs/configuration/configuration-file/#ruler_storage_config ruler_storage: backend: s3 - s3: *s3 + s3: + <<: *s3 + bucket_name: cortex-ruler + +# https://cortexmetrics.io/docs/configuration/configuration-file/#alertmanager_config +alertmanager: + external_url: /alertmanager + enable_api: true + data_dir: /data/alertmanager + +# https://cortexmetrics.io/docs/configuration/configuration-file/#alertmanager_storage_config +alertmanager_storage: + backend: s3 + s3: + <<: *s3 + bucket_name: cortex-alertmanager \ No newline at end of file diff --git a/docs/getting-started/dashboards/alertmanager.json b/docs/getting-started/dashboards/alertmanager.json new file mode 100644 index 0000000000..b9bf3cd133 --- /dev/null +++ b/docs/getting-started/dashboards/alertmanager.json @@ -0,0 +1,2695 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{job=~\"$job\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total Alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{job=~\"$job\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total Silences", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(cortex_alertmanager_tenants_discovered{job=~\"$job\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{job=~\"$job\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{job=~\"$job\"})\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "APS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts Received", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{job=~\"$job\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{job=~\"$job\"})\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "NPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{job=~\"$job\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{job=~\"$job\"}) by(integration)\n) > 0\nor on () vector(0)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success - {{ integration }}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{job=~\"$job\"}) by(integration)", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed - {{ integration }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "NPS by integration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alert Notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_v1_alerts|alertmanager\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_v1_alerts|alertmanager\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_v1_alerts|alertmanager\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"api_v1_alerts|alertmanager\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"api_v1_alerts|alertmanager\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Configuration API (gateway) + Alertmanager UI", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"alertmanager-storage\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Operations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"alertmanager-storage\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Attributes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Exists", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Get", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: GetRange", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Upload", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Delete", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (cortex_alertmanager_tenants_owned{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod Alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod Silences", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Replication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_total{job=~\"$job\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Syncs/sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{reason}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Syncs/sec (By Reason)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "errors", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Ring Check Errors/sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant Configuration Sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{outcome}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Initial syncs /sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Initial sync duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{job=~\"$job\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch state from other alertmanagers /sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding Initial State Sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{job=~\"$job\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{job=~\"$job\"})\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Replicate state to other alertmanagers /sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{job=~\"$job\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{job=~\"$job\"})\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Merge state from other alertmanagers /sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_persist_total{job=~\"$job\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Persist state to remote storage /sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding Runtime State Sync", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Alertmanager", + "uid": "a76bee5913c97c918d9e56a3cc88cc28", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-compactor-resources.json b/docs/getting-started/dashboards/cortex-compactor-resources.json new file mode 100644 index 0000000000..dfaf7c870b --- /dev/null +++ b/docs/getting-started/dashboards/cortex-compactor-resources.json @@ -0,0 +1,782 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{job=~\"$job\",container=~\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(container_spec_cpu_quota{job=~\"$job\",container=~\"compactor\"} / container_spec_cpu_period{job=~\"$job\",container=~\"compactor\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{job=~\"$job\",container=~\"compactor\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(container_spec_memory_limit_bytes{job=~\"$job\",container=~\"compactor\"} > 0)", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU and Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{job=~\"$job\",pod=~\"compactor.*\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{job=~\"$job\",pod=~\"compactor.*\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n job=~\"$job\",\n container=\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Writes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n job=~\"$job\",\n container=\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Reads", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{job=~\"$job\"} /\n kubelet_volume_stats_capacity_bytes{job=~\"$job\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n job=~\"$job\",\n label_name=\"compactor\"\n }\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilization", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Compactor Resources", + "uid": "df9added6f1f4332f95848cca48ebd99", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-compactor.json b/docs/getting-started/dashboards/cortex-compactor.json new file mode 100644 index 0000000000..f1ae6789b9 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-compactor.json @@ -0,0 +1,1771 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "completed": "#7EB26D", + "failed": "#E24D42", + "started": "#34CCEB" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_runs_started_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "started", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_compactor_runs_completed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "completed", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_compactor_runs_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per-instance runs / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\nReset to 0 after the compaction run is completed for all tenants in the shard.\n\n", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n cortex_compactor_tenants_processing_succeeded{job=~\"$job\"} +\n cortex_compactor_tenants_processing_failed{job=~\"$job\"} +\n cortex_compactor_tenants_skipped{job=~\"$job\"}\n) / cortex_compactor_tenants_discovered{job=~\"$job\"}\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Tenants compaction progress", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Compacted blocks / sec\nRate of blocks that are generated as a result of a compaction operation.\n\n", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_compactions_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "blocks", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compacted blocks / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Per-block compaction duration\nDisplay the amount of time that it has taken to generate a single compacted block.\n\n", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per-block compaction duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(max by(user) (cortex_bucket_blocks_count{job=~\"$job\"}))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average blocks / tenant", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{job=~\"$job\"}))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{user}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Tenants with largest number of blocks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "blocks", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Blocks marked for deletion / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Blocks deletions / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Garbage Collector", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_meta_syncs_total{job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_compactor_meta_sync_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Syncs / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Sync Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Metadata Sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Operations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Attributes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Exists", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Object Store", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Get", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: GetRange", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Upload", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Delete", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Compactor", + "uid": "9c408e1d55681ecb8a22c9fab46875cc", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-config.json b/docs/getting-started/dashboards/cortex-config.json new file mode 100644 index 0000000000..20026709e7 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-config.json @@ -0,0 +1,284 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "count(cortex_config_hash{job=~\"$job\"}) by (sha256)", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "sha256:{{sha256}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Startup config file hashes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "instances", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Startup config file", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "count(cortex_runtime_config_hash{job=~\"$job\"}) by (sha256)", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "sha256:{{sha256}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Runtime config file hashes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "instances", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Runtime config file", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Config", + "uid": "61bb048ced9817b2d3e07677fb1c6290", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-object-store.json b/docs/getting-started/dashboards/cortex-object-store.json new file mode 100644 index 0000000000..20d96b2650 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-object-store.json @@ -0,0 +1,1032 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{component}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "RPS / component", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{component}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate / component", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Components", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "RPS / operation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate / operation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Operations", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Op: Get", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Op: GetRange", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Op: Exists", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Op: Attributes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Op: Upload", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Op: Delete", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Object Store", + "uid": "d5a3a4489d57c733b5677fb55370a723", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-queries.json b/docs/getting-started/dashboards/cortex-queries.json new file mode 100644 index 0000000000..d2a38c2248 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-queries.json @@ -0,0 +1,3150 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Queue Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_query_frontend_retries_sum{job=~\"$job\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Retries", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cortex_query_frontend_queue_length{job=~\"$job\"}", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{cluster}} / {{namespace}} / {{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Queue Length", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Queue Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cortex_query_scheduler_queue_length{job=~\"$job\"}", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{cluster}} / {{namespace}} / {{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Queue Length", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Intervals per Query\nThe average number of splitted queries (partitioned by time) executed a single input query.\n\n", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_split_queries_total{job=~\"$job\"}[1m])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{job=~\"$job\", method=\"split_by_interval\"}[1m]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "splitting rate", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Intervals per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_cache_hits{name=~\"frontend.+\", job=~\"$job\"}[1m])) / sum(rate(cortex_cache_fetched_keys{name=~\"frontend.+\", job=~\"$job\"}[1m])) or\nsum(rate(cortex_cache_hits_total{name=~\"frontend.+\", job=~\"$job\"}[1m])) / sum(rate(cortex_cache_fetched_keys_total{name=~\"frontend.+\", job=~\"$job\"}[1m]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Hit Rate", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Results Cache Hit %", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_cache_fetched_keys{name=~\"frontend.+\", job=~\"$job\"}[1m])) - sum(rate(cortex_cache_hits{name=~\"frontend.+\", job=~\"$job\"}[1m])) or\nsum(rate(cortex_cache_fetched_keys_total{name=~\"frontend.+\", job=~\"$job\"}[1m])) - sum(rate(cortex_cache_hits_total{name=~\"frontend.+\", job=~\"$job\"}[1m]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Miss Rate", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Results Cache misses", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend - Query Splitting and Results Cache", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Sharded Queries Ratio\nThe % of queries that have been successfully rewritten and executed in a shardable way.\nThis panel takes in account only type of queries which are supported by query sharding (eg. range queries).\n\n", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{job=~\"$job\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "sharded queries ratio", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Sharded Queries Ratio", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Number of Sharded Queries per Query\nHow many sharded queries have been executed for a single input query. It tracks only queries which have\nbeen successfully rewritten in a shardable way.\n\n", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{job=~\"$job\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Sharded Queries per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend - Query sharding", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\"}) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{slice}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Stages", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_cache_fetched_keys{job=~\"$job\",name=\"chunksmemcache\"}[1m])) - sum(rate(cortex_cache_hits{job=~\"$job\",name=\"chunksmemcache\"}[1m])) or\nsum(rate(cortex_cache_fetched_keys_total{job=~\"$job\",name=\"chunksmemcache\"}[1m])) - sum(rate(cortex_cache_hits_total{job=~\"$job\",name=\"chunksmemcache\"}[1m]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Hit rate", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk cache misses", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_cache_corrupt_chunks_total{job=~\"$job\"}[1m]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Corrupt chunks", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk cache corruptions", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{job=~\"$job\"})) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{job=~\"$job\"})) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{job=~\"$job\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_chunks_bucket:sum_rate{job=~\"$job\"})) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_chunks_bucket:sum_rate{job=~\"$job\"})) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_chunks_sum:sum_rate{job=~\"$job\"}) / sum(cluster_job:cortex_ingester_queried_chunks_count:sum_rate{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunks per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{job=~\"$job\"})) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{job=~\"$job\"})) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{job=~\"$job\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Samples per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{job=~\"$job\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of store-gateways hit per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{job=~\"$job\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Refetches of missing blocks per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{job=~\"$job\"}[1m])) / sum(rate(cortex_querier_blocks_consistency_checks_total{job=~\"$job\"}[1m]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Failure Rate", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Consistency checks failed", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier - Blocks storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(cortex_bucket_index_loaded{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Max", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(cortex_bucket_index_loaded{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Min", + "legendLink": null, + "step": 10 + }, + { + "expr": "avg(cortex_bucket_index_loaded{job=~\"$job\"})", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bucket indexes loaded (per querier)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_index_loads_total{job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_index_load_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bucket indexes load / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bucket indexes load latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "blocks", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Blocks queried / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(data_type) (rate(cortex_bucket_store_series_data_fetched_sum{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{data_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Data fetched / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(data_type) (rate(cortex_bucket_store_series_data_touched_sum{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{data_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Data touched / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway - Blocks storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_series_get_all_duration_seconds_bucket{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_series_get_all_duration_seconds_bucket{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_store_series_get_all_duration_seconds_sum{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_series_get_all_duration_seconds_count{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series fetch duration (per request)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_series_merge_duration_seconds_bucket{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_series_merge_duration_seconds_bucket{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_store_series_merge_duration_seconds_sum{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_series_merge_duration_seconds_count{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series merge duration (per request)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_result_series_sum{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) / sum(rate(cortex_bucket_store_series_result_series_count{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "avg series returned", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series returned (per request)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_bucket_store_blocks_loaded{component=\"store-gateway\",job=~\"$job\"}) without (user)", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Blocks currently loaded", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Blocks loaded / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Blocks dropped / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cortex_bucket_store_indexheader_lazy_load_total{job=~\"$job\"} - cortex_bucket_store_indexheader_lazy_unload_total{job=~\"$job\"}", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Lazy loaded index-headers", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Index-header lazy load duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{job=~\"$job\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "hit ratio", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series hash cache hit ratio", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Queries", + "uid": "d9931b1054053c8b972d320774bb8f1d", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-reads.json b/docs/getting-started/dashboards/cortex-reads.json new file mode 100644 index 0000000000..a02f9c7aa0 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-reads.json @@ -0,0 +1,4518 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "175px", + "panels": [ + { + "content": "

\n This dashboard shows health metrics for the Cortex read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed with Cortex:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Reads dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", + "fill": 1, + "format": "reqps", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n job=~\"$job\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n) +\nsum(\n rate(\n cortex_prometheus_rule_evaluations_total{\n job=~\"$job\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Instant queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Range queries per second\nRate of range queries per second being made to\nCortex via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n job=~\"$job\",\n route=~\"(prometheus|api_prom)_api_v1_query_range\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Range queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 6, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 9, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n", + "datasource": null, + "description": "", + "id": 10, + "mode": "markdown", + "span": 4, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{job=~\"$job\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency (Time in Queue)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_cache_request_duration_seconds_count{method=~\"frontend.+\", job=~\"$job\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate{job=~\"$job\", method=~\"frontend.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate{job=~\"$job\", method=~\"frontend.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate{job=~\"$job\", method=~\"frontend.+\"}) / sum(cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate{job=~\"$job\", method=~\"frontend.+\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cache - Query Results", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 17, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 20, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"/gatewaypb.StoreGateway/.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 23, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n job=~\"$job\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_memcached_operation_duration_seconds_sum{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])) * 1e3 / sum(rate(thanos_memcached_operation_duration_seconds_count{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency (getmulti)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Hit Ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because Cortex by default has an\nin-memory blocks index cache.\n\n", + "fill": 1, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n job=~\"$job\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n job=~\"$job\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{item_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hit ratio", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – Blocks storage – Block index cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (\n rate(\n thanos_memcached_operations_total{\n job=~\"$job\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_memcached_operation_duration_seconds_sum{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])) * 1e3 / sum(rate(thanos_memcached_operation_duration_seconds_count{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency (getmulti)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n thanos_cache_memcached_hits_total{\n job=~\"$job\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval]\n )\n)\n/\nsum(\n rate(\n thanos_cache_memcached_requests_total{\n job=~\"$job\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "items", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hit ratio", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – Blocks storage – Chunks cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (\n rate(\n thanos_memcached_operations_total{\n job=~\"$job\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_memcached_operation_duration_seconds_sum{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])) * 1e3 / sum(rate(thanos_memcached_operation_duration_seconds_count{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency (getmulti)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n thanos_cache_memcached_hits_total{\n job=~\"$job\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval]\n )\n)\n/\nsum(\n rate(\n thanos_cache_memcached_requests_total{\n job=~\"$job\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "items", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hit ratio", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – Blocks storage – Metadata cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (\n rate(\n thanos_memcached_operations_total{\n job=~\"$job\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_memcached_operation_duration_seconds_bucket{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_memcached_operation_duration_seconds_sum{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])) * 1e3 / sum(rate(thanos_memcached_operation_duration_seconds_count{\n job=~\"$job\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency (getmulti)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n thanos_cache_memcached_hits_total{\n job=~\"$job\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval]\n )\n)\n/\nsum(\n rate(\n thanos_cache_memcached_requests_total{\n job=~\"$job\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "items", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hit ratio", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – Blocks storage – Metadata cache (querier accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Operations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 37, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Attributes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Exists", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Blocks Object Store (Store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Get", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: GetRange", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 42, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Upload", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 43, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Delete", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"querier\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Operations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"querier\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Attributes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Exists", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Blocks Object Store (Querier accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Get", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 49, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: GetRange", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 50, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Upload", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 51, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Delete", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Reads", + "uid": "8d6ba60eccc4b6eedfa329b24b1bd339", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-rollout-progress.json b/docs/getting-started/dashboards/cortex-rollout-progress.json new file mode 100644 index 0000000000..fe24526292 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-rollout-progress.json @@ -0,0 +1,1390 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "yellow", + "value": null + }, + { + "color": "yellow", + "value": 0.999 + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "percentunit" + } + }, + "fill": 1, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "displayMode": "basic", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n sum by(cortex_service) (\n label_replace(\n kube_statefulset_status_replicas_updated{job=~\"$job\",statefulset=~\"cortex-gw|distributor|ingester.*|query-frontend.*|query-scheduler.*|querier.*|compactor|store-gateway|ruler|alertmanager\"},\n \"cortex_service\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n /\n sum by(cortex_service) (\n label_replace(\n kube_statefulset_replicas{job=~\"$job\"},\n \"cortex_service\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n) and (\n sum by(cortex_service) (\n label_replace(\n kube_statefulset_replicas{job=~\"$job\"},\n \"cortex_service\", \"$1\", \"statefulset\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n > 0\n)\n", + "format": null, + "intervalFactor": null, + "legendFormat": "{{cortex_service}}", + "legendLink": null, + "step": null + }, + { + "expr": "(\n sum by(cortex_service) (\n label_replace(\n kube_deployment_status_replicas_updated{job=~\"$job\",deployment=~\"cortex-gw|distributor|ingester.*|query-frontend.*|query-scheduler.*|querier.*|compactor|store-gateway|ruler|alertmanager\"},\n \"cortex_service\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n /\n sum by(cortex_service) (\n label_replace(\n kube_deployment_spec_replicas{job=~\"$job\"},\n \"cortex_service\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n) and (\n sum by(cortex_service) (\n label_replace(\n kube_deployment_spec_replicas{job=~\"$job\"},\n \"cortex_service\", \"$1\", \"deployment\", \"(.*?)(?:-zone-[a-z])?\"\n )\n )\n > 0\n)\n", + "format": null, + "intervalFactor": null, + "legendFormat": "{{cortex_service}}", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rollout progress", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "bargauge", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.20000000000000001 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.20000000000000001 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes 99th Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 4 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.01 + }, + { + "color": "red", + "value": 0.050000000000000003 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 4 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 4 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads 99th Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "noValue": "All healthy", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 8 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "text": { + "titleSize": 14, + "valueSize": 14 + } + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas_unavailable{job=~\"$job\", deployment=~\"cortex-gw|distributor|ingester.*|query-frontend.*|query-scheduler.*|querier.*|compactor|store-gateway|ruler|alertmanager\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{deployment}}", + "legendLink": null, + "step": null + }, + { + "expr": "kube_statefulset_status_replicas_current{job=~\"$job\", statefulset=~\"cortex-gw|distributor|ingester.*|query-frontend.*|query-scheduler.*|querier.*|compactor|store-gateway|ruler|alertmanager\"} -\nkube_statefulset_status_replicas_ready {job=~\"$job\", statefulset=~\"cortex-gw|distributor|ingester.*|query-frontend.*|query-scheduler.*|querier.*|compactor|store-gateway|ruler|alertmanager\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{statefulset}}", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Unhealthy pods", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "r.*" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 10, + "y": 8 + }, + "id": 11, + "targets": [ + { + "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{job=~\"$job\",container=~\"cortex-gw|distributor|ingester.*|query-frontend.*|query-scheduler.*|querier.*|compactor|store-gateway|ruler|alertmanager\"},\n \"version\", \"$1\", \"image\", \".*:(.+)-.*\"\n )\n)\n", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods count per Version", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "valueLabel": "version" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { }, + "sort": [ + { + "field": "container" + } + ] + } + } + ], + "type": "table" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"}))[1h:])\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "writes", + "legendLink": null, + "step": 10 + }, + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "reads", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency vs 24h ago", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "10s", + "rows": null, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "job", + "multi": false, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Rollout progress", + "uid": "7544a3a62b1be6ffd919fc990ab8ba8f", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-scaling.json b/docs/getting-started/dashboards/cortex-scaling.json new file mode 100644 index 0000000000..f481f25bdd --- /dev/null +++ b/docs/getting-started/dashboards/cortex-scaling.json @@ -0,0 +1,310 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "id": 1, + "options": { + "content": "This dashboards shows any services which are not scaled correctly.\nThe table below gives the required number of replicas and the reason why.\nWe only show services without enough replicas.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", + "mode": "markdown" + }, + "span": 12, + "title": "", + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cortex Service Scaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "400px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "__name__" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "namespace" + }, + "properties": [ + { + "id": "displayName", + "value": "Namespace" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "deployment" + }, + "properties": [ + { + "id": "displayName", + "value": "Service" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "reason" + }, + "properties": [ + { + "id": "displayName", + "value": "Reason" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Required Replicas" + } + ] + } + ] + }, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Workload-based scaling", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Scaling", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Scaling", + "uid": "88c041017b96856c9176e07cf557bdcf", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-slow-queries.json b/docs/getting-started/dashboards/cortex-slow-queries.json new file mode 100644 index 0000000000..cf1ac17c49 --- /dev/null +++ b/docs/getting-started/dashboards/cortex-slow-queries.json @@ -0,0 +1,287 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${lokidatasource}", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time range" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "from": "", + "id": 1, + "text": "Instant query", + "to": "", + "type": 1, + "value": "0" + } + ] + }, + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Step" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "id": 1, + "span": 12, + "targets": [ + { + "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | org_id=~\"${tenant_id}\" | response_time > ${min_duration}", + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Slow queries", + "transformations": [ + { + "id": "labelsToFields", + "options": { } + }, + { + "id": "calculateField", + "options": { + "alias": "Time range", + "binary": { + "left": "param_end", + "operator": "-", + "reducer": "sum", + "right": "param_start" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": false + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "caller": true, + "cluster": true, + "container": true, + "host": true, + "id": true, + "job": true, + "level": true, + "line": true, + "method": true, + "msg": true, + "name": true, + "namespace": true, + "param_end": true, + "param_start": true, + "param_time": true, + "path": true, + "pod": true, + "pod_template_hash": true, + "query_wall_time_seconds": true, + "stream": true, + "traceID": true, + "tsNs": true + }, + "indexByName": { + "Time range": 3, + "org_id": 1, + "param_query": 2, + "param_step": 4, + "response_time": 5, + "ts": 0 + }, + "renameByName": { + "org_id": "Tenant ID", + "param_query": "Query", + "param_step": "Step", + "response_time": "Duration" + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "job", + "multi": false, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "includeAll": false, + "label": "Logs datasource", + "multi": false, + "name": "lokidatasource", + "query": "loki", + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "5s", + "value": "5s" + }, + "hide": 0, + "label": "Min duration", + "name": "min_duration", + "options": [ + { + "selected": true, + "text": "5s", + "value": "5s" + } + ], + "query": "5s", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Slow Queries", + "uid": "e6f3091e29d2636e3b8393447e925668", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/cortex-writes.json b/docs/getting-started/dashboards/cortex-writes.json new file mode 100644 index 0000000000..f7b3070e9d --- /dev/null +++ b/docs/getting-started/dashboards/cortex-writes.json @@ -0,0 +1,2352 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "125px", + "panels": [ + { + "content": "

\n This dashboard shows various health metrics for the Cortex write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Writes dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{job=~\"$job\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Samples / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_ingester_memory_series{job=~\"$job\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{job=~\"$job\"}))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Active Series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(count by(user) (cortex_ingester_active_series{job=~\"$job\"}))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "reqps", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\"}[5m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_(v1|prom)_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"api_(v1|prom)_push\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 8, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=~\"api_(v1|prom)_push\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 11, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{job=~\"$job\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate{job=~\"$job\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate{job=~\"$job\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate{job=~\"$job\"}) / sum(cluster_job:cortex_kv_request_duration_seconds_count:sum_rate{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Key-value store for high-availability (HA) deduplication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=\"/cortex.Ingester/Push\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=\"/cortex.Ingester/Push\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=\"/cortex.Ingester/Push\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 16, + "legend": { + "show": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{job=~\"$job\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per pod p99 Latency", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{job=~\"$job\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate{job=~\"$job\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate{job=~\"$job\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate{job=~\"$job\"}) / sum(cluster_job:cortex_kv_request_duration_seconds_count:sum_rate{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Key-value store for the ingesters ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", + "fill": 10, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_shipper_uploads_total{job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Uploaded blocks / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Upload latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - Blocks storage - Shipper", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", + "fill": 10, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compactions / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compactions latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - Blocks storage - TSDB Head", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", + "fill": 10, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "WAL truncations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", + "fill": 10, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{job=~\"$job\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "successful", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Checkpoints created / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])) / sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "WAL truncations latency (includes checkpointing)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "WAL": "#E24D42", + "mmap-ed chunks": "#E28A42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_wal_corruptions_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "WAL", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "mmap-ed chunks", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Corruptions / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - Blocks storage - TSDB write ahead log (WAL)", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Writes", + "uid": "0156f6d15aa234d452a33a4f13c838e3", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/dashboards/ruler.json b/docs/getting-started/dashboards/ruler.json new file mode 100644 index 0000000000..d17fcad627 --- /dev/null +++ b/docs/getting-started/dashboards/ruler.json @@ -0,0 +1,2849 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cortex" + ], + "targetBlank": false, + "title": "Cortex Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_ruler_managers_total{job=~\"$job\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Active Configurations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_prometheus_rule_group_rules{job=~\"$job\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total Rules", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "reqps", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{job=~\"$job\", operation=\"/cortex.Ingester/QueryStream\"}[5m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Read from Ingesters - QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "reqps", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{job=~\"$job\", operation=\"/cortex.Ingester/Push\"}[5m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Write to Ingesters - QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{job=~\"$job\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "success", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "failed", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "EPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{job=~\"$job\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "average", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rule Evaluations Global", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{job=~\"$job\", route=~\"api_prom_rules.*|api_prom_api_v1_(rules|alerts)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_prom_rules.*|api_prom_api_v1_(rules|alerts)\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_prom_rules.*|api_prom_api_v1_(rules|alerts)\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{job=~\"$job\", route=~\"api_prom_rules.*|api_prom_api_v1_(rules|alerts)\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{job=~\"$job\", route=~\"api_prom_rules.*|api_prom_api_v1_(rules|alerts)\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (route, le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{job=~\"$job\", route=~\"api_prom_rules.*|api_prom_api_v1_(rules|alerts)\"}))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ route }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per route p99 Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Configuration API (gateway)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{job=~\"$job\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{job=~\"$job\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{job=~\"$job\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{job=~\"$job\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{job=~\"$job\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes (Ingesters)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{job=~\"$job\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{status}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{job=~\"$job\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{job=~\"$job\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{job=~\"$job\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{job=~\"$job\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads (Ingesters)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{job=~\"$job\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of store-gateways hit per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{job=~\"$job\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Refetches of missing blocks per Query", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{job=~\"$job\"}[1m])) / sum(rate(cortex_querier_blocks_consistency_checks_total{job=~\"$job\"}[1m]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Failure Rate", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Consistency checks failed", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler - Blocks storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{job=~\"$job\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{job=~\"$job\"}[$__rate_interval]))\n> 0\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ user }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delivery Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{job=~\"$job\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{job=~\"$job\"}[$__rate_interval])) > 0\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ user }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Queue Length", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{job=~\"$job\"}[$__rate_interval])) > 0\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ user }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{job=~\"$job\"}[$__rate_interval])) > 0", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ user }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Missed Iterations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{job=~\"$job\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{job=~\"$job\"}[$__rate_interval])\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ user }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{job=~\"$job\"}[$__rate_interval])) > 0", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ rule_group }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Failures", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Group Evaluations", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{job=~\"$job\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{job=~\"$job\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{ user }}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rule Evaluation per User", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"ruler-storage\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Operations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"$job\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{job=~\"$job\",component=\"ruler-storage\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "{{operation}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Error rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Attributes", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Exists", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler Configuration Object Store (Ruler accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Get", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: GetRange", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Upload", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~\"$job\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=~\"$job\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{job=~\"$job\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency of Op: Delete", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "cortex" + ], + "templating": { + "list": [ + { + "current": { + "text": "Cortex", + "value": "Cortex" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(cortex_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Cortex / Ruler", + "uid": "44d12bcb1f95661c6ab6bc946dfc3473", + "version": 0 + } \ No newline at end of file diff --git a/docs/getting-started/docker-compose.yaml b/docs/getting-started/docker-compose.yaml index f78295a4fc..354afe9d1a 100644 --- a/docs/getting-started/docker-compose.yaml +++ b/docs/getting-started/docker-compose.yaml @@ -35,6 +35,8 @@ services: - GF_LOG_LEVEL=critical volumes: - ./grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + - ./grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro + - ./dashboards/:/var/lib/grafana/dashboards/:ro ports: - "3000:3000" prometheus: diff --git a/docs/getting-started/grafana-dashboard.yaml b/docs/getting-started/grafana-dashboard.yaml new file mode 100644 index 0000000000..bd8a8cc532 --- /dev/null +++ b/docs/getting-started/grafana-dashboard.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "dashboards" + orgId: 1 + type: file + disableDeletion: false + updateIntervalSeconds: 60 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true \ No newline at end of file diff --git a/docs/getting-started/grafana-datasource.yaml b/docs/getting-started/grafana-datasource.yaml index 62a39c88db..530de8c2dd 100644 --- a/docs/getting-started/grafana-datasource.yaml +++ b/docs/getting-started/grafana-datasource.yaml @@ -6,6 +6,66 @@ datasources: access: proxy orgId: 1 url: http://cortex:9009/api/prom + jsonData: &jsonData + cacheLevel: None + httpHeaderName1: X-Scope-OrgID + httpMethod: POST + prometheusType: Cortex + prometheusVersion: 1.14.0 + timeInterval: 15s + secureJsonData: + httpHeaderValue1: cortex version: 1 editable: true isDefault: true + - name: Tenant A + type: prometheus + access: proxy + orgId: 1 + url: http://cortex:9009/api/prom + jsonData: *jsonData + secureJsonData: + httpHeaderValue1: tenant-a + version: 1 + editable: true + - name: Tenant B + type: prometheus + access: proxy + orgId: 1 + url: http://cortex:9009/api/prom + jsonData: *jsonData + secureJsonData: + httpHeaderValue1: tenant-b + version: 1 + editable: true + - name: Tenant C + type: prometheus + access: proxy + orgId: 1 + url: http://cortex:9009/api/prom + jsonData: *jsonData + secureJsonData: + httpHeaderValue1: tenant-c + version: 1 + editable: true + - name: Tenant D + type: prometheus + access: proxy + orgId: 1 + url: http://cortex:9009/api/prom + jsonData: *jsonData + secureJsonData: + httpHeaderValue1: tenant-d + version: 1 + editable: true + - orgId: 1 + name: Cortex Alertmanager + type: alertmanager + access: proxy + url: http://cortex:9009/ + jsonData: + httpHeaderName1: X-Scope-OrgID + implementation: cortex + secureJsonData: + httpHeaderValue1: cortex + version: 1 \ No newline at end of file diff --git a/docs/getting-started/mixin.libsonnet b/docs/getting-started/mixin.libsonnet new file mode 100644 index 0000000000..761bb27fc0 --- /dev/null +++ b/docs/getting-started/mixin.libsonnet @@ -0,0 +1,10 @@ +(import 'config.libsonnet') + +(import 'groups.libsonnet') + +(import 'dashboards.libsonnet') + +(import 'alerts.libsonnet') + +(import 'recording_rules.libsonnet') + { + _config+:: { + dashboard_datasource: 'Cortex', + singleBinary: true, + }, +} diff --git a/docs/getting-started/prometheus-config.yaml b/docs/getting-started/prometheus-config.yaml index 23abed51fd..bea80c9e40 100644 --- a/docs/getting-started/prometheus-config.yaml +++ b/docs/getting-started/prometheus-config.yaml @@ -6,6 +6,23 @@ scrape_configs: - job_name: "prometheus" static_configs: - targets: ["localhost:9090"] + - job_name: "cortex" + static_configs: + - targets: ["cortex:9009"] remote_write: - - url: http://cortex:9009/api/v1/push \ No newline at end of file + - url: http://cortex:9009/api/v1/push + headers: + X-Scope-OrgID: "cortex" + - url: http://cortex:9009/api/v1/push + headers: + X-Scope-OrgID: "tenant-a" + - url: http://cortex:9009/api/v1/push + headers: + X-Scope-OrgID: "tenant-b" + - url: http://cortex:9009/api/v1/push + headers: + X-Scope-OrgID: "tenant-c" + - url: http://cortex:9009/api/v1/push + headers: + X-Scope-OrgID: "tenant-d" diff --git a/docs/getting-started/rules.yaml b/docs/getting-started/rules.yaml new file mode 100644 index 0000000000..6a5646a769 --- /dev/null +++ b/docs/getting-started/rules.yaml @@ -0,0 +1,560 @@ +groups: +- name: cortex_api_1 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds_count:sum_rate +- name: cortex_api_2 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate +- name: cortex_api_3 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate +- name: cortex_querier_api + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)) + record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate +- name: cortex_cache + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)) + record: cluster_job_method:cortex_memcache_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)) + record: cluster_job_method:cortex_memcache_request_duration_seconds:50quantile + - expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_memcache_request_duration_seconds:avg + - expr: sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method) + record: cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_cache_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_cache_request_duration_seconds:50quantile + - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_cache_request_duration_seconds:avg + - expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_cache_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)) + record: cluster_job_method:cortex_cache_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)) + record: cluster_job_method:cortex_cache_request_duration_seconds:50quantile + - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_cache_request_duration_seconds:avg + - expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method) + record: cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate +- name: cortex_storage + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_bigtable_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_bigtable_request_duration_seconds:50quantile + - expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_bigtable_request_duration_seconds:avg + - expr: sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation) + record: cluster_job_operation:cortex_bigtable_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_bigtable_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_bigtable_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_cassandra_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_cassandra_request_duration_seconds:50quantile + - expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_cassandra_request_duration_seconds:avg + - expr: sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation) + record: cluster_job_operation:cortex_cassandra_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_cassandra_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_cassandra_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_dynamo_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_dynamo_request_duration_seconds:50quantile + - expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_dynamo_request_duration_seconds:avg + - expr: sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation) + record: cluster_job_operation:cortex_dynamo_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_dynamo_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_dynamo_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_index_lookups_per_query:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_index_lookups_per_query:50quantile + - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_index_lookups_per_query:avg + - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_chunk_store_index_lookups_per_query_bucket:sum_rate + - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_index_lookups_per_query_sum:sum_rate + - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_index_lookups_per_query_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:50quantile + - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:avg + - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_bucket:sum_rate + - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_sum:sum_rate + - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:50quantile + - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:avg + - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_bucket:sum_rate + - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_sum:sum_rate + - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_chunks_per_query:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_chunk_store_chunks_per_query:50quantile + - expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_chunks_per_query:avg + - expr: sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_chunk_store_chunks_per_query_bucket:sum_rate + - expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_chunks_per_query_sum:sum_rate + - expr: sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job) + record: cluster_job:cortex_chunk_store_chunks_per_query_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)) + record: cluster_job_method:cortex_database_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)) + record: cluster_job_method:cortex_database_request_duration_seconds:50quantile + - expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_database_request_duration_seconds:avg + - expr: sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method) + record: cluster_job_method:cortex_database_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_database_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster, job, method) + record: cluster_job_method:cortex_database_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_gcs_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)) + record: cluster_job_operation:cortex_gcs_request_duration_seconds:50quantile + - expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_gcs_request_duration_seconds:avg + - expr: sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation) + record: cluster_job_operation:cortex_gcs_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_gcs_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job, operation) + record: cluster_job_operation:cortex_gcs_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_kv_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_kv_request_duration_seconds:50quantile + - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds:avg + - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate +- name: cortex_queries + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_retries:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_retries:50quantile + - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries:avg + - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate + - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries_sum:sum_rate + - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_series:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_series:50quantile + - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series:avg + - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_chunks:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_chunks:50quantile + - expr: sum(rate(cortex_ingester_queried_chunks_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_chunks:avg + - expr: sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_ingester_queried_chunks_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_chunks_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_chunks_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_chunks_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_samples:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_samples:50quantile + - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples:avg + - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples_count:sum_rate +- name: cortex_received_samples + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) + record: cluster_namespace_job:cortex_distributor_received_samples:rate5m +- name: cortex_scaling_rules + rules: + - expr: | + sum by (cluster, namespace, deployment) ( + label_replace( + kube_deployment_spec_replicas, + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + or + sum by (cluster, namespace, deployment) ( + label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") + ) + record: cluster_namespace_deployment:actual_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by (cluster, namespace) ( + cluster_namespace_job:cortex_distributor_received_samples:rate5m + )[24h:] + ) + / 240000 + ) + labels: + deployment: distributor + reason: sample_rate + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_overrides{limit_name="ingestion_rate"}) + * 0.59999999999999998 / 240000 + ) + labels: + deployment: distributor + reason: sample_rate_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by (cluster, namespace) ( + cluster_namespace_job:cortex_distributor_received_samples:rate5m + )[24h:] + ) + * 3 / 80000 + ) + labels: + deployment: ingester + reason: sample_rate + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by(cluster, namespace) ( + cortex_ingester_memory_series + )[24h:] + ) + / 1500000 + ) + labels: + deployment: ingester + reason: active_series + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_overrides{limit_name="max_global_series_per_user"}) + * 3 * 0.59999999999999998 / 1500000 + ) + labels: + deployment: ingester + reason: active_series_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_overrides{limit_name="ingestion_rate"}) + * 0.59999999999999998 / 80000 + ) + labels: + deployment: ingester + reason: sample_rate_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + (sum by (cluster, namespace) ( + cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} + ) / 4) + / + avg by (cluster, namespace) ( + memcached_limit_bytes{job=~".+/memcached"} + ) + ) + labels: + deployment: memcached + reason: active_series + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate + - expr: | + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_cpu_cores was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests_cpu_cores, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests{resource="cpu"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum + - expr: | + ceil( + cluster_namespace_deployment:actual_replicas:count + * + quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) + / + cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum + ) + labels: + reason: cpu_usage + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + container_memory_usage_bytes, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + record: cluster_namespace_deployment:container_memory_usage_bytes:sum + - expr: | + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_memory_bytes was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests_memory_bytes, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests{resource="memory"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum + - expr: | + ceil( + cluster_namespace_deployment:actual_replicas:count + * + quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) + / + cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum + ) + labels: + reason: memory_usage + record: cluster_namespace_deployment_reason:required_replicas:count +- name: cortex_alertmanager_rules + rules: + - expr: | + sum by (cluster, job, pod) (cortex_alertmanager_alerts) + record: cluster_job_pod:cortex_alertmanager_alerts:sum + - expr: | + sum by (cluster, job, pod) (cortex_alertmanager_silences) + record: cluster_job_pod:cortex_alertmanager_silences:sum + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) + record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) + record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m + - expr: | + sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) + record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m + - expr: | + sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) + record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) + record: cluster_job:cortex_alertmanager_state_replication_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) + record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) + record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) + record: cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m