Skip to content

Commit

Permalink
Add units to timeseries panel
Browse files Browse the repository at this point in the history
The yaxes field doesn't seem to have any effect in the timeseries panel; it was replaced by the unit field (fieldConfig.defaults.unit). So timeseriesPanel now takes a unit argument that defaults to 'short' and can be set per panel.

Signed-off-by: Charlie Le <[email protected]>
  • Loading branch information
CharlieTLe committed Oct 22, 2024
1 parent 3ab33de commit f6a9025
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 141 deletions.
6 changes: 3 additions & 3 deletions cortex-mixin/dashboards/alertmanager.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
)
.addPanel(
$.timeseriesPanel('Latency') +
$.timeseriesPanel('Latency', unit='ms') +
$.latencyPanel('cortex_alertmanager_notification_latency_seconds', '{%s}' % $.jobMatcher($._config.job_names.alertmanager))
)
)
Expand All @@ -84,7 +84,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.qpsPanel('cortex_request_duration_seconds_count{%s, route=~"api_v1_alerts|alertmanager"}' % $.jobMatcher($._config.job_names.gateway))
)
.addPanel(
$.timeseriesPanel('Latency') +
$.timeseriesPanel('Latency', unit='ms') +
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', 'api_v1_alerts|alertmanager')])
)
)
Expand Down Expand Up @@ -166,7 +166,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
}
)
.addPanel(
$.timeseriesPanel('Initial sync duration') +
$.timeseriesPanel('Initial sync duration', unit='s') +
$.latencyPanel('cortex_alertmanager_state_initial_sync_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.alertmanager)) + {
targets: [
target {
Expand Down
13 changes: 5 additions & 8 deletions cortex-mixin/dashboards/compactor.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ local utils = import 'mixin-utils/utils.libsonnet';
'sum(rate(cortex_compactor_runs_failed_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.compactor)
) +
$.bars +
{ yaxes: $.yaxes('ops') } +
$.panelDescription(
'Per-instance runs',
|||
Expand Down Expand Up @@ -44,9 +43,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addRow(
$.row('')
.addPanel(
$.timeseriesPanel('Compacted blocks / sec') +
$.timeseriesPanel('Compacted blocks / sec', unit='ops') +
$.queryPanel('sum(rate(prometheus_tsdb_compactions_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.compactor), 'blocks') +
{ yaxes: $.yaxes('ops') } +
$.panelDescription(
'Compacted blocks / sec',
|||
Expand All @@ -55,7 +53,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
),
)
.addPanel(
$.timeseriesPanel('Per-block compaction duration') +
$.timeseriesPanel('Per-block compaction duration', unit='s') +
$.latencyPanel('prometheus_tsdb_compaction_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.compactor)) +
$.panelDescription(
'Per-block compaction duration',
Expand Down Expand Up @@ -85,9 +83,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addRow(
$.row('Garbage Collector')
.addPanel(
$.timeseriesPanel('Blocks marked for deletion / sec') +
$.queryPanel('sum(rate(cortex_compactor_blocks_marked_for_deletion_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.compactor), 'blocks') +
{ yaxes: $.yaxes('ops') },
$.timeseriesPanel('Blocks marked for deletion / sec', unit='ops') +
$.queryPanel('sum(rate(cortex_compactor_blocks_marked_for_deletion_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.compactor), 'blocks'),
)
.addPanel(
$.successFailurePanel(
Expand All @@ -111,7 +108,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
) + { yaxes: $.yaxes('ops') }
)
.addPanel(
$.timeseriesPanel('Metadata Sync Duration') +
$.timeseriesPanel('Metadata Sync Duration', unit='ms') +
// This metric tracks the duration of a per-tenant metadata sync.
$.latencyPanel('cortex_compactor_meta_sync_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.compactor)),
)
Expand Down
10 changes: 4 additions & 6 deletions cortex-mixin/dashboards/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addRow(
$.row('Startup config file')
.addPanel(
$.timeseriesPanel('Startup config file hashes') +
$.timeseriesPanel('Startup config file hashes', unit='instances') +
$.queryPanel('count(cortex_config_hash{%s}) by (sha256)' % $.namespaceMatcher(), 'sha256:{{sha256}}') +
$.stack +
{ yaxes: $.yaxes('instances') },
$.stack,
)
)
.addRow(
$.row('Runtime config file')
.addPanel(
$.timeseriesPanel('Runtime config file hashes') +
$.timeseriesPanel('Runtime config file hashes', unit='instances') +
$.queryPanel('count(cortex_runtime_config_hash{%s}) by (sha256)' % $.namespaceMatcher(), 'sha256:{{sha256}}') +
$.stack +
{ yaxes: $.yaxes('instances') },
$.stack,
)
),
}
94 changes: 80 additions & 14 deletions cortex-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,44 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addTemplate('cluster', 'cortex_build_info', 'cluster')
.addTemplate('namespace', 'cortex_build_info{cluster=~"$cluster"}', 'namespace'),
},
// Builds the base object for a panel of type 'timeseries'.
//
// Parameters:
//   title - string used as the panel title.
//   unit  - value written to fieldConfig.defaults.unit; defaults to 'short'.
//
// The returned object has an empty `targets` list; callers in the dashboards
// compose it with other helpers via `+` (e.g. `$.timeseriesPanel(...) + $.queryPanel(...)`),
// which supply the queries and may extend `fieldConfig`.
timeseriesPanel(title, unit='short'):: {
// NOTE(review): '$datasource' looks like a dashboard template variable — confirm it is defined by the dashboard.
datasource: '$datasource',
fieldConfig: {
defaults: {
// Drawing defaults: thin line with minimal fill, no point markers, no stacking.
custom: {
drawStyle: 'line',
fillOpacity: 1,
lineWidth: 1,
pointSize: 5,
showPoints: 'never',
spanNulls: false,
stacking: {
group: 'A',
mode: 'none',
},
},
// No threshold steps are defined by default.
thresholds: {
mode: 'absolute',
steps: [],
},
// Display unit for the panel's values, taken from the `unit` argument.
unit: unit,
},
// Empty by default; other helpers append per-series overrides with `overrides+:`.
overrides: [],
},
options: {
legend: {
showLegend: true,
},
tooltip: {
mode: 'single',
sort: 'none',
},
},
links: [],
// Left empty here; queries are added by the helpers composed onto this object.
targets: [],
title: title,
type: 'timeseries',
},

// The mixin allow specialism of the job selector depending on if its a single binary
// deployment or a namespaced one.
Expand Down Expand Up @@ -108,6 +146,35 @@ local utils = import 'mixin-utils/utils.libsonnet';
}
for target in super.targets
],
fieldConfig+: {
defaults+: {
custom+: {
lineWidth: 0,
fillOpacity: 100, // Get solid fill.
stacking: {
mode: 'normal',
group: 'A',
},
},
unit: 'reqps',
min: 0,
},
overrides+: [{
matcher: {
id: 'byName',
options: status,
},
properties: [
{
id: 'color',
value: {
mode: 'fixed',
fixedColor: $.httpStatusColors[status],
},
},
],
} for status in std.objectFieldsAll($.httpStatusColors)],
},
},

latencyPanel(metricName, selector, multiplier='1e3')::
Expand All @@ -121,7 +188,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
},

successFailurePanel(title, successMetric, failureMetric)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='short') +
$.queryPanel([successMetric, failureMetric], ['successful', 'failed']) +
$.stack + {
aliasColors: {
Expand All @@ -132,7 +199,7 @@ local utils = import 'mixin-utils/utils.libsonnet';

// Displays started, completed and failed rate.
startedCompletedFailedPanel(title, startedMetric, completedMetric, failedMetric)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='ops') +
$.queryPanel([startedMetric, completedMetric, failedMetric], ['started', 'completed', 'failed']) +
$.stack + {
aliasColors: {
Expand Down Expand Up @@ -160,7 +227,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
},

containerMemoryWorkingSetPanel(title, containerName)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='bytes') +
$.queryPanel([
// We use "max" instead of "sum" otherwise during a rolling update of a statefulset we will end up
// summing the memory of the old instance/pod (whose metric will be stale for 5m) to the new instance/pod.
Expand All @@ -180,7 +247,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
},

containerNetworkPanel(title, metric, instanceName)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='Bps') +
$.queryPanel(
'sum by(%(instance)s) (rate(%(metric)s{%(namespace)s,%(instance)s=~"%(instanceName)s"}[$__rate_interval]))' % {
namespace: $.namespaceMatcher(),
Expand All @@ -199,7 +266,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.containerNetworkPanel('Transmit Bandwidth', 'container_network_transmit_bytes_total', instanceName),

containerDiskWritesPanel(title, containerName)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='Bps') +
$.queryPanel(
|||
sum by(%s, %s, device) (
Expand All @@ -220,7 +287,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
{ yaxes: $.yaxes('Bps') },

containerDiskReadsPanel(title, containerName)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='Bps') +
$.queryPanel(
|||
sum by(%s, %s, device) (
Expand All @@ -239,7 +306,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
{ yaxes: $.yaxes('Bps') },

containerDiskSpaceUtilization(title, containerName)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='percentunit') +
$.queryPanel(
|||
max by(persistentvolumeclaim) (
Expand All @@ -266,7 +333,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
else 'label_name="%s"' % containerName,

goHeapInUsePanel(title, jobName)::
$.timeseriesPanel(title) +
$.timeseriesPanel(title, unit='bytes') +
$.queryPanel(
'sum by(%s) (go_memstats_heap_inuse_bytes{%s})' % [$._config.per_instance_label, $.jobMatcher(jobName)],
'{{%s}}' % $._config.per_instance_label
Expand Down Expand Up @@ -361,13 +428,12 @@ local utils = import 'mixin-utils/utils.libsonnet';
getObjectStoreRows(title, component):: [
super.row(title)
.addPanel(
$.timeseriesPanel('Operations / sec') +
$.timeseriesPanel('Operations / sec', unit='rps') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s,component="%s"}[$__rate_interval]))' % [$.namespaceMatcher(), component], '{{operation}}') +
$.stack +
{ yaxes: $.yaxes('rps') },
$.stack
)
.addPanel(
$.timeseriesPanel('Error rate') +
$.timeseriesPanel('Error rate', unit='percentunit') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{%s,component="%s"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s,component="%s"}[$__rate_interval]))' % [$.namespaceMatcher(), component, $.namespaceMatcher(), component], '{{operation}}') +
{ yaxes: $.yaxes('percentunit') },
)
Expand Down Expand Up @@ -406,7 +472,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
};
super.row(title)
.addPanel(
$.timeseriesPanel('Requests / sec') +
$.timeseriesPanel('Requests / sec', unit='ops') +
$.queryPanel(
|||
sum by(operation) (
Expand Down Expand Up @@ -439,7 +505,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
)
.addPanel(
$.timeseriesPanel('Hit ratio') +
$.timeseriesPanel('Hit ratio', unit='percentunit') +
$.queryPanel(
|||
sum(
Expand Down
20 changes: 8 additions & 12 deletions cortex-mixin/dashboards/object-store.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,25 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addRow(
$.row('Components')
.addPanel(
$.timeseriesPanel('RPS / component') +
$.timeseriesPanel('RPS / component', unit='rps') +
$.queryPanel('sum by(component) (rate(thanos_objstore_bucket_operations_total{%s}[$__rate_interval]))' % $.namespaceMatcher(), '{{component}}') +
$.stack +
{ yaxes: $.yaxes('rps') },
$.stack,
)
.addPanel(
$.timeseriesPanel('Error rate / component') +
$.queryPanel('sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{%s}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{%s}[$__rate_interval]))' % [$.namespaceMatcher(), $.namespaceMatcher()], '{{component}}') +
{ yaxes: $.yaxes('percentunit') },
$.timeseriesPanel('Error rate / component', unit='percentunit') +
$.queryPanel('sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{%s}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{%s}[$__rate_interval]))' % [$.namespaceMatcher(), $.namespaceMatcher()], '{{component}}')
)
)
.addRow(
$.row('Operations')
.addPanel(
$.timeseriesPanel('RPS / operation') +
$.timeseriesPanel('RPS / operation', unit='rps') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s}[$__rate_interval]))' % $.namespaceMatcher(), '{{operation}}') +
$.stack +
{ yaxes: $.yaxes('rps') },
$.stack,
)
.addPanel(
$.timeseriesPanel('Error rate / operation') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{%s}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s}[$__rate_interval]))' % [$.namespaceMatcher(), $.namespaceMatcher()], '{{operation}}') +
{ yaxes: $.yaxes('percentunit') },
$.timeseriesPanel('Error rate / operation', unit='percentunit') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{%s}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s}[$__rate_interval]))' % [$.namespaceMatcher(), $.namespaceMatcher()], '{{operation}}')
)
)
.addRow(
Expand Down
Loading

0 comments on commit f6a9025

Please sign in to comment.