From 19d8e3e516583b2fcd049529b81be5f19f8fac19 Mon Sep 17 00:00:00 2001 From: niksajakovljevic Date: Mon, 12 Dec 2022 16:03:56 +0100 Subject: [PATCH] Alert when PG shared_buffers is smaller than open chunks --- docs/mixin/alerts/alerts.yaml | 15 +++++++ .../PromscalePostgreSQLSharedBuffersLow.md | 21 ++++++++++ pkg/pgmodel/metrics/database/metrics.go | 42 +++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md diff --git a/docs/mixin/alerts/alerts.yaml b/docs/mixin/alerts/alerts.yaml index 813d09bebe..5351c800d8 100644 --- a/docs/mixin/alerts/alerts.yaml +++ b/docs/mixin/alerts/alerts.yaml @@ -342,3 +342,18 @@ groups: summary: High uncompressed data. description: "High uncompressed data in Promscale, on average, {{ $value }} uncompressed chunks per metric." runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCompressionLow.md + - alert: PromscalePostgreSQLSharedBuffersLow + expr: | + ( + ((promscale_sql_database_open_chunks_total_table_size + promscale_sql_database_open_chunks_total_index_size) + / + promscale_sql_database_shared_buffers_size) + > 1 ) + for: 10m + labels: + severity: warning + annotations: + summary: Promscale database performance will be affected. + description: "Currently open chunks are {{ $value | humanizePercentage }} of PostgreSQL shared_buffers. This will impact database performance." + runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md + diff --git a/docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md b/docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md new file mode 100644 index 0000000000..21b883c8da --- /dev/null +++ b/docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md @@ -0,0 +1,21 @@ +# PromscalePostgreSQLSharedBuffersLow + +## Meaning + +Open chunks (the chunks that data is currently written into) can't fit into +PostgreSQL shared buffers. 
Total size is calculated by summing up all chunk +relation and index sizes. + +## Impact + +Database performance will be affected, especially the ingest speed. The effect +will be less if you are running PostgreSQL on a fast local disk. + +## Mitigation + +Increase the percentage of memory allocated to PostgreSQL shared buffers. +If you have already allocated a large percentage of memory to shared buffers (e.g. 75%), +you should consider increasing database memory. +`shared_buffers` can be set through `postgresql.conf`. +To make sure that your new setting is applied you can run: +`SELECT * FROM pg_settings WHERE name = 'shared_buffers';` diff --git a/pkg/pgmodel/metrics/database/metrics.go b/pkg/pgmodel/metrics/database/metrics.go index aea9d053b4..2cded2ea31 100644 --- a/pkg/pgmodel/metrics/database/metrics.go +++ b/pkg/pgmodel/metrics/database/metrics.go @@ -578,6 +578,48 @@ var metrics = []metricQueryWrap{ ), query: `select count(*)::bigint from _prom_catalog.metric`, }, + { + metrics: gauges( + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "shared_buffers_size", + Help: "Size of shared_buffers in bytes", + }, + ), + query: `SELECT (setting::BIGINT*pg_size_bytes(unit))::BIGINT FROM pg_settings WHERE name = 'shared_buffers'`, + }, + { + metrics: gauges( + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "open_chunks_total_table_size", + Help: "Total table size of currently open chunks in bytes", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "open_chunks_total_index_size", + Help: "Total indexes size of currently open chunks in bytes", + }, + ), + query: `SELECT + coalesce(sum(chunk_total_size)::BIGINT,0) as total_table_size, + coalesce(sum(chunk_index_size)::BIGINT, 0) as total_index_size + FROM ( + SELECT DISTINCT ON (hypertable_id) + pg_indexes_size(format('%I.%I', c.schema_name, c.table_name)) chunk_index_size, 
pg_table_size(format('%I.%I', c.schema_name, c.table_name)) chunk_total_size /* heap + TOAST only: indexes are reported separately as chunk_index_size, so they must not be included here */ + FROM _timescaledb_catalog.dimension_slice ds + INNER JOIN _timescaledb_catalog.chunk_constraint cc on (cc.dimension_slice_id = ds.id) + INNER JOIN _timescaledb_catalog.chunk c on (c.id = cc.chunk_id) + WHERE range_end > _timescaledb_internal.time_to_internal(now()- interval '30 minutes') + and range_start < _timescaledb_internal.time_to_internal(now()) + ORDER BY hypertable_id, range_end DESC + ) AS info;`, + customPollConfig: updateAtMostEvery(6 * time.Minute), + }, } // GetMetric returns the metric whose Description best matches the supplied name.