From 72aba367608d78c2724de86a6f532fc22b6136eb Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 18 Jun 2024 10:26:22 -0600 Subject: [PATCH] Add make_view_incremental, tests, documentation, etc. Committing this functional subset to get the PR open and start working code into a release. --- META.json | 1 + doc/reference.md | 24 ++++++++++++ sql/timeseries.sql | 74 +++++++++++++++++++++++++++++++++++ test/expected/basic_usage.out | 65 ++++++++++++++++++++++++++++++ test/sql/basic_usage.sql | 31 +++++++++++++++ timeseries.control | 2 +- 6 files changed, 196 insertions(+), 1 deletion(-) diff --git a/META.json b/META.json index 8c62f3b..cc37b24 100644 --- a/META.json +++ b/META.json @@ -23,6 +23,7 @@ "recommends": { "columnar": "1.1.2", "pg_cron": "1.6.2", + "pg_ivm": "1.8", "pg_partman": "4.5.1" } } diff --git a/doc/reference.md b/doc/reference.md index 7a7a958..f5e47e8 100644 --- a/doc/reference.md +++ b/doc/reference.md @@ -132,6 +132,30 @@ Clears the compression policy for a table (so its data will never be dropped). `interval`, the previous policy for this table, or `NULL` if none was set. +## Incremental Views + +These functions deal with the creation and maintenance of incremental views, or more verbosely, _incrementally maintainable materialized views_ (IMMVs). The underlying functionality is provided the the `pg_ivm` ("PostgreSQL Incremental View Maintenance") project, which handles applying changes toward any partial aggregates and groups which are affected by incoming `INSERT`, `UPDATE`, `DELETE`, etc. (DML) statements. + +Within `pg_timeseries`, it is expected that these functions be applied against existing time-series enhanced tables. + +### `make_view_incremental` + +This function takes an existing view—typically used to calculate aggregates across a time-series data set—and makes it into an incremental view: one whose aggregates are stored in a materialized fashion, but which is kept up-to-date as every individual modification to the underlying table is processed. + +After this function exits, the same view will exist, but will now point at an _incrementally maintainable materialized view_. This view maintains its correctness through a series of statement triggers on the time-series table, meaning that for maximum performance, some degree of row batching should be used for ingest. + +#### Arguments + + * `target_view_id` (`regclass`), **required** — a view aggregating a single existing time-series table + +#### Considerations + +Views which reference more than one table are presently forbidden, though this restriction may be lifted at a later date. + +#### Returns + +`void`. If any problems are encountered, an error will be raised. + ## Analytics Functions These functions are not related to the maintenance of time-series tables, but do sometimes rely on the related metadata to function. They are intended to make time-series queries easier to read and maintain. diff --git a/sql/timeseries.sql b/sql/timeseries.sql index 1eba835..d6233a9 100644 --- a/sql/timeseries.sql +++ b/sql/timeseries.sql @@ -613,3 +613,77 @@ CREATE AGGREGATE locf(anyelement) ( SFUNC = locf_agg, STYPE = anyelement ); + +-- A wrapper around the incremental view functionality provided by pg_ivm. This +-- function accepts a pointer to an existing view (presumably one that queries +-- a time-series table) and rewrites that view to point at an underlying IMMV +-- instead. The purpose of this is to avoid exposing users to implementation- +-- specific columns used to track partial aggregate state within IMMV tables: +-- these columns may confuse users or interfere with software which expects +-- all visible columns to be meaningful to an application (say, dashboards). +-- +-- For the record, IMMV means "incrementally maintainable materialized view", +-- though at the moment these objects are actually plain tables. They are +-- created by calling functions from the pg_ivm project ("PostgreSQL incre- +-- mental view maintenance"). After creation, these views automatically keep +-- aggregates up-to-date with the latest data written to any referenced tables. +-- This is an improvement over plain views (which reaggregate on every scan) +-- and materialized views (which require periodic calls to REFRESH). +-- +-- Within this wrapper, the provided view's SQL is extracted, then a new IMMV +-- is created using the extracted query. Because this IMMV will have at least +-- one (sometimes two or more) internal column per aggregate (beginning with +-- the prefix '__ivm_'), it is desirable to allow the user to query a wrapper +-- view instead. So the original view's query is redefined to point at the +-- IMMV table, but restricted solely to the visible columns of the original +-- query. Big picture: the user can create an aggregating view, check it +-- quickly (with a LIMIT, for instance), then make it incremental once they +-- have verified it's producing the aggregate rows they want. +CREATE OR REPLACE FUNCTION public.make_view_incremental(target_view_id regclass) + RETURNS void + LANGUAGE plpgsql +AS $function$ +DECLARE + orig_view_sql text; + immv_name text; + columns_sql text; + old_client_msg text; + old_log_msg text; +BEGIN + -- get the original SQL definition of the view to be upgraded + SELECT pg_get_viewdef(target_view_id) INTO orig_view_sql; + + -- build the name we'll use for the new immv + SELECT format('%I.%I', n.nspname, c.relname || '_immv') + INTO immv_name + FROM pg_class c + LEFT JOIN pg_namespace n + ON n.oid = c.relnamespace + WHERE c.oid=target_view_id; + + -- pg_ivm is chatty, even at NOTICE, so turn it down + SELECT current_setting('client_min_messages') INTO old_client_msg; + SELECT current_setting('log_min_messages') INTO old_log_msg; + SET LOCAL client_min_messages TO warning; + SET LOCAL log_min_messages TO warning; + + -- create an immv with the original view's SQL + PERFORM create_immv(immv_name, orig_view_sql); + + -- restore previous client/log levels + EXECUTE format('SET LOCAL client_min_messages TO %s', old_client_msg); + EXECUTE format('SET LOCAL log_min_messages TO %s', old_log_msg); + + -- build a list of visible columns for the view + SELECT string_agg(quote_ident(attname), ', ' ORDER BY attnum ASC) + INTO columns_sql + FROM pg_attribute + WHERE attnum > 0 AND + NOT attisdropped AND + attrelid=target_view_id; + + -- modify the existing view to point at the immv, hiding internal columns + EXECUTE format('CREATE OR REPLACE VIEW %I AS SELECT %s FROM %s', + target_view_id, columns_sql, immv_name); +END; +$function$; diff --git a/test/expected/basic_usage.out b/test/expected/basic_usage.out index 15b1e31..cfdb6d5 100644 --- a/test/expected/basic_usage.out +++ b/test/expected/basic_usage.out @@ -238,3 +238,68 @@ ORDER BY 3; 2 | 1.9000000000000001 | Wed Nov 04 15:59:00 2020 PST (9 rows) +CREATE VIEW events_5m AS + SELECT + user_id, + date_bin('5 minutes', + event_time, + TIMESTAMPTZ '1970-01-01') AS event_time, + max(value), + min(value) + FROM events + GROUP BY 1, 2; +CREATE VIEW events_totals AS + SELECT + user_id, + sum(value), + count(user_id) + FROM events + GROUP BY 1; +SELECT make_view_incremental('events_5m'); + make_view_incremental +----------------------- + +(1 row) + +SELECT make_view_incremental('events_totals'); + make_view_incremental +----------------------- + +(1 row) + +SELECT * FROM events_5m ORDER BY 1, 2; + user_id | event_time | max | min +---------+------------------------------+-----+----- + 1 | Wed Nov 04 15:50:00 2020 PST | 1.2 | 1.1 + 1 | Wed Nov 04 15:55:00 2020 PST | 1.6 | 1.3 + 2 | Wed Nov 04 15:50:00 2020 PST | 1.8 | 1.7 + 2 | Wed Nov 04 15:55:00 2020 PST | 2.2 | 1.9 +(4 rows) + +SELECT * FROM events_totals ORDER BY 1; + user_id | sum | count +---------+------+------- + 1 | 8.1 | 6 + 2 | 11.7 | 6 +(2 rows) + +INSERT INTO events VALUES (3, 1, '2020-11-04 15:51:02.226999-08', 1.1); +DELETE FROM events WHERE event_id = 12; +SELECT * FROM events_5m ORDER BY 1, 2; + user_id | event_time | max | min +---------+------------------------------+-----+----- + 1 | Wed Nov 04 15:50:00 2020 PST | 1.2 | 1.1 + 1 | Wed Nov 04 15:55:00 2020 PST | 1.6 | 1.3 + 2 | Wed Nov 04 15:50:00 2020 PST | 1.8 | 1.7 + 2 | Wed Nov 04 15:55:00 2020 PST | 2.1 | 1.9 + 3 | Wed Nov 04 15:50:00 2020 PST | 1.1 | 1.1 +(5 rows) + +SELECT * FROM events_totals ORDER BY 1; + user_id | sum | count +---------+-----+------- + 1 | 8.1 | 6 + 2 | 9.5 | 5 + 3 | 1.1 | 1 +(3 rows) + diff --git a/test/sql/basic_usage.sql b/test/sql/basic_usage.sql index d7f4643..8bfb104 100644 --- a/test/sql/basic_usage.sql +++ b/test/sql/basic_usage.sql @@ -99,3 +99,34 @@ FROM date_bin_table(NULL::events, '1 minute', '(2020-11-04 15:50:00-08, 2020-11-04 16:00:00-08)') GROUP BY 3 ORDER BY 3; + +CREATE VIEW events_5m AS + SELECT + user_id, + date_bin('5 minutes', + event_time, + TIMESTAMPTZ '1970-01-01') AS event_time, + max(value), + min(value) + FROM events + GROUP BY 1, 2; + +CREATE VIEW events_totals AS + SELECT + user_id, + sum(value), + count(user_id) + FROM events + GROUP BY 1; + +SELECT make_view_incremental('events_5m'); +SELECT make_view_incremental('events_totals'); + +SELECT * FROM events_5m ORDER BY 1, 2; +SELECT * FROM events_totals ORDER BY 1; + +INSERT INTO events VALUES (3, 1, '2020-11-04 15:51:02.226999-08', 1.1); +DELETE FROM events WHERE event_id = 12; + +SELECT * FROM events_5m ORDER BY 1, 2; +SELECT * FROM events_totals ORDER BY 1; diff --git a/timeseries.control b/timeseries.control index 1aa7027..45bd8c7 100644 --- a/timeseries.control +++ b/timeseries.control @@ -2,4 +2,4 @@ comment = 'Convenience API for Tembo time series stack' default_version = '0.1.5' module_pathname = '$libdir/timeseries' -requires = 'columnar, pg_cron, pg_partman' +requires = 'columnar, pg_cron, pg_ivm, pg_partman'