From 8a92a1eda2ddff1367af759905af516669090849 Mon Sep 17 00:00:00 2001
From: Alex Silverstein
Date: Wed, 29 Apr 2020 22:35:29 -0400
Subject: [PATCH 1/2] Demonstrate issue with pure-batchid approach to queries

---
 external_query.sql | 12 +++++++++---
 init_test_data.sql | 13 +++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/external_query.sql b/external_query.sql
index 232af2e..95736f8 100644
--- a/external_query.sql
+++ b/external_query.sql
@@ -32,11 +32,17 @@ SELECT state_name, MAX(core_data.batch_id) as max_bid
     WHERE batch.is_published = TRUE
     GROUP BY state_name;
 
-/* States current: What's the latest published (non-preview) data for all states? */
+/* States current: What's the latest published (non-preview) data for all states?
+As written, this assumes that the latest date published for any state is the most recent for all states.
+*/
+WITH temp (latest_date) AS (
+    SELECT MAX(core_data.data_date)
+    FROM core_data INNER JOIN batch ON core_data.batch_id = batch.batch_id
+    WHERE batch.is_published = TRUE)
 SELECT * FROM (
     SELECT state_name, MAX(core_data.batch_id) as max_bid
-    FROM core_data INNER JOIN batch ON core_data.batch_id = batch.batch_id
-    WHERE batch.is_published = TRUE
+    FROM temp CROSS JOIN core_data INNER JOIN batch ON core_data.batch_id = batch.batch_id
+    WHERE batch.is_published = TRUE AND core_data.data_date = temp.latest_date
     GROUP BY state_name) AS latest_state_batches
 INNER JOIN core_data ON (
     core_data.batch_id = latest_state_batches.max_bid AND
diff --git a/init_test_data.sql b/init_test_data.sql
index 867a820..d3f3416 100644
--- a/init_test_data.sql
+++ b/init_test_data.sql
@@ -133,4 +133,17 @@ BEGIN
     ('NY', '2020-03-21 17:00:00', '2020-03-21 17:56:00', '2020-03-21', 210, 'JK', 'NY afternoon', last_batch_id);
 END $$;
 
+/* Revise data from the 20th. This will cause issues with naive queries that just get the most recent batchid */
+DO $$
+DECLARE last_batch_id BIGINT;
+BEGIN
+  INSERT INTO batch
+    (created_at, shift_lead, batch_note, is_published, is_revision, data_entry_type) VALUES
+    ('2020-03-21 20:05:00', 'AS', '3/20 night', TRUE, TRUE, 'push')
+    RETURNING batch_id INTO last_batch_id;
+  INSERT INTO core_data
+    (state_name, last_update_time, last_check_time, data_date, tests, checker, public_notes, batch_id) VALUES
+    ('PA', '2020-03-21 17:00:00', '2020-03-21 17:55:00', '2020-03-20', 167, 'AS', 'PA afternoon', last_batch_id);
+END $$;
+
 SELECT * FROM core_data INNER JOIN batch ON core_data.batch_id = batch.batch_id;
From 3d8e25a2f43268afb6629445b689bddf7979bd1f Mon Sep 17 00:00:00 2001
From: Alex Silverstein
Date: Wed, 29 Apr 2020 22:58:30 -0400
Subject: [PATCH 2/2] Materialized view example

daily_core_data keeps, for each (state_name, data_date), the core_data row
from the highest published batch_id, so a revision batch replaces the
earlier entry for the same state and date.

---
 create_tables.sql  | 16 ++++++++++++++++
 external_query.sql |  5 +++++
 2 files changed, 21 insertions(+)

diff --git a/create_tables.sql b/create_tables.sql
index b5c3ea0..5838298 100644
--- a/create_tables.sql
+++ b/create_tables.sql
@@ -34,3 +34,19 @@ CREATE TABLE core_data (
     batch_id INT REFERENCES batch(batch_id)
     -- should have a primary key on what? (geography_id, batch_id) ?
 );
+
+/* One row per (state_name, data_date): the row from the highest published batch_id.
+data_date is part of the join so a batch holding several dates for one state cannot duplicate rows. */
+CREATE MATERIALIZED VIEW daily_core_data AS
+SELECT core_data.*
+FROM (
+    SELECT core_data.state_name, core_data.data_date, MAX(core_data.batch_id) AS max_bid
+    FROM core_data
+    INNER JOIN batch ON core_data.batch_id = batch.batch_id
+    WHERE batch.is_published = TRUE
+    GROUP BY core_data.state_name, core_data.data_date
+) AS latest
+INNER JOIN core_data ON (
+    core_data.batch_id = latest.max_bid AND
+    core_data.state_name = latest.state_name AND
+    core_data.data_date = latest.data_date);
diff --git a/external_query.sql b/external_query.sql
index 95736f8..0b675d2 100644
--- a/external_query.sql
+++ b/external_query.sql
@@ -48,6 +48,11 @@ SELECT * FROM (
     core_data.batch_id = latest_state_batches.max_bid AND
     core_data.state_name = latest_state_batches.state_name)
 INNER JOIN batch ON (core_data.batch_id = batch.batch_id);
+
+/* States current/on a date using materialized view.
+The view is a stored snapshot, so refresh it first to pick up batches loaded since it was last populated. */
+REFRESH MATERIALIZED VIEW daily_core_data;
+SELECT * FROM daily_core_data WHERE data_date = '2020-03-20';
 
 /* States daily: What's the published daily data for all states, incorporating all edits? */
 SELECT * FROM (