Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Materialized view #3

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions create_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,7 @@ CREATE TABLE core_data (
batch_id INT REFERENCES batch(batch_id)
-- should have a primary key on what? (geography_id, batch_id) ?
);

/* daily_core_data: for every (state_name, data_date) pair, the core_data rows that
   belong to the highest batch_id containing that pair.

   data_date is carried through the subquery and the join so that a batch holding
   rows for several dates of the same state is matched once per date instead of
   once per (state, date) group, which would duplicate rows.

   NOTE(review): this view does not look at batch.is_published, so rows from
   unpublished batches can win -- confirm whether that is intended.
*/
CREATE MATERIALIZED VIEW daily_core_data AS
SELECT core_data.*
FROM (
    SELECT
        state_name,
        data_date,
        MAX(batch_id) AS max_bid
    FROM core_data
    GROUP BY state_name, data_date
) AS latest
INNER JOIN core_data
    ON core_data.batch_id = latest.max_bid
    AND core_data.state_name = latest.state_name
    -- IS NOT DISTINCT FROM keeps rows whose data_date is NULL, which GROUP BY
    -- places in a group of their own and which the previous join also returned.
    AND core_data.data_date IS NOT DISTINCT FROM latest.data_date;
12 changes: 9 additions & 3 deletions external_query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,22 @@ SELECT state_name, MAX(core_data.batch_id) as max_bid
WHERE batch.is_published = TRUE
GROUP BY state_name;

/* States current: What's the latest published (non-preview) data for all states?
   The "latest date" is the most recent data_date found in any published batch.
   Only states that have a published row for that date are returned; a state whose
   newest published data is older than that date is omitted.
*/
WITH latest_published (latest_date) AS (
    -- Restrict to published batches so a preview batch with a newer date
    -- cannot move the target date past what has actually been published.
    SELECT MAX(core_data.data_date)
    FROM core_data
    INNER JOIN batch ON core_data.batch_id = batch.batch_id
    WHERE batch.is_published = TRUE
),
latest_state_batches AS (
    -- Highest published batch per state among rows for the latest date.
    SELECT
        core_data.state_name,
        latest_published.latest_date,
        MAX(core_data.batch_id) AS max_bid
    FROM core_data
    INNER JOIN batch ON core_data.batch_id = batch.batch_id
    CROSS JOIN latest_published
    WHERE batch.is_published = TRUE
        AND core_data.data_date = latest_published.latest_date
    GROUP BY core_data.state_name, latest_published.latest_date
)
-- Explicit list keeps the same output columns as the previous SELECT *
-- (state_name, max_bid, then all core_data and batch columns).
SELECT
    latest_state_batches.state_name,
    latest_state_batches.max_bid,
    core_data.*,
    batch.*
FROM latest_state_batches
INNER JOIN core_data ON (
    core_data.batch_id = latest_state_batches.max_bid AND
    core_data.state_name = latest_state_batches.state_name AND
    -- Drop rows for other dates that happen to share the winning batch.
    core_data.data_date = latest_state_batches.latest_date)
INNER JOIN batch ON (core_data.batch_id = batch.batch_id);

/* States current/on a date, read from the daily_core_data materialized view.
   A PostgreSQL materialized view is a stored snapshot: run
   REFRESH MATERIALIZED VIEW daily_core_data after loading batches to see them here.
*/
SELECT dcd.*
FROM daily_core_data AS dcd
WHERE dcd.data_date = '2020-03-20';

/* States daily: What's the published daily data for all states, incorporating all edits? */
SELECT * FROM (
Expand Down
13 changes: 13 additions & 0 deletions init_test_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,17 @@ BEGIN
('NY', '2020-03-21 17:00:00', '2020-03-21 17:56:00', '2020-03-21', 210, 'JK', 'NY afternoon', last_batch_id);
END $$;

/* Revise data from the 20th: a published revision batch created on the 21st that
   carries a PA row dated 2020-03-20. Queries that naively pick the most recent
   batch id will trip over this, because the newest batch holds older data.
*/
DO $$
DECLARE
    revision_batch_id BIGINT;
BEGIN
    -- Create the revision batch and capture its generated id.
    INSERT INTO batch
        (created_at, shift_lead, batch_note, is_published, is_revision, data_entry_type)
    VALUES
        ('2020-03-21 20:05:00', 'AS', '3/20 night', TRUE, TRUE, 'push')
    RETURNING batch_id INTO revision_batch_id;

    -- Attach the revised PA row for 2020-03-20 to that batch.
    INSERT INTO core_data
        (state_name, last_update_time, last_check_time, data_date, tests, checker, public_notes, batch_id)
    VALUES
        ('PA', '2020-03-21 17:00:00', '2020-03-21 17:55:00', '2020-03-20', 167, 'AS', 'PA afternoon', revision_batch_id);
END $$;

-- Show every loaded core_data row alongside the batch it belongs to.
SELECT *
FROM core_data AS cd
INNER JOIN batch AS b
    ON cd.batch_id = b.batch_id;