Skip to content

Commit

Permalink
Merge pull request #4217 from thematters/feat/deprecate-tag-management
Browse files Browse the repository at this point in the history
feat(tag): retire unused tag queries and previous deprecated
  • Loading branch information
gitwoz authored Nov 18, 2024
2 parents a0e509f + 908770f commit 38151d1
Show file tree
Hide file tree
Showing 66 changed files with 456 additions and 3,627 deletions.
58 changes: 58 additions & 0 deletions db/migrations/20241112112619_create_tag_stats_view.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
const materialized = 'tag_stats_materialized'

exports.up = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${materialized}`)

await knex.raw(`
CREATE MATERIALIZED VIEW ${materialized} AS
WITH article_tags AS (
SELECT
at.tag_id,
at.article_id,
article.author_id
FROM article_tag at
INNER JOIN article
ON article.id = at.article_id
INNER JOIN "user" u
ON u.id = article.author_id
WHERE article.state = 'active'
AND u.state NOT IN ('forzen', 'archived')
AND u.id NOT IN (
SELECT user_id
FROM user_restriction
)
AND at.created_at AT TIME ZONE 'Asia/Taipei' >= NOW() - INTERVAL '12 months'
AND at.created_at AT TIME ZONE 'Asia/Taipei' < NOW()
),
tag_stats AS (
SELECT
tag_id,
COUNT(article_id)::INT AS all_articles,
COUNT(DISTINCT author_id)::INT AS all_users
FROM article_tags
GROUP BY tag_id
),
user_threshold AS (
SELECT DISTINCT
PERCENTILE_CONT(0.15) WITHIN GROUP (ORDER BY all_users) AS threshold
FROM tag_stats
)
SELECT
ts.tag_id,
tag.content,
ts.all_articles,
ts.all_users
FROM tag_stats ts
INNER JOIN tag
ON tag.id = ts.tag_id
CROSS JOIN user_threshold ut
WHERE ts.all_users > ut.threshold
ORDER BY
ts.all_users DESC,
ts.all_articles DESC
`)
}

exports.down = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${materialized}`)
}
27 changes: 27 additions & 0 deletions db/migrations/20241112165500_create_article_stats_view.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const table = 'article_stats_materialized'

exports.up = async (knex) => {
await knex.raw(`
CREATE MATERIALIZED VIEW ${table} AS
SELECT
COALESCE(r.article_id, c.reference_id) as article_id,
COALESCE(r.reads, 0) as reads,
COALESCE(c.claps, 0) as claps
FROM (
SELECT article_id, sum(timed_count)::int AS reads
FROM article_read_count
WHERE user_id IS NOT NULL
GROUP BY article_id
) r
FULL OUTER JOIN (
SELECT reference_id, sum(amount)::int AS claps
FROM appreciation
WHERE purpose = 'appreciate'
GROUP BY reference_id
) c ON r.article_id = c.reference_id
`)
}

exports.down = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${table}`)
}
71 changes: 71 additions & 0 deletions db/migrations/20241113094622_create_tag_hottest_view.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
const table = 'tag_hottest_materialized'

exports.up = async (knex) => {
await knex.raw(`
CREATE MATERIALIZED VIEW ${table} AS
WITH raw_article_data AS (
SELECT
at.tag_id,
to_char(article.created_at, 'YYYYMM') AS month,
at.article_id,
article.author_id
FROM article_tag AS at
INNER JOIN article ON article.id = at.article_id
INNER JOIN "user" AS u ON u.id = article.author_id
WHERE article.state = 'active'
AND u.state NOT in('frozen', 'archived')
AND u.id NOT in(SELECT user_id FROM user_restriction)
),
monthly_stats AS (
SELECT tag_id, month,
count(article_id)::int articles,
count(DISTINCT author_id)::int users
FROM raw_article_data
GROUP BY tag_id, month
),
tag_averages AS (
SELECT tag_id,
count(month) AS months,
avg(articles) AS mean_articles,
avg(users) AS mean_users
FROM monthly_stats
GROUP BY tag_id
),
tag_z_scores AS (
SELECT tag_id, months,
(months - avg(months) OVER()) / NULLIF(stddev(months) OVER(), 0) AS z_months,
mean_articles,
(mean_articles - avg(mean_articles) OVER()) / NULLIF(stddev(mean_articles) OVER(), 0) AS z_articles,
mean_users,
(mean_users - avg(mean_users) OVER()) / NULLIF(stddev(mean_users) OVER(), 0) AS z_users
FROM tag_averages
),
significant_scores AS (
SELECT tag_id, months,
CASE WHEN z_months < 2 THEN 0 ELSE z_months END AS z_months,
mean_articles,
CASE WHEN z_articles < 2 THEN 0 ELSE z_articles END AS z_articles,
mean_users,
CASE WHEN z_users < 2 THEN 0 ELSE z_users END AS z_users
FROM tag_z_scores
)
SELECT
base.tag_id,
tag.content,
base.months,
base.mean_articles,
base.mean_users,
base.score
FROM (
SELECT *, z_months * z_articles * z_users AS score
FROM significant_scores
) AS base
INNER JOIN tag ON tag.id = base.tag_id
WHERE base.score > 0
ORDER BY base.score DESC
`)
}

exports.down = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${table}`)
}
Loading

0 comments on commit 38151d1

Please sign in to comment.