Skip to content

Commit

Permalink
Tech Report: Category total origins by client (#50)
Browse files (browse the repository at this point in the history)
* by client

* clients in a record

* fix aggregation

* Update definitions/output/reports/cwv_tech_categories.js

Co-authored-by: Barry Pollard <[email protected]>

* Update definitions/output/reports/cwv_tech_categories.js

Co-authored-by: Barry Pollard <[email protected]>

* consistent naming

* category sort

* cross-platform origins

* distinct technologies

---------

Co-authored-by: Barry Pollard <[email protected]>
  • Loading branch information
max-ostapenko and tunetheweb authored Jan 20, 2025
1 parent 295a45d commit 68fce03
Showing 1 changed file with 38 additions and 22 deletions.
60 changes: 38 additions & 22 deletions definitions/output/reports/cwv_tech_categories.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,53 +7,69 @@ publish('cwv_tech_categories', {
}).query(ctx => `
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "categories", "type": "dict"} */
WITH pages AS (
SELECT
SELECT DISTINCT
client,
root_page,
technologies
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}' AND
client = 'mobile'
date = '${pastMonth}'
${constants.devRankFilter}
), categories AS (
),
category_descriptions AS (
SELECT
name AS category,
description
FROM ${ctx.ref('wappalyzer', 'categories')}
), category_stats AS (
),
category_stats AS (
SELECT
category,
COUNT(DISTINCT root_page) AS origins
FROM pages,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
STRUCT(
COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile
) AS origins
FROM (
SELECT
client,
category,
COUNT(DISTINCT root_page) AS origins
FROM pages
LEFT JOIN pages.technologies AS tech
LEFT JOIN tech.categories AS category
GROUP BY
client,
category
)
GROUP BY category
), technology_stats AS (
),
technology_stats AS (
SELECT
category,
technology,
COUNT(DISTINCT root_page) AS origins
FROM pages,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
category_obj AS categories,
SUM(origins) AS total_origins
FROM ${ctx.ref('reports', 'cwv_tech_technologies')}
GROUP BY
category,
technology
technology,
categories
)
SELECT
category,
description,
category_stats.origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technology_stats.origins DESC) AS technologies
origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technology_stats.total_origins DESC) AS technologies
FROM category_stats
INNER JOIN technology_stats
USING (category)
LEFT JOIN categories
ON category_stats.category IN UNNEST(technology_stats.categories)
INNER JOIN category_descriptions
USING (category)
GROUP BY
category,
description,
origins
ORDER BY origins DESC
ORDER BY category ASC
`)

0 comments on commit 68fce03

Please sign in to comment.