From c544a6a1f04a66d2b99dfcb935f70a16a983f48a Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Thu, 16 Jan 2025 23:19:18 +0700 Subject: [PATCH 1/4] fix case-sensitive alias parsing bug --- macros/aql/parse.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/aql/parse.sql b/macros/aql/parse.sql index 7714daa..1c3b3b9 100644 --- a/macros/aql/parse.sql +++ b/macros/aql/parse.sql @@ -379,7 +379,7 @@ be wrapped in a valid aggregation function. {%- set ws_as = ws~"as"~ws -%} {%- if modules.re.search(ws_as, column_str, modules.re.IGNORECASE) is not none -%} - {%- set alias_base = modules.re.split(ws_as, column_str.strip())[-1] -%} + {%- set alias_base = modules.re.split(ws_as, column_str.strip(), modules.re.IGNORECASE)[-1] -%} {%- set alias = alias_base.translate(alias_base.maketrans("","", punc)).strip() -%} {%- else -%} {%- set alias = dbt_activity_schema.alias_column(activity_name, column, verb, relationship_selector, join_condition, n) -%} From c2e262104e6868ac455322639d2df0f74ad2ae1c Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Fri, 17 Jan 2025 00:39:52 +0700 Subject: [PATCH 2/4] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 62145e6..69649b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .DS_Store logs .envrc +.env \ No newline at end of file From 6609eaa2051df7cee1432f098962f19082486b7d Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Fri, 17 Jan 2025 00:40:39 +0700 Subject: [PATCH 3/4] fix time spine bq compatibility issues --- .../dataset/_build_dataset.sql | 49 ++++++--- .../dataset/aggregations/_helpers.sql | 104 ++++++++++++++++++ macros/activity_schema/dataset/utils.sql | 27 ----- 3 files changed, 138 insertions(+), 42 deletions(-) delete mode 100644 macros/activity_schema/dataset/utils.sql diff --git a/macros/activity_schema/dataset/_build_dataset.sql b/macros/activity_schema/dataset/_build_dataset.sql index 9d7324c..97f3a58 100644 --- a/macros/activity_schema/dataset/_build_dataset.sql +++ b/macros/activity_schema/dataset/_build_dataset.sql @@ -85,9 +85,9 @@ time_spine_entities as ( {%- for column in primary_activity.columns %} {{ dbt_activity_schema.select_column(stream, primary, column).column_sql }} as {{column.alias}}, {%- endfor %} - date_trunc('{{primary_activity.interval}}', min({{primary}}.{{columns.ts}})) as period_start, - {% if primary_activity.end_period=='current' %}current_timestamp{% else %}date_trunc('{{primary_activity.interval}}', max({{primary}}.{{columns.ts}})){% endif %} as period_end, - date_diff('{{primary_activity.interval}}', period_start::timestamp, {% if primary_activity.end_period=='current' %}current_timestamp{% else %}date_trunc('{{primary_activity.interval}}', max({{primary}}.{{columns.ts}})){% endif %}::timestamp) as active_periods + {{dbt_activity_schema.date_trunc(primary_activity.interval, 'min('~columns.ts~')')}} as period_start, + {{dbt_activity_schema.end_period_expression(primary_activity.end_period, columns.ts)}} as period_end, + {{dbt_activity_schema.date_diff(primary_activity.interval, dbt_activity_schema.date_trunc(primary_activity.interval, 'min('~columns.ts~')'), dbt_activity_schema.end_period_expression(primary_activity.end_period, columns.ts))}} as active_periods from {% if primary_activity.filters is none %}{% if not skip_stream %}{{ stream_relation }}{% else %}{{ ref(primary_activity.model_name) }}{% endif %}{% else %}{{primary_activity_alias}}{{fs}}{% endif %} as {{primary}} where true {% if not skip_stream %} @@ -109,34 +109,53 @@ time_spine_metadata as ( from time_spine_entities ), number_spine as ( +{% if target.type != 'bigquery' %} with recursive number_spine as ( - select 1 as n -- start the spine at 1 + select 0 as n -- start the spine at 1 union all select n + 1 from number_spine where n <= (select max_active_periods from time_spine_metadata) -- adjust the upper limit as needed ) -select * from number_spine), -{{primary_activity_alias}} as ( +select * from number_spine +{% else %} +select n +from unnest(generate_array(0, 10000)) as n +{% endif %} +), +joined_time_spine as ( select {%- for column in primary_activity.columns %} tse.{{column.alias}}, {%- endfor %} - ns.n-1 as n0, - {{ dbt_activity_schema.dateadd(primary_activity.interval, 'n0', 'tse.period_start') }} as {{columns.ts}}, - {{ dbt_activity_schema.dateadd(primary_activity.interval, 1, columns.ts) }} as {{columns.activity_repeated_at}}, - ns.n as {{columns.activity_occurrence}}, - md5(tse.{{req}}{{columns.customer}} || {{columns.ts}}) as {{req}}{{columns.activity_id}}, - tse.{{req}}{{columns.customer}}, + ns.n, + tse.period_start, + {{ dbt_activity_schema.dateadd(primary_activity.interval, 'ns.n', 'tse.period_start') }} as {{columns.ts}}, {% if columns.anonymous_customer_id is defined %} tse.{{req}}{{columns.anonymous_customer_id}}, {% endif %} - {{columns.ts}} as {{req}}{{columns.ts}}, - {{columns.activity_occurrence}} as {{req}}{{columns.activity_occurrence}}, - {{columns.activity_repeated_at}} as {{req}}{{columns.activity_repeated_at}} + tse.{{req}}{{columns.customer}} from time_spine_entities tse left join number_spine ns on tse.active_periods >= ns.n +), +{{primary_activity_alias}} as ( + select + {%- for column in primary_activity.columns %} + {{column.alias}}, + {%- endfor %} + {{columns.ts}}, + {{ dbt_activity_schema.dateadd(primary_activity.interval, 1, columns.ts) }} as {{columns.activity_repeated_at}}, + n+1 as {{columns.activity_occurrence}}, + {{dbt_activity_schema.md5(req~columns.customer~' || '~columns.ts)}} as {{req}}{{columns.activity_id}}, + {{req}}{{columns.customer}}, + {% if columns.anonymous_customer_id is defined %} + {{req}}{{columns.anonymous_customer_id}}, + {% endif %} + {{columns.ts}} as {{req}}{{columns.ts}}, + n+1 as {{req}}{{columns.activity_occurrence}}, + {{ dbt_activity_schema.dateadd(primary_activity.interval, 1, columns.ts) }} as {{req}}{{columns.activity_repeated_at}}, + from joined_time_spine ) {% else %} {% if primary_activity.filters is not none %} diff --git a/macros/activity_schema/dataset/aggregations/_helpers.sql b/macros/activity_schema/dataset/aggregations/_helpers.sql index c2aad63..856fca5 100644 --- a/macros/activity_schema/dataset/aggregations/_helpers.sql +++ b/macros/activity_schema/dataset/aggregations/_helpers.sql @@ -34,3 +34,107 @@ {% macro bigquery__type_json() %} {%- do return("json") -%} {% endmacro %} + +{% macro end_period_expression(end_period, ts_col) %} +{%- if end_period == 'current' -%} +{{dbt_activity_schema.current_timestamp()}} +{%- elif end_period == 'max' -%} +max({{dbt_activity_schema.primary()}}.{{ts_col}}) +{%- else -%} +{%- endif -%} +{% endmacro %} + + +{% macro current_timestamp() %} + {{ return(adapter.dispatch("current_timestamp", "dbt_activity_schema")())}} +{% endmacro %} + +{% macro default__current_timestamp() %} +current_timestamp +{% endmacro %} + +{% macro bigquery__current_timestamp() %} +current_timestamp() +{% endmacro %} + + +{% macro dateadd(interval, periods, ts) %} + {{ return(adapter.dispatch('dateadd', 'dbt_activity_schema')(interval, periods, ts)) }} +{% endmacro %} + +{% macro default__dateadd(interval, periods, ts) %} +dateadd({{interval}}, {{periods}}, {{ts}}) +{% endmacro %} + +{% macro duckdb__dateadd(interval, periods, ts) %} +date_add({{ts}}, {{periods}} * interval 1 {{interval}}) +{% endmacro %} + +{% macro snowflake__dateadd(interval, periods, ts) %} +dateadd({{interval}}, {{periods}}, {{ts}}) +{% endmacro %} + +{% macro redshift__dateadd(interval, periods, ts) %} +dateadd({{interval}}, {{periods}}, {{ts}}) +{% endmacro %} + +{% macro bigquery__dateadd(interval, periods, ts) %} +date_add({{ts}}, interval {{periods}} {{interval}}) +{% endmacro %} + + +{% macro to_timestamp(ts) %} + {{ return(adapter.dispatch("to_timestamp", "dbt_activity_schema")(ts))}} +{% endmacro %} + +{% macro default__to_timestamp(ts) %} +{{ts}}::timestamp +{% endmacro %} + +{% macro bigquery__to_timestamp(ts) %} +timestamp({{ts}}) +{% endmacro %} + + + +{% macro date_trunc(period, ts) %} + {{ return(adapter.dispatch("date_trunc", "dbt_activity_schema")(period, ts))}} +{% endmacro %} + +{% macro default__date_trunc(period, ts) %} +date_trunc('{{period}}', {{ts}}) +{% endmacro %} + +{% macro bigquery__date_trunc(period, ts) %} +date_trunc({{ts}}, {{period}}) +{% endmacro %} + + +{% macro date_diff(period, start_ts, end_ts) %} + {{ return(adapter.dispatch("date_diff", "dbt_activity_schema")(period, start_ts, end_ts))}} +{% endmacro %} + +{% macro default__date_diff(period, start_ts, end_ts) %} +datediff('{{period}}', {{start_ts}}, {{end_ts}}) +{% endmacro %} + +{% macro duckdb__date_diff(period, start_ts, end_ts) %} +date_diff('{{period}}', {{start_ts}}, {{end_ts}}) +{% endmacro %} + +{% macro bigquery__date_diff(period, start_ts, end_ts) %} +date_diff({{end_ts}}, {{start_ts}}, {{period}}) +{% endmacro %} + + +{% macro md5(expr) %} + {{ return(adapter.dispatch("md5", "dbt_activity_schema")(expr))}} +{% endmacro %} + +{% macro default__md5(expr) %} +md5({{expr}}) +{% endmacro %} + +{% macro bigquery__md5(expr) %} +to_hex(md5({{expr}})) +{% endmacro %} diff --git a/macros/activity_schema/dataset/utils.sql b/macros/activity_schema/dataset/utils.sql deleted file mode 100644 index 5053621..0000000 --- a/macros/activity_schema/dataset/utils.sql +++ /dev/null @@ -1,27 +0,0 @@ -{% macro dateadd(interval, periods, ts) %} - {{ return(adapter.dispatch('dateadd', 'dbt_activity_schema')(interval, periods, ts)) }} -{% endmacro %} - -{% macro default__dateadd(interval, periods, ts) %} -dateadd({{interval}}, {{periods}}, {{ts}}) -{% endmacro %} - -{% macro duckdb__dateadd(interval, periods, ts) %} -date_add({{ts}}, {{periods}} * interval 1 {{interval}}) -{% endmacro %} - -{% macro snowflake__dateadd(interval, periods, ts) %} -dateadd({{interval}}, {{periods}}, {{ts}}) -{% endmacro %} - -{% macro redshift__dateadd(interval, periods, ts) %} -dateadd({{interval}}, {{periods}}, {{ts}}) -{% endmacro %} - -{% macro bigquery__dateadd(interval, periods, ts) %} -date_add({{ts}}, {{periods}} * interval 1 {{interval}}) -{% endmacro %} - - - -SELECT DATE_ADD(DATE('2025-01-01'), INTERVAL 7 DAY) AS new_date; From 9b3eb455e39b85e43bf59bef234018c67d19f6f0 Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Fri, 17 Jan 2025 00:40:58 +0700 Subject: [PATCH 4/4] fix time spine test bugs --- integration_tests/models/datasets/time_spine/schema.yml | 6 ++++++ integration_tests/seeds/datasets/seed_schema.yml | 6 +++++- .../seeds/datasets/time_spine/output__time_spine.csv | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/integration_tests/models/datasets/time_spine/schema.yml b/integration_tests/models/datasets/time_spine/schema.yml index f2954c0..4691b5c 100644 --- a/integration_tests/models/datasets/time_spine/schema.yml +++ b/integration_tests/models/datasets/time_spine/schema.yml @@ -7,3 +7,9 @@ models: tests: - dbt_utils.equality: compare_model: ref("output__time_spine") + compare_columns: + - ts + - customer_id + - total_items_purchased_between + - total_sales_between + - total_purchases_between \ No newline at end of file diff --git a/integration_tests/seeds/datasets/seed_schema.yml b/integration_tests/seeds/datasets/seed_schema.yml index 3297b03..9aefa6c 100644 --- a/integration_tests/seeds/datasets/seed_schema.yml +++ b/integration_tests/seeds/datasets/seed_schema.yml @@ -80,4 +80,8 @@ seeds: column_types: first_before_referrer_url: "{{ 'string' if target.name == 'bigquery' else 'text' }}" second_pageview_at: "{{ 'datetime' if target.name == 'bigquery' else 'timestamp' }}" - \ No newline at end of file + + - name: output__time_spine + config: + column_types: + ts: "{{ 'datetime' if target.name == 'bigquery' else 'timestamp' }}" diff --git a/integration_tests/seeds/datasets/time_spine/output__time_spine.csv b/integration_tests/seeds/datasets/time_spine/output__time_spine.csv index 4b88278..e221baf 100644 --- a/integration_tests/seeds/datasets/time_spine/output__time_spine.csv +++ b/integration_tests/seeds/datasets/time_spine/output__time_spine.csv @@ -2,12 +2,16 @@ ts,activity_id,customer_id,total_items_purchased_between,total_sales_between,tot 2022-01-01,ac78479a9f4e0a82141e63285e0bd937,1,6,350,2 2022-02-01,40baeaad6b5e05afb0f2346f3f73f2c4,1,5,250,1 2022-03-01,c258c810fde59ef5e4d5f0c5b3b59c50,1,0,0,0 +2022-04-01,6dc2dde9fdba51de24cd3d537dd974c7,1,0,0,0 2022-01-01,d242adbbac9ebdbba8caf4d669a13db8,4,2,80,1 2022-02-01,b040c6925c8a8b4fda30d87bb3c828fe,4,6,600,2 2022-03-01,414fd7624da769643b82b680a0476d01,4,0,0,0 +2022-04-01,9e0dfbdab056ac46796b461ce49845c2,4,0,0,0 2022-01-01,e93fa0bce5965387e1ee99086d13ebba,7,3,120,1 2022-02-01,6b62dceb80db68b6260c1ab313a13d36,7,2,150,2 2022-03-01,e554eceabffb4739dffa03e6d96d90b0,7,0,0,0 +2022-04-01,90e45d8e8f5ab2fe50745d01e119e2c4,7,0,0,0 2022-01-01,5168a14e1555818beb23a394f9e2f95f,10,4,50,1 2022-02-01,91ee7a6ca7f704b424afae0af772078c,10,18,1600,2 2022-03-01,97e03bb3fa450367f3fff74634a2f886,10,0,0,0 +2022-04-01,0deea889ea5d00d2c598ad774d598c72,10,0,0,0