Skip to content

Commit

Permalink
Merge pull request #39 from Snowflake-Labs/37-feature-request-add-con…
Browse files Browse the repository at this point in the history
…straints-to-seeds

Added support for seeds
  • Loading branch information
sfc-gh-dflippo authored Dec 5, 2022
2 parents a7e2da6 + e5c12ba commit 7d7557f
Show file tree
Hide file tree
Showing 13 changed files with 239 additions and 23 deletions.
102 changes: 102 additions & 0 deletions integration_tests/data/tpc_h_seeds/seeds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
version: 2

seeds:

- name: part
columns:
- name: p_partkey
description: "The primary key for this table"
tests:
- unique
- not_null

- name: partsupp
columns:
- name: ps_partkey
description: "Part of compound primary key for this table"
tests:
- not_null
- relationships:
to: ref('part')
field: p_partkey
- name: ps_suppkey
description: "Part of compound primary key for this table"
tests:
- not_null
- dbt_constraints.foreign_key:
pk_table_name: ref('supplier')
pk_column_name: s_suppkey
tests:
# This is a higher performance way to test compound PK/UK
- dbt_constraints.unique_key:
column_names:
- ps_partkey
- ps_suppkey
# How to validate a compound primary key natively
- unique:
column_name: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')"

- name: supplier
columns:
- name: s_suppkey
description: "The primary key for this table"
tests:
- unique
- not_null
- name: s_nationkey
tests:
- not_null

- name: orders
columns:
- name: o_orderkey
description: "The primary key for this table"
tests:
- unique
- not_null
- name: o_custkey
tests:
- not_null
- relationships:
to: ref('customer')
field: c_custkey

- name: customer
columns:
- name: c_custkey
description: "The primary key for dim_customers"
tests:
- dbt_constraints.primary_key
- name: c_name
description: "Customer Name"
tests:
- not_null
- name: c_nationkey
tests:
- not_null

- name: lineitem
columns:
- name: l_orderkey
tests:
- not_null
- relationships:
to: ref('orders')
field: o_orderkey
- name: l_linenumber
tests:
- not_null
tests:
# This is a higher performance way to test compound PK/UK
- dbt_constraints.unique_key:
column_names:
- l_orderkey
- l_linenumber
# How to validate a compound primary key natively
- unique:
column_name: "coalesce(cast(l_orderkey as varchar(100)), '') || '~' || coalesce(cast(l_linenumber as varchar(100)), '')"
# How to validate a compound foreign key
- relationships:
column_name: "coalesce(cast(l_partkey as varchar(100)), '') || '~' || coalesce(cast(l_suppkey as varchar(100)), '')"
to: ref('partsupp')
field: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')"
3 changes: 2 additions & 1 deletion integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,5 @@ models:

seeds:
+quote_columns: false
+full_refresh: false
+post-hook: "{{ clone_table('source_') }}"
#+full_refresh: false
75 changes: 75 additions & 0 deletions integration_tests/macros/clone_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{%- macro clone_table(new_prefix) -%}
{{ return(adapter.dispatch('clone_table')(new_prefix)) }}
{%- endmacro -%}


{%- macro snowflake__clone_table(new_prefix) -%}
{%- set table_clone = api.Relation.create(
database = this.database,
schema = this.schema,
identifier = new_prefix ~ this.identifier ) -%}

{%- set clone_statement -%}
create or replace table {{table_clone}} clone {{this}}
{%- endset -%}
{%- do log("Creating table clone: " ~ table_clone, info=false) -%}
{%- do run_query(clone_statement) -%}

{%- endmacro -%}


{%- macro postgres__clone_table(new_prefix) -%}
{%- set table_clone = api.Relation.create(
database = this.database,
schema = this.schema,
identifier = new_prefix ~ this.identifier ) -%}

{%- set clone_statement -%}
drop table if exists {{table_clone}}
{%- endset -%}
{%- do log("Drop table if exists: " ~ table_clone, info=false) -%}

{%- set clone_statement -%}
create table {{table_clone}} as select * from {{this}}
{%- endset -%}
{%- do log("Creating table clone: " ~ table_clone, info=false) -%}
{%- do run_query(clone_statement) -%}

{%- endmacro -%}


{%- macro oracle__clone_table(new_prefix) -%}
{%- set table_clone = api.Relation.create(
database = this.database,
schema = this.schema,
identifier = new_prefix ~ this.identifier ) -%}

{%- set clone_statement -%}
DECLARE
tbl_count number;
sql_stmt long;

BEGIN
SELECT COUNT(*) INTO tbl_count
FROM dba_tables
WHERE owner = '{{table_clone.schema}}'
AND table_name = '{{table_clone.identifier}}';

IF(tbl_count <> 0)
THEN
sql_stmt:='DROP TABLE {{table_clone}}';
EXECUTE IMMEDIATE sql_stmt;
END IF;
END;
{%- endset -%}
{%- do log("Drop table if exists: " ~ table_clone, info=false) -%}

{%- set clone_statement -%}
create table {{table_clone}} as select * from {{this}}
{%- endset -%}
{%- do log("Creating table clone: " ~ table_clone, info=false) -%}
{%- do run_query(clone_statement) -%}

{%- endmacro -%}


2 changes: 1 addition & 1 deletion integration_tests/models/dim_customers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
*/
SELECT C.*,
DENSE_RANK() over (order by c_custkey) as c_custkey_seq
FROM {{ source('tpc_h', 'customer') }} C
FROM {{ ref('customer') }} C
2 changes: 1 addition & 1 deletion integration_tests/models/dim_orders.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ SELECT
O.*,
DENSE_RANK() over (order by o_orderkey) as o_orderkey_seq
FROM
{{ source('tpc_h', 'orders') }} O
{{ ref('orders') }} O
2 changes: 1 addition & 1 deletion integration_tests/models/dim_part.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
SELECT
P.*,
DENSE_RANK() over (order by p_partkey) as p_partkey_seq
FROM {{ source('tpc_h', 'part') }} P
FROM {{ ref('part') }} P
2 changes: 1 addition & 1 deletion integration_tests/models/dim_part_supplier.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
All Part Suppliers
*/
SELECT PS.*
FROM {{ source('tpc_h', 'partsupp') }} PS
FROM {{ source('tpc_h', 'source_partsupp') }} PS
2 changes: 1 addition & 1 deletion integration_tests/models/dim_part_supplier_missing_con.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
All Part Suppliers
*/
SELECT PS.*
FROM {{ source('tpc_h', 'partsupp') }} PS
FROM {{ source('tpc_h', 'source_partsupp') }} PS
4 changes: 2 additions & 2 deletions integration_tests/models/fact_order_line.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ SELECT
lineitem.*,
cast(TO_CHAR(o_orderdate, 'YYYYMMDD') AS INTEGER) AS o_orderdate_key,
coalesce(cast(l_orderkey as varchar(100)), '') || '~' || coalesce(cast(l_linenumber as varchar(100)), '') AS integration_id
FROM {{ source('tpc_h', 'lineitem') }} lineitem
JOIN {{ source('tpc_h', 'orders') }} orders ON l_orderkey = o_orderkey
FROM {{ ref('lineitem') }} lineitem
JOIN {{ ref('orders') }} orders ON l_orderkey = o_orderkey

{% if is_incremental() -%}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
SELECT
lineitem.*,
coalesce(cast(l_orderkey as varchar(100)), '') || '~' || coalesce(cast(l_linenumber as varchar(100)), '') AS integration_id
FROM {{ source('tpc_h', 'lineitem') }} lineitem
FROM {{ ref('lineitem') }} lineitem
6 changes: 5 additions & 1 deletion integration_tests/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,11 @@ models:
- not_null
# Testing FK to a source
- relationships:
to: source('tpc_h', 'supplier')
to: source('tpc_h', 'source_supplier')
field: s_suppkey
# Testing FK to a seed
- relationships:
to: ref('supplier')
field: s_suppkey
tests:
- dbt_constraints.unique_key:
Expand Down
54 changes: 44 additions & 10 deletions integration_tests/models/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,44 @@ sources:
- name: tpc_h
schema: "{{ target.schema }}"
tables:
- name: part
- name: source_part
columns:
- name: p_partkey
description: "The primary key for this table"
tests:
- unique
- not_null
- relationships:
to: ref('part')
field: p_partkey

- name: partsupp
- name: source_partsupp
columns:
- name: ps_partkey
description: "Part of compound primary key for this table"
tests:
- not_null
- relationships:
to: source('tpc_h', 'part')
to: source('tpc_h', 'source_part')
field: p_partkey
- relationships:
to: ref('partsupp')
field: ps_partkey
- relationships:
to: ref('part')
field: p_partkey
- name: ps_suppkey
description: "Part of compound primary key for this table"
tests:
- not_null
- relationships:
to: source('tpc_h', 'supplier')
to: source('tpc_h', 'source_supplier')
field: s_suppkey
- relationships:
to: ref('partsupp')
field: ps_suppkey
- relationships:
to: ref('supplier')
field: s_suppkey
tests:
# This is a higher performance way to test compound PK/UK
Expand All @@ -41,38 +56,47 @@ sources:
- unique:
column_name: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')"

- name: "supplier"
- name: "source_supplier"
columns:
- name: "s_suppkey"
description: "The primary key for this table"
tests:
- unique
- not_null
- relationships:
to: ref('supplier')
field: s_suppkey
- name: s_nationkey
tests:
- not_null

- name: orders
- name: source_orders
columns:
- name: o_orderkey
description: "The primary key for this table"
tests:
- unique
- not_null
- relationships:
to: ref('orders')
field: o_orderkey
- name: o_custkey
tests:
- not_null
- relationships:
to: source('tpc_h', 'customer')
to: source('tpc_h', 'source_customer')
field: c_custkey

- name: customer
- name: source_customer
columns:
- name: c_custkey
description: "The primary key for dim_customers"
tests:
- unique
- not_null
- relationships:
to: ref('customer')
field: c_custkey
- name: c_name
description: "Customer Name"
tests:
Expand All @@ -81,13 +105,13 @@ sources:
tests:
- not_null

- name: lineitem
- name: source_lineitem
columns:
- name: l_orderkey
tests:
- not_null
- relationships:
to: source('tpc_h', 'orders')
to: source('tpc_h', 'source_orders')
field: o_orderkey
- name: l_linenumber
tests:
Expand All @@ -107,3 +131,13 @@ sources:
column_name: "coalesce(cast(l_partkey as varchar(100)), '') || '~' || coalesce(cast(l_suppkey as varchar(100)), '')"
to: source('tpc_h', 'partsupp')
field: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')"

# multi-column FK
- dbt_constraints.foreign_key:
fk_column_names:
- l_orderkey
- l_linenumber
pk_table_name: ref('lineitem')
pk_column_names:
- l_orderkey
- l_linenumber
6 changes: 3 additions & 3 deletions macros/create_constraints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@
These models must be physical tables and cannot be sources -#}
{%- set table_models = [] -%}
{%- for node in graph.nodes.values() | selectattr("unique_id", "in", test_model.depends_on.nodes)
if node.resource_type in ( ( "model", "snapshot") )
if node.config.materialized in( ("table", "incremental", "snapshot") ) -%}
if node.resource_type in ( ( "model", "snapshot", "seed") )
if node.config.materialized in( ("table", "incremental", "snapshot", "seed") ) -%}

{#- Append to our list of models &or snapshots for this test -#}
{%- do table_models.append(node) -%}
Expand Down Expand Up @@ -245,7 +245,7 @@

{%- set fk_model = none -%}
{%- set pk_model = none -%}
{%- set fk_model_names = modules.re.findall( "(models|snapshots)\W+(\w+)" , test_model.file_key_name) -%}
{%- set fk_model_names = modules.re.findall( "(models|snapshots|seeds)\W+(\w+)" , test_model.file_key_name) -%}
{%- set fk_source_names = modules.re.findall( "source\W+(\w+)\W+(\w+)" , test_parameters.model) -%}

{%- if 1 == fk_model_names | count -%}
Expand Down

0 comments on commit 7d7557f

Please sign in to comment.