From e5c12baec52f7ad054ae58a889d7e16eaee890f4 Mon Sep 17 00:00:00 2001 From: Dan Flippo Date: Mon, 5 Dec 2022 14:43:19 -0500 Subject: [PATCH] Added support for seeds --- integration_tests/data/tpc_h_seeds/seeds.yml | 102 ++++++++++++++++++ integration_tests/dbt_project.yml | 3 +- integration_tests/macros/clone_table.sql | 75 +++++++++++++ integration_tests/models/dim_customers.sql | 2 +- integration_tests/models/dim_orders.sql | 2 +- integration_tests/models/dim_part.sql | 2 +- .../models/dim_part_supplier.sql | 2 +- .../models/dim_part_supplier_missing_con.sql | 2 +- integration_tests/models/fact_order_line.sql | 4 +- .../models/fact_order_line_missing_orders.sql | 2 +- integration_tests/models/schema.yml | 6 +- integration_tests/models/sources.yml | 54 ++++++++-- macros/create_constraints.sql | 6 +- 13 files changed, 239 insertions(+), 23 deletions(-) create mode 100644 integration_tests/data/tpc_h_seeds/seeds.yml create mode 100644 integration_tests/macros/clone_table.sql diff --git a/integration_tests/data/tpc_h_seeds/seeds.yml b/integration_tests/data/tpc_h_seeds/seeds.yml new file mode 100644 index 0000000..fe58be2 --- /dev/null +++ b/integration_tests/data/tpc_h_seeds/seeds.yml @@ -0,0 +1,102 @@ +version: 2 + +seeds: + + - name: part + columns: + - name: p_partkey + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: partsupp + columns: + - name: ps_partkey + description: "Part of compound primary key for this table" + tests: + - not_null + - relationships: + to: ref('part') + field: p_partkey + - name: ps_suppkey + description: "Part of compound primary key for this table" + tests: + - not_null + - dbt_constraints.foreign_key: + pk_table_name: ref('supplier') + pk_column_name: s_suppkey + tests: + # This is a higher performance way to test compound PK/UK + - dbt_constraints.unique_key: + column_names: + - ps_partkey + - ps_suppkey + # How to validate a compound primary key natively + - unique: + column_name: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')" + + - name: supplier + columns: + - name: s_suppkey + description: "The primary key for this table" + tests: + - unique + - not_null + - name: s_nationkey + tests: + - not_null + + - name: orders + columns: + - name: o_orderkey + description: "The primary key for this table" + tests: + - unique + - not_null + - name: o_custkey + tests: + - not_null + - relationships: + to: ref('customer') + field: c_custkey + + - name: customer + columns: + - name: c_custkey + description: "The primary key for dim_customers" + tests: + - dbt_constraints.primary_key + - name: c_name + description: "Customer Name" + tests: + - not_null + - name: c_nationkey + tests: + - not_null + + - name: lineitem + columns: + - name: l_orderkey + tests: + - not_null + - relationships: + to: ref('orders') + field: o_orderkey + - name: l_linenumber + tests: + - not_null + tests: + # This is a higher performance way to test compound PK/UK + - dbt_constraints.unique_key: + column_names: + - l_orderkey + - l_linenumber + # How to validate a compound primary key natively + - unique: + column_name: "coalesce(cast(l_orderkey as varchar(100)), '') || '~' || coalesce(cast(l_linenumber as varchar(100)), '')" + # How to validate a compound foreign key + - relationships: + column_name: "coalesce(cast(l_partkey as varchar(100)), '') || '~' || coalesce(cast(l_suppkey as varchar(100)), '')" + to: ref('partsupp') + field: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')" diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 34588e5..e484ac3 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -48,4 +48,5 @@ models: seeds: +quote_columns: false - +full_refresh: false + +post-hook: "{{ clone_table('source_') }}" + #+full_refresh: false diff --git a/integration_tests/macros/clone_table.sql b/integration_tests/macros/clone_table.sql new file mode 100644 index 0000000..cf293cf --- /dev/null +++ b/integration_tests/macros/clone_table.sql @@ -0,0 +1,75 @@ +{%- macro clone_table(new_prefix) -%} + {{ return(adapter.dispatch('clone_table')(new_prefix)) }} +{%- endmacro -%} + + +{%- macro snowflake__clone_table(new_prefix) -%} + {%- set table_clone = api.Relation.create( + database = this.database, + schema = this.schema, + identifier = new_prefix ~ this.identifier ) -%} + + {%- set clone_statement -%} + create or replace table {{table_clone}} clone {{this}} + {%- endset -%} + {%- do log("Creating table clone: " ~ table_clone, info=false) -%} + {%- do run_query(clone_statement) -%} + +{%- endmacro -%} + + +{%- macro postgres__clone_table(new_prefix) -%} + {%- set table_clone = api.Relation.create( + database = this.database, + schema = this.schema, + identifier = new_prefix ~ this.identifier ) -%} + + {%- set clone_statement -%} + drop table if exists {{table_clone}} + {%- endset -%} + {%- do log("Drop table if exists: " ~ table_clone, info=false) -%} + + {%- set clone_statement -%} + create table {{table_clone}} as select * from {{this}} + {%- endset -%} + {%- do log("Creating table clone: " ~ table_clone, info=false) -%} + {%- do run_query(clone_statement) -%} + +{%- endmacro -%} + + +{%- macro oracle__clone_table(new_prefix) -%} + {%- set table_clone = api.Relation.create( + database = this.database, + schema = this.schema, + identifier = new_prefix ~ this.identifier ) -%} + + {%- set clone_statement -%} +DECLARE +tbl_count number; +sql_stmt long; + +BEGIN + SELECT COUNT(*) INTO tbl_count + FROM dba_tables + WHERE owner = '{{table_clone.schema}}' + AND table_name = '{{table_clone.identifier}}'; + + IF(tbl_count <> 0) + THEN + sql_stmt:='DROP TABLE {{table_clone}}'; + EXECUTE IMMEDIATE sql_stmt; + END IF; +END; + {%- endset -%} + {%- do log("Drop table if exists: " ~ table_clone, info=false) -%} + + {%- set clone_statement -%} + create table {{table_clone}} as select * from {{this}} + {%- endset -%} + {%- do log("Creating table clone: " ~ table_clone, info=false) -%} + {%- do run_query(clone_statement) -%} + +{%- endmacro -%} + + diff --git a/integration_tests/models/dim_customers.sql b/integration_tests/models/dim_customers.sql index 6966916..7beaef8 100644 --- a/integration_tests/models/dim_customers.sql +++ b/integration_tests/models/dim_customers.sql @@ -4,4 +4,4 @@ */ SELECT C.*, DENSE_RANK() over (order by c_custkey) as c_custkey_seq -FROM {{ source('tpc_h', 'customer') }} C +FROM {{ ref('customer') }} C diff --git a/integration_tests/models/dim_orders.sql b/integration_tests/models/dim_orders.sql index 805d35d..b76b837 100644 --- a/integration_tests/models/dim_orders.sql +++ b/integration_tests/models/dim_orders.sql @@ -6,4 +6,4 @@ SELECT O.*, DENSE_RANK() over (order by o_orderkey) as o_orderkey_seq FROM -{{ source('tpc_h', 'orders') }} O +{{ ref('orders') }} O diff --git a/integration_tests/models/dim_part.sql b/integration_tests/models/dim_part.sql index 8d4cc89..2905fe0 100644 --- a/integration_tests/models/dim_part.sql +++ b/integration_tests/models/dim_part.sql @@ -5,4 +5,4 @@ SELECT P.*, DENSE_RANK() over (order by p_partkey) as p_partkey_seq -FROM {{ source('tpc_h', 'part') }} P +FROM {{ ref('part') }} P diff --git a/integration_tests/models/dim_part_supplier.sql b/integration_tests/models/dim_part_supplier.sql index 3eabdf1..2fd2135 100644 --- a/integration_tests/models/dim_part_supplier.sql +++ b/integration_tests/models/dim_part_supplier.sql @@ -2,4 +2,4 @@ All Part Suppliers */ SELECT PS.* -FROM {{ source('tpc_h', 'partsupp') }} PS +FROM {{ source('tpc_h', 'source_partsupp') }} PS diff --git a/integration_tests/models/dim_part_supplier_missing_con.sql b/integration_tests/models/dim_part_supplier_missing_con.sql index 3eabdf1..2fd2135 100644 --- a/integration_tests/models/dim_part_supplier_missing_con.sql +++ b/integration_tests/models/dim_part_supplier_missing_con.sql @@ -2,4 +2,4 @@ All Part Suppliers */ SELECT PS.* -FROM {{ source('tpc_h', 'partsupp') }} PS +FROM {{ source('tpc_h', 'source_partsupp') }} PS diff --git a/integration_tests/models/fact_order_line.sql b/integration_tests/models/fact_order_line.sql index 8ce15d0..eb6e46e 100644 --- a/integration_tests/models/fact_order_line.sql +++ b/integration_tests/models/fact_order_line.sql @@ -13,8 +13,8 @@ SELECT lineitem.*, cast(TO_CHAR(o_orderdate, 'YYYYMMDD') AS INTEGER) AS o_orderdate_key, coalesce(cast(l_orderkey as varchar(100)), '') || '~' || coalesce(cast(l_linenumber as varchar(100)), '') AS integration_id -FROM {{ source('tpc_h', 'lineitem') }} lineitem -JOIN {{ source('tpc_h', 'orders') }} orders ON l_orderkey = o_orderkey +FROM {{ ref('lineitem') }} lineitem +JOIN {{ ref('orders') }} orders ON l_orderkey = o_orderkey {% if is_incremental() -%} diff --git a/integration_tests/models/fact_order_line_missing_orders.sql b/integration_tests/models/fact_order_line_missing_orders.sql index c59b799..3f24c59 100644 --- a/integration_tests/models/fact_order_line_missing_orders.sql +++ b/integration_tests/models/fact_order_line_missing_orders.sql @@ -5,4 +5,4 @@ SELECT lineitem.*, coalesce(cast(l_orderkey as varchar(100)), '') || '~' || coalesce(cast(l_linenumber as varchar(100)), '') AS integration_id -FROM {{ source('tpc_h', 'lineitem') }} lineitem +FROM {{ ref('lineitem') }} lineitem diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index 2b2412f..5cbcd23 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -213,7 +213,11 @@ models: - not_null # Testing FK to a source - relationships: - to: source('tpc_h', 'supplier') + to: source('tpc_h', 'source_supplier') + field: s_suppkey + # Testing FK to a seed + - relationships: + to: ref('supplier') field: s_suppkey tests: - dbt_constraints.unique_key: diff --git a/integration_tests/models/sources.yml b/integration_tests/models/sources.yml index 4653068..12c5f34 100644 --- a/integration_tests/models/sources.yml +++ b/integration_tests/models/sources.yml @@ -7,29 +7,44 @@ sources: - name: tpc_h schema: "{{ target.schema }}" tables: - - name: part + - name: source_part columns: - name: p_partkey description: "The primary key for this table" tests: - unique - not_null + - relationships: + to: ref('part') + field: p_partkey - - name: partsupp + - name: source_partsupp columns: - name: ps_partkey description: "Part of compound primary key for this table" tests: - not_null - relationships: - to: source('tpc_h', 'part') + to: source('tpc_h', 'source_part') + field: p_partkey + - relationships: + to: ref('partsupp') + field: ps_partkey + - relationships: + to: ref('part') field: p_partkey - name: ps_suppkey description: "Part of compound primary key for this table" tests: - not_null - relationships: - to: source('tpc_h', 'supplier') + to: source('tpc_h', 'source_supplier') + field: s_suppkey + - relationships: + to: ref('partsupp') + field: ps_suppkey + - relationships: + to: ref('supplier') field: s_suppkey tests: # This is a higher performance way to test compound PK/UK @@ -41,38 +56,47 @@ sources: - unique: column_name: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')" - - name: "supplier" + - name: "source_supplier" columns: - name: "s_suppkey" description: "The primary key for this table" tests: - unique - not_null + - relationships: + to: ref('supplier') + field: s_suppkey - name: s_nationkey tests: - not_null - - name: orders + - name: source_orders columns: - name: o_orderkey description: "The primary key for this table" tests: - unique - not_null + - relationships: + to: ref('orders') + field: o_orderkey - name: o_custkey tests: - not_null - relationships: - to: source('tpc_h', 'customer') + to: source('tpc_h', 'source_customer') field: c_custkey - - name: customer + - name: source_customer columns: - name: c_custkey description: "The primary key for dim_customers" tests: - unique - not_null + - relationships: + to: ref('customer') + field: c_custkey - name: c_name description: "Customer Name" tests: @@ -81,13 +105,13 @@ sources: tests: - not_null - - name: lineitem + - name: source_lineitem columns: - name: l_orderkey tests: - not_null - relationships: - to: source('tpc_h', 'orders') + to: source('tpc_h', 'source_orders') field: o_orderkey - name: l_linenumber tests: @@ -107,3 +131,13 @@ sources: column_name: "coalesce(cast(l_partkey as varchar(100)), '') || '~' || coalesce(cast(l_suppkey as varchar(100)), '')" to: source('tpc_h', 'partsupp') field: "coalesce(cast(ps_partkey as varchar(100)), '') || '~' || coalesce(cast(ps_suppkey as varchar(100)), '')" + + # multi-column FK + - dbt_constraints.foreign_key: + fk_column_names: + - l_orderkey + - l_linenumber + pk_table_name: ref('lineitem') + pk_column_names: + - l_orderkey + - l_linenumber diff --git a/macros/create_constraints.sql b/macros/create_constraints.sql index 0f1242e..ed37bf8 100644 --- a/macros/create_constraints.sql +++ b/macros/create_constraints.sql @@ -175,8 +175,8 @@ These models must be physical tables and cannot be sources -#} {%- set table_models = [] -%} {%- for node in graph.nodes.values() | selectattr("unique_id", "in", test_model.depends_on.nodes) - if node.resource_type in ( ( "model", "snapshot") ) - if node.config.materialized in( ("table", "incremental", "snapshot") ) -%} + if node.resource_type in ( ( "model", "snapshot", "seed") ) + if node.config.materialized in( ("table", "incremental", "snapshot", "seed") ) -%} {#- Append to our list of models &or snapshots for this test -#} {%- do table_models.append(node) -%} @@ -245,7 +245,7 @@ {%- set fk_model = none -%} {%- set pk_model = none -%} - {%- set fk_model_names = modules.re.findall( "(models|snapshots)\W+(\w+)" , test_model.file_key_name) -%} + {%- set fk_model_names = modules.re.findall( "(models|snapshots|seeds)\W+(\w+)" , test_model.file_key_name) -%} {%- set fk_source_names = modules.re.findall( "source\W+(\w+)\W+(\w+)" , test_parameters.model) -%} {%- if 1 == fk_model_names | count -%}