diff --git a/dbt_project.yml b/dbt_project.yml index f06ff3e9..e66cfcbf 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -57,6 +57,10 @@ vars: primary_key_test_macros: [["dbt.test_unique", "dbt.test_not_null"], ["dbt_utils.test_unique_combination_of_columns"]] + # -- Graph variables -- + # node types to test for primary key coverage. acceptable node types: model, source, snapshot, seed + enforced_primary_key_node_types: ["model"] + # -- DAG variables -- models_fanout_threshold: 3 diff --git a/docs/customization/overriding-variables.md b/docs/customization/overriding-variables.md index 9e41269e..666f683d 100644 --- a/docs/customization/overriding-variables.md +++ b/docs/customization/overriding-variables.md @@ -9,6 +9,7 @@ Currently, this package uses different variables to adapt the models to your obj | `test_coverage_target` | the minimum acceptable test coverage percentage | 100% | | `documentation_coverage_target` | the minimum acceptable documentation coverage percentage | 100% | | `primary_key_test_macros` | the set(s) of dbt tests used to check validity of a primary key | `[["dbt.test_unique", "dbt.test_not_null"], ["dbt_utils.test_unique_combination_of_columns"]]` | +| `enforced_primary_key_node_types` | the set of node types for which you would like to enforce primary key test coverage. Valid options to include are `model`, `source`, `snapshot`, `seed` | `["model"]` | **Usage notes for `primary_key_test_macros:`** @@ -21,7 +22,7 @@ For each entry in the parent list, the logic in `int_model_test_summary` will ev Each set of test(s) that define a primary key requirement must be grouped together in a sub-list to ensure they are evaluated together (e.g. [`dbt.test_unique`, `dbt.test_not_null`] ). 
-*While it's not explicitly tested in this package, we strongly encourage adding a `not_null` test on each of the columns listed in the `dbt_utils.unique_combination_of_columns` tests.* +*While it's not explicitly tested in this package, we strongly encourage adding a `not_null` test on each of the columns listed in the `dbt_utils.unique_combination_of_columns` tests. Alternatively, on Snowflake, consider `dbt_constraints.test_primary_key` in the [dbt Constraints](https://github.com/Snowflake-Labs/dbt_constraints) package, which enforces that each field in the primary key is non-null.* ```yaml title="dbt_project.yml" # set your test and doc coverage to 75% instead diff --git a/docs/rules/testing.md b/docs/rules/testing.md index 7eae1792..e1c1b654 100644 --- a/docs/rules/testing.md +++ b/docs/rules/testing.md @@ -21,6 +21,12 @@ Apply a [uniqueness test](https://docs.getdbt.com/reference/resource-properties/ Additional tests can be configured by applying a [generic test](https://docs.getdbt.com/docs/building-a-dbt-project/tests#generic-tests) in the model's `.yml` entry or by creating a [singular test](https://docs.getdbt.com/docs/building-a-dbt-project/tests#singular-tests) in the `tests` directory of you project. +**Enforcing on more node types (Advanced)** + +You can optionally extend this test to apply to more node types (`source`, `snapshot`, `seed`) by configuring the variable `enforced_primary_key_node_types` to be a set of node types for which you wish to enforce primary key test coverage in addition to (or instead of) just models. Check out the [overriding variables section](../customization/overriding-variables.md) for instructions. + +Snapshots should always have a multi-field primary key in order to function, while sources and seeds may not. Depending on your expectations for duplicates and null values, different kinds of primary key tests may be appropriate. Consider your use case carefully. 
+ --- ## Test Coverage diff --git a/models/marts/tests/fct_missing_primary_key_tests.sql b/models/marts/tests/fct_missing_primary_key_tests.sql index 37bd0c29..8d1227e3 100644 --- a/models/marts/tests/fct_missing_primary_key_tests.sql +++ b/models/marts/tests/fct_missing_primary_key_tests.sql @@ -1,13 +1,12 @@ -{{ - config( - alias = 'my_alias', - ) -}} - with tests as ( select * from {{ ref('int_model_test_summary') }} + where resource_type in + ( + {# quote and comma-join the node types configured in enforced_primary_key_node_types; an empty list renders in ('') and matches no rows instead of producing invalid SQL #} + '{{ var('enforced_primary_key_node_types') | join("', '") }}' + ) ), final as ( diff --git a/models/marts/tests/fct_test_coverage.sql b/models/marts/tests/fct_test_coverage.sql index 872706bb..504aa22a 100644 --- a/models/marts/tests/fct_test_coverage.sql +++ b/models/marts/tests/fct_test_coverage.sql @@ -2,6 +2,7 @@ with test_counts as ( select * from {{ ref('int_model_test_summary') }} + where resource_type = 'model' ), conversion as ( diff --git a/models/marts/tests/intermediate/int_model_test_summary.sql b/models/marts/tests/intermediate/int_model_test_summary.sql index 54116d25..4b430230 100644 --- a/models/marts/tests/intermediate/int_model_test_summary.sql +++ b/models/marts/tests/intermediate/int_model_test_summary.sql @@ -56,13 +56,15 @@ agg_test_relationships as ( final as ( select all_graph_resources.resource_name, + all_graph_resources.resource_type, all_graph_resources.model_type, coalesce(agg_test_relationships.is_primary_key_tested, FALSE) as is_primary_key_tested, coalesce(agg_test_relationships.number_of_tests_on_model, 0) as number_of_tests_on_model from all_graph_resources left join agg_test_relationships on all_graph_resources.resource_id = agg_test_relationships.direct_parent_id - where all_graph_resources.resource_type = 'model' + where + all_graph_resources.resource_type in ('model', 'seed', 'source', 'snapshot') ) select * from final