diff --git a/docs/rules/testing.md b/docs/rules/testing.md index e1c1b654..3c27e894 100644 --- a/docs/rules/testing.md +++ b/docs/rules/testing.md @@ -6,7 +6,8 @@ `fct_missing_primary_key_tests` ([source](https://github.com/dbt-labs/dbt-project-evaluator/tree/main/models/marts/tests/fct_missing_primary_key_tests.sql)) lists every model that does not meet the minimum testing requirement of testing primary keys. Any model that does not have either 1. a `not_null` test and a `unique` test applied to a single column OR -2. a `dbt_utils.unique_combination_of_columns` test applied to a set of columns +2. a `dbt_utils.unique_combination_of_columns` test applied to a set of columns OR +3. a `not_null` constraint and a `unique` test applied to a single column will be flagged by this model. @@ -16,7 +17,7 @@ Tests are assertions you make about your models and other resources in your dbt **How to Remediate** -Apply a [uniqueness test](https://docs.getdbt.com/reference/resource-properties/tests#unique) and a [not null test](https://docs.getdbt.com/reference/resource-properties/tests#not_null) to the column that represents the grain of your model in its schema entry. For models that are unique across a combination of columns, we recommend adding a surrogate key column to your model, then applying these tests to that new model. See the [`surrogate_key`](https://github.com/dbt-labs/dbt-utils#surrogate_key-source) macro from dbt_utils for more info! Alternatively, you can use the [`dbt_utils.unique_combination_of_columns`](https://github.com/dbt-labs/dbt-utils#unique_combination_of_columns-source) test from `dbt_utils`. Check out the [overriding variables section](../customization/overriding-variables.md) to read more about configuring other primary key tests for your project! +Apply a [uniqueness test](https://docs.getdbt.com/reference/resource-properties/tests#unique) and a [not null test](https://docs.getdbt.com/reference/resource-properties/tests#not_null) to the column that represents the grain of your model in its schema entry. For contracted models, optionally replace the not null test with the not null [constraint](https://docs.getdbt.com/reference/resource-properties/constraints). For models that are unique across a combination of columns, we recommend adding a surrogate key column to your model, then applying these tests to that new model. See the [`surrogate_key`](https://github.com/dbt-labs/dbt-utils#surrogate_key-source) macro from dbt_utils for more info! Alternatively, you can use the [`dbt_utils.unique_combination_of_columns`](https://github.com/dbt-labs/dbt-utils#unique_combination_of_columns-source) test from `dbt_utils`. Check out the [overriding variables section](../customization/overriding-variables.md) to read more about configuring other primary key tests for your project! Additional tests can be configured by applying a [generic test](https://docs.getdbt.com/docs/building-a-dbt-project/tests#generic-tests) in the model's `.yml` entry or by creating a [singular test](https://docs.getdbt.com/docs/building-a-dbt-project/tests#singular-tests) in the `tests` directory of you project. diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml index 183a3e4d..392271f3 100644 --- a/integration_tests/ci/sample.profiles.yml +++ b/integration_tests/ci/sample.profiles.yml @@ -71,4 +71,4 @@ integration_tests: schema: dbt_project_evaluator_integration_tests_trino threads: 5 session_properties: - query_max_stage_count: 200 + query_max_stage_count: 275 diff --git a/integration_tests/models/marts/intermediate/_dim_model_7.yml b/integration_tests/models/marts/intermediate/_dim_model_7.yml new file mode 100644 index 00000000..6707dd10 --- /dev/null +++ b/integration_tests/models/marts/intermediate/_dim_model_7.yml @@ -0,0 +1,12 @@ +models: + - name: dim_model_7 + config: + contract: + enforced: true + columns: + - name: id + data_type: int + constraints: + - type: not_null + tests: + - unique diff --git a/integration_tests/seeds/tests/test_fct_missing_primary_key_tests.csv b/integration_tests/seeds/tests/test_fct_missing_primary_key_tests.csv index 46204a98..d4d2e3da 100644 --- a/integration_tests/seeds/tests/test_fct_missing_primary_key_tests.csv +++ b/integration_tests/seeds/tests/test_fct_missing_primary_key_tests.csv @@ -1,14 +1,13 @@ -resource_name,is_primary_key_tested,number_of_tests_on_model -dim_model_7,FALSE,0 -fct_model_6,FALSE,0 -model_8,FALSE,0 -fct_model_9,FALSE,0 -int_model_5.v1,FALSE,0 -int_model_5.v2,FALSE,0 -int_model_5.v3,FALSE,0 -report_1,FALSE,0 -report_2,FALSE,0 -report_3,FALSE,0 -stg_model_1,FALSE,2 -stg_model_5,FALSE,0 -fct_model_10,FALSE,0 +resource_name,is_primary_key_tested,number_of_tests_on_model,number_of_constraints_on_model +fct_model_6,FALSE,0,0 +model_8,FALSE,0,0 +fct_model_9,FALSE,0,0 +int_model_5.v1,FALSE,0,0 +int_model_5.v2,FALSE,0,0 +int_model_5.v3,FALSE,0,0 +report_1,FALSE,0,0 +report_2,FALSE,0,0 +report_3,FALSE,0,0 +stg_model_1,FALSE,2,0 +stg_model_5,FALSE,0,0 +fct_model_10,FALSE,0,0 diff --git a/integration_tests/seeds/tests/test_fct_test_coverage.csv b/integration_tests/seeds/tests/test_fct_test_coverage.csv index 7f2a65fb..16546318 100644 --- a/integration_tests/seeds/tests/test_fct_test_coverage.csv +++ b/integration_tests/seeds/tests/test_fct_test_coverage.csv @@ -1,2 +1,2 @@ total_models,total_tests,tested_models,test_coverage_pct,staging_test_coverage_pct,intermediate_test_coverage_pct,marts_test_coverage_pct,other_test_coverage_pct,test_to_model_ratio -18,13,6,33.33,80.00,25.00,0.00,25.00,0.7222 +18,14,7,38.89,80.00,25.00,20.00,25.00,0.7778 diff --git a/integration_tests/seeds/tests/tests_seeds.yml b/integration_tests/seeds/tests/tests_seeds.yml index de480a01..91aeabff 100644 --- a/integration_tests/seeds/tests/tests_seeds.yml +++ b/integration_tests/seeds/tests/tests_seeds.yml @@ -6,6 +6,9 @@ seeds: - dbt_utils.equality: name: equality_fct_missing_primary_key_tests compare_model: ref('fct_missing_primary_key_tests') + exclude_columns: + - resource_type + - model_type - name: test_fct_test_coverage config: diff --git a/macros/unpack/get_column_values.sql b/macros/unpack/get_column_values.sql index d1607b57..5e1f5108 100644 --- a/macros/unpack/get_column_values.sql +++ b/macros/unpack/get_column_values.sql @@ -24,6 +24,9 @@ wrap_string_with_quotes(dbt.escape_single_quotes(column.name)), wrap_string_with_quotes(dbt.escape_single_quotes(column.description)), wrap_string_with_quotes(dbt.escape_single_quotes(column.data_type)), + wrap_string_with_quotes(dbt.escape_single_quotes(tojson(column.constraints))), + column.constraints | selectattr('type', 'equalto', 'not_null') | list | length > 0, + column.constraints | length, wrap_string_with_quotes(dbt.escape_single_quotes(column.quote)) ] %} diff --git a/models/marts/tests/intermediate/int_model_test_summary.sql b/models/marts/tests/intermediate/int_model_test_summary.sql index 62b432b8..8cc64127 100644 --- a/models/marts/tests/intermediate/int_model_test_summary.sql +++ b/models/marts/tests/intermediate/int_model_test_summary.sql @@ -14,6 +14,12 @@ count_column_tests as ( select relationships.direct_parent_id, all_graph_resources.column_name, + sum(case + when all_graph_resources.is_test_unique + then 1 + else 0 + end + ) as test_unique_count, {%- for test_set in var('primary_key_test_macros') %} {%- set outer_loop = loop -%} count(distinct case when @@ -32,6 +38,34 @@ count_column_tests as ( group by 1,2 ), +count_column_constraints as ( + + select + node_unique_id as direct_parent_id, + name as column_name, + case + when has_not_null_constraint + then 1 + else 0 + end as constraint_not_null_count, + constraints_count + from {{ ref('base_node_columns') }} + +), + +combine_column_counts as ( + + select + count_column_tests.*, + count_column_tests.test_unique_count + count_column_constraints.constraint_not_null_count as primary_key_mixed_method_count, + count_column_constraints.constraints_count + from count_column_tests + left join count_column_constraints + on count_column_tests.direct_parent_id = count_column_constraints.direct_parent_id + and count_column_tests.column_name = count_column_constraints.column_name + +), + agg_test_relationships as ( select @@ -41,14 +75,16 @@ agg_test_relationships as ( {%- for test_set in var('primary_key_test_macros') %} {%- set compare_value = test_set | length %} primary_key_method_{{ loop.index }}_count >= {{ compare_value}} - {%- if not loop.last %} or {% endif %} - {%- endfor %} + or + {%- endfor %} + primary_key_mixed_method_count >= 2 ) then 1 else 0 end ) >= 1 as is_primary_key_tested, - sum(tests_count) as number_of_tests_on_model - from count_column_tests + sum(tests_count) as number_of_tests_on_model, + sum(constraints_count) as number_of_constraints_on_model + from combine_column_counts group by 1 ), @@ -59,7 +95,8 @@ final as ( all_graph_resources.resource_type, all_graph_resources.model_type, coalesce(agg_test_relationships.is_primary_key_tested, FALSE) as is_primary_key_tested, - coalesce(agg_test_relationships.number_of_tests_on_model, 0) as number_of_tests_on_model + coalesce(agg_test_relationships.number_of_tests_on_model, 0) as number_of_tests_on_model, + coalesce(agg_test_relationships.number_of_constraints_on_model, 0) as number_of_constraints_on_model from all_graph_resources left join agg_test_relationships on all_graph_resources.resource_id = agg_test_relationships.direct_parent_id diff --git a/models/staging/graph/base/base_node_columns.sql b/models/staging/graph/base/base_node_columns.sql index 81dd5a12..d08954c1 100644 --- a/models/staging/graph/base/base_node_columns.sql +++ b/models/staging/graph/base/base_node_columns.sql @@ -18,7 +18,10 @@ select cast(null as {{ dbt.type_string()}}) as name, cast(null as {{ dbt_project_evaluator.type_large_string()}}) as description, cast(null as {{ dbt.type_string()}}) as data_type, + cast(null as {{ dbt.type_string()}}) as constraints, + cast(True as boolean) as has_not_null_constraint, + cast(0 as {{ dbt.type_int() }}) as constraints_count, cast(null as {{ dbt.type_string()}}) as quote from dummy_cte -where false \ No newline at end of file +where false