# dbt_project.yml

# Package identity.
name: 'dbt_project_evaluator'
version: '1.0.0'
config-version: 2

# Supported dbt versions: 1.6.0-rc1 or later, and below 2.0.0.
require-dbt-version: ['>=1.6.0-rc1', '<2.0.0']

# Directories searched for each resource type, relative to the project root.
model-paths: ['models']
analysis-paths: ['analysis']
test-paths: ['tests']
seed-paths: ['seeds']
macro-paths: ['macros']
snapshot-paths: ['snapshots']

# Directory which will store compiled SQL files.
target-path: 'target'

# Directories to be removed by `dbt clean`.
clean-targets:
  - 'target'
  - 'dbt_packages'


# Macros dispatched in the `dbt` namespace are looked up in this package
# first, then in `dbt` itself.
dispatch:
  - macro_namespace: dbt
    search_order:
      - 'dbt_project_evaluator'
      - 'dbt'

# Materialization settings for this package's models. Each Jinja expression
# picks `table` or `view` based on the adapter named by `target.type`.
models:
  dbt_project_evaluator:
    # package-wide default: tables on DuckDB, views on every other adapter
    +materialized: "{{ 'table' if target.type in ['duckdb'] else 'view' }}"
    marts:
      core:
        int_all_graph_resources:
          +materialized: table
        int_direct_relationships:
          # required for BigQuery, Redshift, and Databricks for performance/memory reasons
          +materialized: >-
            {{ 'table' if target.type in ['bigquery', 'redshift', 'databricks']
            else 'view' }}
        int_all_dag_relationships:
          # required for BigQuery, Redshift, Databricks, and ClickHouse for performance/memory reasons
          +materialized: >-
            {{ 'table' if target.type in ['bigquery', 'redshift', 'databricks', 'clickhouse']
            else 'view' }}
      dag:
        # everything under marts/dag is always built as a table
        +materialized: table
    staging:
      graph:
        stg_node_relationships:
          +materialized: table
      variables:
        stg_naming_convention_folders:
          # required for Redshift because listagg runs only on tables
          +materialized: "{{ 'table' if target.type == 'redshift' else 'view' }}"
        stg_naming_convention_prefixes:
          # required for Redshift because listagg runs only on tables
          +materialized: "{{ 'table' if target.type == 'redshift' else 'view' }}"


vars:

  # -- Tests and docs coverage variables --
  documentation_coverage_target: 100
  test_coverage_target: 100

  # one inner list per combination of test macros
  # (see the package documentation for how combinations are matched)
  primary_key_test_macros:
    - ['dbt.test_unique', 'dbt.test_not_null']
    - ['dbt_utils.test_unique_combination_of_columns']

  # -- Graph variables --
  # node types to test for primary key coverage. acceptable node types: model, source, snapshot, seed
  enforced_primary_key_node_types: ['model']

  # -- DAG variables --
  models_fanout_threshold: 3
  too_many_joins_threshold: 7

  # -- Naming conventions variables --
  # To add a new "model type", append it to `model_types` and create the
  # matching `<model_type>_folder_name` and/or `<model_type>_prefixes` variables.
  model_types:
    - 'base'
    - 'staging'
    - 'intermediate'
    - 'marts'
    - 'other'

  # folder name per model type (`other` defines prefixes only)
  base_folder_name: 'base'
  staging_folder_name: 'staging'
  intermediate_folder_name: 'intermediate'
  marts_folder_name: 'marts'

  # model-name prefixes per model type
  base_prefixes: ['base_']
  staging_prefixes: ['stg_']
  intermediate_prefixes: ['int_']
  marts_prefixes: ['fct_', 'dim_']
  other_prefixes: ['rpt_']

  # -- Performance variables --
  # 4 on Athena and Trino, 5 on every other adapter
  chained_views_threshold: "{{ 5 if target.type not in ['athena', 'trino'] else 4 }}"

  # -- Execution variables --
  # 500 on Athena and BigQuery, 10000 on every other adapter
  insert_batch_size: "{{ 500 if target.type in ['athena', 'bigquery'] else 10000 }}"
  # 9 on BigQuery/Spark/Databricks, 4 on Athena/Trino/ClickHouse, -1 elsewhere
  # NOTE(review): -1 presumably means "no depth limit" -- confirm against the macros that read it
  max_depth_dag: >-
    {{ 9 if target.type in ['bigquery', 'spark', 'databricks']
    else 4 if target.type in ['athena', 'trino', 'clickhouse']
    else -1 }}

  # -- Code complexity variables --
  # strings that start a comment in model SQL
  comment_chars: ['--']
  # cost assigned to each SQL token, listed from cheapest to most expensive
  token_costs:
    and: 0.1
    or: 0.1
    when: 0.5
    coalesce: 1
    distinct: 1
    greatest: 1
    least: 1
    group: 1
    join: 1
    order: 1
    select: 1
    where: 1
    having: 2
    flatten: 3
    unnest: 3
    pivot: 3
    'partition by': 3
    qualify: 3