From 225f74a9d91e3f7143b590641fc11f4fb2e15a3e Mon Sep 17 00:00:00 2001 From: harsha-stellar-data Date: Thu, 31 Oct 2024 14:43:09 -0400 Subject: [PATCH] Added Type-II Dimensional Models and the corresponding yml files with tests for the Soroban Contract tables --- .../intermediate/int_incr_contract_code.sql | 2 +- .../intermediate/int_incr_contract_code.yml | 103 ++++++++++++++ .../intermediate/int_incr_contract_data.sql | 2 +- .../intermediate/int_incr_contract_data.yml | 100 +++++++++++++ models/intermediate/int_incr_ttl.sql | 2 +- models/intermediate/int_incr_ttl.yml | 75 ++++++++++ .../int_transform_contract_code.sql | 2 +- .../int_transform_contract_code.yml | 106 ++++++++++++++ .../int_transform_contract_data.sql | 2 +- .../int_transform_contract_data.yml | 88 ++++++++++++ models/intermediate/int_transform_ttl.sql | 2 +- models/intermediate/int_transform_ttl.yml | 73 ++++++++++ models/marts/dim_contract_code_current.sql | 2 +- models/marts/dim_contract_code_current.yml | 94 ++++++++++++ models/marts/dim_contract_code_hist.sql | 2 +- models/marts/dim_contract_code_hist.yml | 134 ++++++++++++++++++ models/marts/dim_contract_data_current.sql | 2 +- models/marts/dim_contract_data_current.yml | 82 +++++++++++ models/marts/dim_contract_data_hist.sql | 2 +- models/marts/dim_contract_data_hist.yml | 122 ++++++++++++++++ models/marts/dim_ttl_current.sql | 2 +- models/marts/dim_ttl_current.yml | 64 +++++++++ models/marts/dim_ttl_hist.sql | 2 +- models/marts/dim_ttl_hist.yml | 104 ++++++++++++++ 24 files changed, 1157 insertions(+), 12 deletions(-) create mode 100644 models/intermediate/int_incr_contract_code.yml create mode 100644 models/intermediate/int_incr_contract_data.yml create mode 100644 models/intermediate/int_incr_ttl.yml create mode 100644 models/intermediate/int_transform_contract_code.yml create mode 100644 models/intermediate/int_transform_contract_data.yml create mode 100644 models/intermediate/int_transform_ttl.yml create mode 100644 models/marts/dim_contract_code_current.yml create mode 100644 models/marts/dim_contract_code_hist.yml create mode 100644 models/marts/dim_contract_data_current.yml create mode 100644 models/marts/dim_contract_data_hist.yml create mode 100644 models/marts/dim_ttl_current.yml create mode 100644 models/marts/dim_ttl_hist.yml diff --git a/models/intermediate/int_incr_contract_code.sql b/models/intermediate/int_incr_contract_code.sql index de56414..3bdad38 100644 --- a/models/intermediate/int_incr_contract_code.sql +++ b/models/intermediate/int_incr_contract_code.sql @@ -3,7 +3,7 @@ materialized = 'incremental', unique_key = ['ledger_key_hash', 'closed_at'], cluster_by = ["ledger_key_hash", "closed_at"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "intermediate", "daily"] ) }} /* diff --git a/models/intermediate/int_incr_contract_code.yml b/models/intermediate/int_incr_contract_code.yml new file mode 100644 index 0000000..b92ab17 --- /dev/null +++ b/models/intermediate/int_incr_contract_code.yml @@ -0,0 +1,103 @@ +version: 2 + +models: + - name: int_incr_contract_code + description: "This intermediate model handles the incremental loading of contract code data from the staging layer." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Incremental model for contract code with daily updates" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + - closed_at + meta: + description: "Tests the uniqueness combination of contract code tracking fields" + columns: + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + + - name: contract_code_hash + description: '{{ doc("contract_code_hash") }}' + tests: + - not_null + + - name: last_modified_ledger + description: '{{ doc("last_modified_ledger") }}' + tests: + - not_null + + - name: ledger_entry_change + description: '{{ doc("ledger_entry_change") }}' + tests: + - not_null + - incremental_accepted_values: + values: [0, 1, 2] + + - name: ledger_sequence + description: '{{ doc("ledger_sequence") }}' + tests: + - not_null + + - name: deleted + description: '{{ doc("deleted") }}' + tests: + - not_null + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: n_instructions + description: '{{ doc("n_instructions") }}' + + - name: n_functions + description: '{{ doc("n_functions") }}' + + - name: n_globals + description: '{{ doc("n_globals") }}' + + - name: n_table_entries + description: '{{ doc("n_table_entries") }}' + + - name: n_types + description: '{{ doc("n_types") }}' + + - name: n_data_segments + description: '{{ doc("n_data_segments") }}' + + - name: n_elem_segments + description: '{{ doc("n_elem_segments") }}' + + - name: n_imports + description: '{{ doc("n_imports") }}' + + - name: n_exports + description: '{{ doc("n_exports") }}' + + - name: n_data_segment_bytes + description: '{{ doc("n_data_segment_bytes") }}' + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "The timestamp for when the data was loaded into the data warehouse." + tests: + - not_null diff --git a/models/intermediate/int_incr_contract_data.sql b/models/intermediate/int_incr_contract_data.sql index 5b2568b..7fc1d37 100644 --- a/models/intermediate/int_incr_contract_data.sql +++ b/models/intermediate/int_incr_contract_data.sql @@ -8,7 +8,7 @@ "granularity": "month" }, cluster_by = ['closed_at', 'ledger_key_hash'], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "intermediate", "daily"] ) }} /* diff --git a/models/intermediate/int_incr_contract_data.yml b/models/intermediate/int_incr_contract_data.yml new file mode 100644 index 0000000..a1984f4 --- /dev/null +++ b/models/intermediate/int_incr_contract_data.yml @@ -0,0 +1,100 @@ +version: 2 + +models: + - name: int_incr_contract_data + description: "This intermediate model handles the incremental loading of contract data from the staging layer." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Incremental model for contract data with daily updates" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + - closed_at + meta: + description: "Tests the uniqueness combination of contract data tracking fields" + + columns: + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + + - name: contract_id + description: '{{ doc("contract_id") }}' + tests: + - not_null + + - name: contract_key_type + description: '{{ doc("contract_key_type") }}' + tests: + - not_null + + - name: contract_durability + description: '{{ doc("contract_durability") }}' + tests: + - not_null + + - name: last_modified_ledger + description: '{{ doc("last_modified_ledger") }}' + tests: + - not_null + + - name: ledger_entry_change + description: '{{ doc("ledger_entry_change") }}' + tests: + - not_null + - incremental_accepted_values: + values: [0, 1, 2] + + - name: ledger_sequence + description: '{{ doc("ledger_sequence") }}' + tests: + - not_null + + - name: asset_code + description: '{{ doc("asset_code") }}' + + - name: asset_issuer + description: '{{ doc("asset_issuer") }}' + + - name: asset_type + description: '{{ doc("asset_type") }}' + + - name: balance_holder + description: '{{ doc("balance_holder") }}' + + - name: balance + description: '{{ doc("balance") }}' + + - name: deleted + description: '{{ doc("deleted") }}' + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: batch_insert_ts + description: '{{ doc("batch_insert_ts") }}' + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "Timestamp when the record was loaded." + tests: + - not_null diff --git a/models/intermediate/int_incr_ttl.sql b/models/intermediate/int_incr_ttl.sql index 0e8bebd..87a0780 100644 --- a/models/intermediate/int_incr_ttl.sql +++ b/models/intermediate/int_incr_ttl.sql @@ -8,7 +8,7 @@ "granularity": "month" }, cluster_by = ['closed_at', 'key_hash'], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "intermediate", "daily"] ) }} /* diff --git a/models/intermediate/int_incr_ttl.yml b/models/intermediate/int_incr_ttl.yml new file mode 100644 index 0000000..bdb47a1 --- /dev/null +++ b/models/intermediate/int_incr_ttl.yml @@ -0,0 +1,75 @@ +version: 2 + +models: + - name: int_incr_ttl + description: | + This intermediate model handles the incremental loading of ledger time-to-live (TTL) + data from the staging layer. It processes only new or updated data up to the previous day based on `execution_date`. + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Incremental model for TTL data with daily updates" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - key_hash + - closed_at + meta: + description: "Tests the uniqueness combination of TTL tracking fields" + columns: + - name: key_hash + description: '{{ doc("key_hash") }}' + tests: + - not_null + + - name: live_until_ledger_seq + description: '{{ doc("live_until_ledger_seq") }}' + tests: + - not_null + + - name: last_modified_ledger + description: '{{ doc("last_modified_ledger") }}' + tests: + - not_null + + - name: ledger_entry_change + description: '{{ doc("ledger_entry_change") }}' + tests: + - not_null + - incremental_accepted_values: + values: [0, 1, 2] + + - name: ledger_sequence + description: '{{ doc("ledger_sequence") }}' + tests: + - not_null + + - name: deleted + description: '{{ doc("deleted") }}' + tests: + - not_null + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "The timestamp for when the data was loaded into the data warehouse." + tests: + - not_null diff --git a/models/intermediate/int_transform_contract_code.sql b/models/intermediate/int_transform_contract_code.sql index 88d2345..94b96fc 100644 --- a/models/intermediate/int_transform_contract_code.sql +++ b/models/intermediate/int_transform_contract_code.sql @@ -3,7 +3,7 @@ materialized = 'table', unique_key = ['ledger_key_hash', 'closed_at'], cluster_by = ["ledger_key_hash", "closed_at", "row_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "intermediate", "daily"] ) }} diff --git a/models/intermediate/int_transform_contract_code.yml b/models/intermediate/int_transform_contract_code.yml new file mode 100644 index 0000000..223bfe2 --- /dev/null +++ b/models/intermediate/int_transform_contract_code.yml @@ -0,0 +1,106 @@ +version: 2 + +models: + - name: int_transform_contract_code + description: "This intermediate model applies transformations to the contract code, calculates new fields like contract_create_ts and contract_delete_ts, and performs deduplication based on row hash." + meta: + owner: "Data team" + update_schedule: "daily" + description: "Transformed and deduplicated contract code data with derived timestamps" + upstream_dependencies: ["int_incr_contract_code"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + - closed_at + meta: + description: "Tests the uniqueness combination of contract code tracking fields" + columns: + - name: contract_code_hash + description: '{{ doc("contract_code_hash") }}' + tests: + - not_null + - relationships: + to: ref('int_incr_contract_code') + field: contract_code_hash + + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + - relationships: + to: ref('int_incr_contract_code') + field: ledger_key_hash + + - name: ledger_sequence + description: '{{ doc("ledger_sequence") }}' + tests: + - not_null + + - name: contract_create_ts + description: "Timestamp when the contract was created (ledger_entry_change = 0)" + tests: + - not_null + + - name: contract_delete_ts + description: "Timestamp when the contract was deleted (ledger_entry_change = 2 and deleted = true)" + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: n_instructions + description: '{{ doc("n_instructions") }}' + + - name: n_functions + description: '{{ doc("n_functions") }}' + + - name: n_globals + description: '{{ doc("n_globals") }}' + + - name: n_table_entries + description: '{{ doc("n_table_entries") }}' + + - name: n_types + description: '{{ doc("n_types") }}' + + - name: n_data_segments + description: '{{ doc("n_data_segments") }}' + + - name: n_elem_segments + description: '{{ doc("n_elem_segments") }}' + + - name: n_imports + description: '{{ doc("n_imports") }}' + + - name: n_exports + description: '{{ doc("n_exports") }}' + + - name: n_data_segment_bytes + description: '{{ doc("n_data_segment_bytes") }}' + + - name: row_hash + description: "SHA256 hash of the row data for deduplication" + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "The timestamp for when the data was loaded into the data warehouse." + tests: + - not_null diff --git a/models/intermediate/int_transform_contract_data.sql b/models/intermediate/int_transform_contract_data.sql index cd6d62d..3802a66 100644 --- a/models/intermediate/int_transform_contract_data.sql +++ b/models/intermediate/int_transform_contract_data.sql @@ -8,7 +8,7 @@ "granularity": "month" }, cluster_by = ["ledger_key_hash", "closed_at", "row_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "intermediate", "daily"] ) }} diff --git a/models/intermediate/int_transform_contract_data.yml b/models/intermediate/int_transform_contract_data.yml new file mode 100644 index 0000000..f190280 --- /dev/null +++ b/models/intermediate/int_transform_contract_data.yml @@ -0,0 +1,88 @@ +version: 2 + +models: + - name: int_transform_contract_data + description: "This intermediate model applies transformations to the contract data, calculates new fields like contract_create_ts and contract_delete_ts, and performs deduplication based on row hash." + meta: + owner: "Data team" + update_schedule: "daily" + description: "Transformed and deduplicated contract data with derived timestamps" + upstream_dependencies: ["int_incr_contract_data"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + - closed_at + meta: + description: "Tests the uniqueness combination of contract data tracking fields" + columns: + - name: contract_id + description: '{{ doc("contract_id") }}' + tests: + - not_null + + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + + - name: contract_durability + description: '{{ doc("contract_durability") }}' + + - name: ledger_sequence + description: '{{ doc("ledger_sequence") }}' + tests: + - not_null + + - name: contract_create_ts + description: "Timestamp when the contract was created" + tests: + - not_null + + - name: contract_delete_ts + description: "Timestamp when the contract was deleted (if applicable)" + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: asset_code + description: '{{ doc("asset_code") }}' + + - name: asset_issuer + description: '{{ doc("asset_issuer") }}' + + - name: asset_type + description: '{{ doc("asset_type") }}' + + - name: balance + description: '{{ doc("balance") }}' + + - name: balance_holder + description: '{{ doc("balance_holder") }}' + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "The timestamp for when the data was loaded into the data warehouse." + tests: + - not_null + + - name: row_hash + description: "SHA256 hash of the row data for deduplication" + tests: + - not_null diff --git a/models/intermediate/int_transform_ttl.sql b/models/intermediate/int_transform_ttl.sql index 9a04e09..824872c 100644 --- a/models/intermediate/int_transform_ttl.sql +++ b/models/intermediate/int_transform_ttl.sql @@ -8,7 +8,7 @@ "granularity": "month" }, cluster_by = ["key_hash", "closed_at", "row_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "intermediate", "daily"] ) }} diff --git a/models/intermediate/int_transform_ttl.yml b/models/intermediate/int_transform_ttl.yml new file mode 100644 index 0000000..ab847f9 --- /dev/null +++ b/models/intermediate/int_transform_ttl.yml @@ -0,0 +1,73 @@ +version: 2 + +models: + - name: int_transform_ttl + description: "This intermediate model applies transformations to TTL data, calculates new fields like ttl_create_ts and ttl_delete_ts, and performs deduplication based on the transformed data." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Transformed and deduplicated TTL data with derived timestamps" + upstream_dependencies: ["int_incr_ttl"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - key_hash + - closed_at + meta: + description: "Tests the uniqueness combination of TTL tracking fields" + columns: + - name: key_hash + description: '{{ doc("key_hash") }}' + tests: + - not_null + - relationships: + to: ref('int_incr_ttl') + field: key_hash + + - name: live_until_ledger_seq + description: '{{ doc("live_until_ledger_seq") }}' + tests: + - not_null + + - name: ttl_create_ts + description: "Timestamp when the TTL was created (ledger_entry_change = 0)" + tests: + - not_null + + - name: ttl_delete_ts + description: "Timestamp when the TTL was deleted (ledger_entry_change = 2 and deleted = true)" + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: ledger_sequence + description: '{{ doc("ledger_sequence") }}' + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "The timestamp for when the data was loaded into the data warehouse." + tests: + - not_null + + - name: row_hash + description: "SHA256 hash of the row data for deduplication" + tests: + - not_null diff --git a/models/marts/dim_contract_code_current.sql b/models/marts/dim_contract_code_current.sql index 5274823..22d756d 100644 --- a/models/marts/dim_contract_code_current.sql +++ b/models/marts/dim_contract_code_current.sql @@ -3,7 +3,7 @@ materialized = 'table', unique_key = ['ledger_key_hash'], cluster_by = ["ledger_key_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "dimension", "daily"] ) }} diff --git a/models/marts/dim_contract_code_current.yml b/models/marts/dim_contract_code_current.yml new file mode 100644 index 0000000..dabd7ce --- /dev/null +++ b/models/marts/dim_contract_code_current.yml @@ -0,0 +1,94 @@ +version: 2 + +models: + - name: dim_contract_code_current + description: "Current snapshot of contract code data, showing only the latest active state for each contract." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Current state snapshot of all contracts" + upstream_dependencies: ["dim_contract_code_hist"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + meta: + description: "Ensures each contract appears exactly once" + + columns: + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + - unique + - relationships: + to: ref('dim_contract_code_hist') + field: ledger_key_hash + + - name: contract_code_hash + description: '{{ doc("contract_code_hash") }}' + tests: + - not_null + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: contract_create_ts + description: "Timestamp when the contract was created" + tests: + - not_null + + - name: contract_delete_ts + description: "Timestamp when the contract was deleted (if applicable)" + + - name: n_instructions + description: '{{ doc("n_instructions") }}' + + - name: n_functions + description: '{{ doc("n_functions") }}' + + - name: n_globals + description: '{{ doc("n_globals") }}' + + - name: n_table_entries + description: '{{ doc("n_table_entries") }}' + + - name: n_types + description: '{{ doc("n_types") }}' + + - name: n_data_segments + description: '{{ doc("n_data_segments") }}' + + - name: n_elem_segments + description: '{{ doc("n_elem_segments") }}' + + - name: n_imports + description: '{{ doc("n_imports") }}' + + - name: n_exports + description: '{{ doc("n_exports") }}' + + - name: n_data_segment_bytes + description: '{{ doc("n_data_segment_bytes") }}' + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task" + tests: + - not_null + + - name: dw_load_ts + description: "Timestamp when the record was loaded" + tests: + - not_null diff --git a/models/marts/dim_contract_code_hist.sql b/models/marts/dim_contract_code_hist.sql index dab69d5..22215ff 100644 --- a/models/marts/dim_contract_code_hist.sql +++ b/models/marts/dim_contract_code_hist.sql @@ -3,7 +3,7 @@ materialized = 'incremental', unique_key = ['ledger_key_hash', 'start_date'], cluster_by = ["ledger_key_hash", "start_date", "row_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "dimension", "scd-2", "daily"] ) }} diff --git a/models/marts/dim_contract_code_hist.yml b/models/marts/dim_contract_code_hist.yml new file mode 100644 index 0000000..8d88916 --- /dev/null +++ b/models/marts/dim_contract_code_hist.yml @@ -0,0 +1,134 @@ +version: 2 +models: + - name: dim_contract_code_hist + description: "Slowly Changing Dimension (SCD) Type 2 implementation for tracking historical changes in contract code data." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Historical tracking of contract code changes with effective start and end dates" + upstream_dependencies: ["int_transform_contract_code"] + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + - start_date + + - dbt_utils.expression_is_true: + expression: "start_date <= end_date" + meta: + description: "Ensures start_date is always before or equal to end_date" + + - dbt_utils.expression_is_true: + expression: "(NOT is_current) OR (end_date = '9999-12-31')" + meta: + description: "Ensures current records have the maximum end_date" + + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + where: "is_current = true" + meta: + description: "Ensures there is only one current record per key_hash" + + columns: + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + - relationships: + to: ref('int_transform_contract_code') + field: ledger_key_hash + + - name: contract_code_hash + description: '{{ doc("contract_code_hash") }}' + tests: + - not_null + + - name: start_date + description: "The date when this version of the contract became effective" + tests: + - not_null + + - name: end_date + description: "The date when this version of the contract was superseded (9999-12-31 for current version)" + tests: + - not_null + + - name: is_current + description: "Flag indicating if this is the current version of the contract" + tests: + - not_null + + - name: contract_create_ts + description: "Timestamp when the contract was created (ledger_entry_change = 0)" + tests: + - not_null + + - name: contract_delete_ts + description: "Timestamp when the contract was deleted (if applicable)" + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: n_instructions + description: '{{ doc("n_instructions") }}' + + - name: n_functions + description: '{{ doc("n_functions") }}' + + - name: n_globals + description: '{{ doc("n_globals") }}' + + - name: n_table_entries + description: '{{ doc("n_table_entries") }}' + + - name: n_types + description: '{{ doc("n_types") }}' + + - name: n_data_segments + description: '{{ doc("n_data_segments") }}' + + - name: n_elem_segments + description: '{{ doc("n_elem_segments") }}' + + - name: n_imports + description: '{{ doc("n_imports") }}' + + - name: n_exports + description: '{{ doc("n_exports") }}' + + - name: n_data_segment_bytes + description: '{{ doc("n_data_segment_bytes") }}' + + - name: row_hash + description: "SHA256 hash of the row data for change detection" + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task" + tests: + - not_null + + - name: dw_load_ts + description: "Timestamp when the record was first loaded" + tests: + - not_null + + - name: dw_update_ts + description: "Timestamp when the record was last updated" + tests: + - not_null diff --git a/models/marts/dim_contract_data_current.sql b/models/marts/dim_contract_data_current.sql index b23ab06..47ff592 100644 --- a/models/marts/dim_contract_data_current.sql +++ b/models/marts/dim_contract_data_current.sql @@ -6,7 +6,7 @@ "data_type": "DATE" }, cluster_by = ["ledger_key_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "dimension", "daily"] ) }} -- Model: dim_contract_data_current diff --git a/models/marts/dim_contract_data_current.yml b/models/marts/dim_contract_data_current.yml new file mode 100644 index 0000000..ebfb544 --- /dev/null +++ b/models/marts/dim_contract_data_current.yml @@ -0,0 +1,82 @@ +version: 2 + +models: + - name: dim_contract_data_current + description: "Current snapshot of contract data, showing only the latest active state for each contract." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Current state snapshot of all contracts" + upstream_dependencies: ["dim_contract_data_hist"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + meta: + description: "Ensures each contract appears exactly once" + + columns: + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + - unique + - relationships: + to: ref('dim_contract_data_hist') + field: ledger_key_hash + + - name: contract_id + description: '{{ doc("contract_id") }}' + tests: + - not_null + + - name: contract_durability + description: '{{ doc("contract_durability") }}' + + - name: contract_create_ts + description: "Timestamp when the contract was created." + tests: + - not_null + + - name: contract_delete_ts + description: "Timestamp when the contract was deleted (if applicable)." + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: asset_code + description: '{{ doc("asset_code") }}' + + - name: asset_issuer + description: '{{ doc("asset_issuer") }}' + + - name: asset_type + description: '{{ doc("asset_type") }}' + + - name: balance + description: '{{ doc("balance") }}' + + - name: balance_holder + description: '{{ doc("balance_holder") }}' + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "Timestamp when the record was loaded." + tests: + - not_null diff --git a/models/marts/dim_contract_data_hist.sql b/models/marts/dim_contract_data_hist.sql index 446b10a..eb47976 100644 --- a/models/marts/dim_contract_data_hist.sql +++ b/models/marts/dim_contract_data_hist.sql @@ -7,7 +7,7 @@ "granularity": "month" }, cluster_by = ["ledger_key_hash", "start_date", "row_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "dimension", "scd-2", "daily"] ) }} /* diff --git a/models/marts/dim_contract_data_hist.yml b/models/marts/dim_contract_data_hist.yml new file mode 100644 index 0000000..598b1e0 --- /dev/null +++ b/models/marts/dim_contract_data_hist.yml @@ -0,0 +1,122 @@ +version: 2 + +models: + - name: dim_contract_data_hist + description: "Slowly Changing Dimension (SCD) Type 2 implementation for tracking historical changes in contract data." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Historical tracking of contract data changes with effective start and end dates." + upstream_dependencies: ["int_transform_contract_data"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + - start_date + + - dbt_utils.expression_is_true: + expression: "start_date <= end_date" + meta: + description: "Ensures start_date is always before or equal to end_date" + + - dbt_utils.expression_is_true: + expression: "(NOT is_current) OR (end_date = '9999-12-31')" + meta: + description: "Ensures current records have the maximum end_date" + + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ledger_key_hash + where: "is_current = true" + meta: + description: "Ensures there is only one current record per key_hash" + + columns: + - name: ledger_key_hash + description: '{{ doc("ledger_key_hash") }}' + tests: + - not_null + - relationships: + to: ref('int_transform_contract_data') + field: ledger_key_hash + + - name: contract_id + description: '{{ doc("contract_id") }}' + tests: + - not_null + + - name: start_date + description: "The date when this version of the contract became effective." + tests: + - not_null + + - name: end_date + description: "The date when this version of the contract was superseded (9999-12-31 for current version)." + tests: + - not_null + + - name: is_current + description: "Flag indicating if this is the current version of the contract." + tests: + - not_null + + - name: contract_create_ts + description: "Timestamp when the contract was created." + tests: + - not_null + + - name: contract_delete_ts + description: "Timestamp when the contract was deleted (if applicable)." + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: contract_durability + description: '{{ doc("contract_durability") }}' + + - name: asset_code + description: '{{ doc("asset_code") }}' + + - name: asset_issuer + description: '{{ doc("asset_issuer") }}' + + - name: asset_type + description: '{{ doc("asset_type") }}' + + - name: balance + description: '{{ doc("balance") }}' + + - name: balance_holder + description: '{{ doc("balance_holder") }}' + + - name: row_hash + description: "SHA256 hash of the row data for change detection." + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: dw_load_ts + description: "The timestamp for when the data was loaded into the data warehouse." + tests: + - not_null + + - name: dw_update_ts + description: "The timestamp for when the record was last updated." + tests: + - not_null diff --git a/models/marts/dim_ttl_current.sql b/models/marts/dim_ttl_current.sql index 0cafce7..5f5bf89 100644 --- a/models/marts/dim_ttl_current.sql +++ b/models/marts/dim_ttl_current.sql @@ -6,7 +6,7 @@ "data_type": "DATE" }, cluster_by = ["key_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "dimension", "daily"] ) }} -- Model: dim_ttl_current diff --git a/models/marts/dim_ttl_current.yml b/models/marts/dim_ttl_current.yml new file mode 100644 index 0000000..422a324 --- /dev/null +++ b/models/marts/dim_ttl_current.yml @@ -0,0 +1,64 @@ +version: 2 + +models: + - name: dim_ttl_current + description: "Current snapshot of TTL data, showing only the latest active state for each key_hash." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Current state snapshot of all TTL records" + upstream_dependencies: ["dim_ttl_hist"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - key_hash + meta: + description: "Ensures each TTL record appears exactly once" + + columns: + - name: key_hash + description: '{{ doc("key_hash") }}' + tests: + - not_null + - unique + - relationships: + to: ref('dim_ttl_hist') + field: key_hash + + - name: live_until_ledger_seq + description: '{{ doc("live_until_ledger_seq") }}' + tests: + - not_null + + - name: ttl_create_ts + description: "Timestamp when the TTL was created" + tests: + - not_null + + - name: ttl_delete_ts + description: "Timestamp when the TTL was deleted (if applicable)" + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task" + tests: + - not_null + + - name: dw_load_ts + description: "Timestamp when the record was loaded" + tests: + - not_null diff --git a/models/marts/dim_ttl_hist.sql b/models/marts/dim_ttl_hist.sql index 2c74d9a..79e7408 100644 --- a/models/marts/dim_ttl_hist.sql +++ b/models/marts/dim_ttl_hist.sql @@ -7,7 +7,7 @@ "granularity": "month" }, cluster_by = ["key_hash", "start_date", "row_hash"], - tags = ["soroban_analytics"] + tags = ["soroban_analytics", "dimension", "scd-2", "daily"] ) }} /* diff --git a/models/marts/dim_ttl_hist.yml b/models/marts/dim_ttl_hist.yml new file mode 100644 index 0000000..1516f55 --- /dev/null +++ b/models/marts/dim_ttl_hist.yml @@ -0,0 +1,104 @@ +version: 2 + +models: + - name: dim_ttl_hist + description: "Slowly Changing Dimension (SCD) Type 2 implementation for tracking historical changes in TTL data." + meta: + owner: "Data Team" + update_schedule: "daily" + description: "Historical tracking of TTL changes with effective start and end dates" + upstream_dependencies: ["int_transform_ttl"] + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - key_hash + - start_date + + - dbt_utils.expression_is_true: + expression: "start_date <= end_date" + meta: + description: "Ensures start_date is always before or equal to end_date" + + - dbt_utils.expression_is_true: + expression: "(NOT is_current) OR (end_date = '9999-12-31')" + meta: + description: "Ensures current records have the maximum end_date" + + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - key_hash + where: "is_current = true" + meta: + description: "Ensures there is only one current record per key_hash" + + columns: + - name: key_hash + description: '{{ doc("key_hash") }}' + tests: + - not_null + - relationships: + to: ref('int_transform_ttl') + field: key_hash + + - name: live_until_ledger_seq + description: '{{ doc("live_until_ledger_seq") }}' + tests: + - not_null + + - name: start_date + description: "The date when this version of the TTL became effective" + tests: + - not_null + + - name: end_date + description: "The date when this version of the TTL was superseded (9999-12-31 for current version)" + tests: + - not_null + + - name: is_current + description: "Flag indicating if this is the current version of the TTL" + tests: + - not_null + + - name: ttl_create_ts + description: "Timestamp when the TTL was created" + tests: + - not_null + + - name: ttl_delete_ts + description: "Timestamp when the TTL was deleted (if applicable)" + + - name: closed_at + description: '{{ doc("closed_at") }}' + tests: + - not_null + + - name: batch_id + description: '{{ doc("batch_id") }}' + tests: + - not_null + + - name: batch_run_date + description: '{{ doc("batch_run_date") }}' + tests: + - not_null + + - name: airflow_start_ts + description: "The timestamp indicating the start of the Airflow task." + tests: + - not_null + + - name: row_hash + description: "SHA256 hash of the row data for change detection" + tests: + - not_null + + - name: dw_load_ts + description: "Timestamp when the record was first loaded" + tests: + - not_null + + - name: dw_update_ts + description: "Timestamp when the record was last updated" + tests: + - not_null