diff --git a/.circleci/config.yml b/.circleci/config.yml
index ede02ac6..9d179a4c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,11 +1,9 @@
-
 version: 2.1

 orbs:
   azure-cli: circleci/azure-cli@1.1.0

 jobs:
-
   integration-redshift:
     docker:
       - image: cimg/python:3.9.9
@@ -16,7 +14,7 @@ jobs:
           command: ./run_test.sh redshift
       - store_artifacts:
           path: ./logs
-
+
   integration-snowflake:
     docker:
       - image: cimg/python:3.9.9
@@ -27,7 +25,7 @@ jobs:
           command: ./run_test.sh snowflake
       - store_artifacts:
           path: ./logs
-
+
   integration-bigquery:
     environment:
       BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json"
@@ -102,16 +100,23 @@ jobs:
       - store_artifacts:
           path: ./logs
-

 workflows:
   version: 2
   test-all:
     jobs:
-      - integration-redshift
-      - integration-snowflake
-      - integration-bigquery
-      - integration-databricks
-      #- integration-synapse
+      - integration-redshift:
+          context: profile-redshift
+      - integration-snowflake:
+          context: profile-snowflake
+      - integration-bigquery:
+          context: profile-bigquery
+      - integration-databricks:
+          context:
+            - aws-credentials
+            - profile-databricks
+      #- integration-synapse:
+      #    context: profile-synapse
       #- integration-azuresql:
+      #    context: profile-azure
       #  requires:
       #    - integration-synapse
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 0082d7cf..ff8cbb24 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1 +1 @@
-* @jtcohen6
\ No newline at end of file
+* @jeremyyeo
diff --git a/README.md b/README.md
index fbe4357b..51e34d31 100644
--- a/README.md
+++ b/README.md
@@ -117,7 +117,7 @@ execute the appropriate `create`, `refresh`, and/or `drop` commands:

 If you encounter issues using this package or have questions, please check the [open issues](https://github.com/dbt-labs/dbt-external-tables/issues), as there's a chance it's a known limitation or work in progress. If not, you can:
 - open a new issue to report a bug or suggest an enhancement
-- post a technical question to [StackOverflow](https://stackoverflow.com/questions/tagged/dbt)
+- post a technical question to [the Community Forum](https://discourse.getdbt.com/c/help/19)
 - post a conceptual question to the relevant database channel (#db-redshift, #dbt-snowflake, etc) in the [dbt Slack community](https://community.getdbt.com/)

-Additional contributions to this package are very welcome! Please create issues or open PRs against `master`. Check out [this post](https://discourse.getdbt.com/t/contributing-to-an-external-dbt-package/657) on the best workflow for contributing to a package.
\ No newline at end of file
+Additional contributions to this package are very welcome! Please create issues or open PRs against `master`. Check out [this post](https://discourse.getdbt.com/t/contributing-to-an-external-dbt-package/657) on the best workflow for contributing to a package.
diff --git a/integration_tests/models/plugins/bigquery/bigquery_external.yml b/integration_tests/models/plugins/bigquery/bigquery_external.yml
index 92e1e39a..3b81230b 100644
--- a/integration_tests/models/plugins/bigquery/bigquery_external.yml
+++ b/integration_tests/models/plugins/bigquery/bigquery_external.yml
@@ -37,7 +37,7 @@ sources:
         options:
           format: csv
           skip_leading_rows: 1
-          hive_partition_uri_prefix: "'gs://dbt-external-tables-testing/csv'"
+          hive_partition_uri_prefix: 'gs://dbt-external-tables-testing/csv'
         partitions: &parts-of-the-people
           - name: section
             data_type: string
@@ -50,7 +50,7 @@ sources:
         options:
           format: csv
           skip_leading_rows: 1
-          hive_partition_uri_prefix: "'gs://dbt-external-tables-testing/csv'"
+          hive_partition_uri_prefix: 'gs://dbt-external-tables-testing/csv'
       tests: *equal-to-the-people

     - name: people_csv_override_uris
diff --git a/macros/common/create_external_schema.sql b/macros/common/create_external_schema.sql
new file mode 100644
index 00000000..c96cbcda
--- /dev/null
+++ b/macros/common/create_external_schema.sql
@@ -0,0 +1,11 @@
+{% macro create_external_schema(source_node) %}
+    {{ adapter.dispatch('create_external_schema', 'dbt_external_tables')(source_node) }}
+{% endmacro %}
+
+{% macro default__create_external_schema(source_node) %}
+    {% set ddl %}
+        create schema if not exists {{ source_node.schema }}
+    {% endset %}
+
+    {{return(ddl)}}
+{% endmacro %}
diff --git a/macros/plugins/bigquery/create_external_table.sql b/macros/plugins/bigquery/create_external_table.sql
index cefa7024..af0970cf 100644
--- a/macros/plugins/bigquery/create_external_table.sql
+++ b/macros/plugins/bigquery/create_external_table.sql
@@ -15,7 +15,8 @@
     create or replace external table {{source(source_node.source_name, source_node.name)}}
     {%- if columns -%}(
         {% for column in columns %}
-            {{column.name}} {{column.data_type}} {{- ',' if not loop.last -}}
+            {%- set column_quoted = adapter.quote(column.name) if column.quote else column.name %}
+            {{column_quoted}} {{column.data_type}} {{- ',' if not loop.last -}}
         {%- endfor -%}
     )
     {% endif %}
diff --git a/macros/plugins/bigquery/get_external_build_plan.sql b/macros/plugins/bigquery/get_external_build_plan.sql
index fe7d7838..f90c1bfa 100644
--- a/macros/plugins/bigquery/get_external_build_plan.sql
+++ b/macros/plugins/bigquery/get_external_build_plan.sql
@@ -11,7 +11,10 @@
     {% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %}

     {% if create_or_replace %}
-        {% set build_plan = build_plan + [dbt_external_tables.create_external_table(source_node)] %}
+        {% set build_plan = build_plan + [
+            dbt_external_tables.create_external_schema(source_node),
+            dbt_external_tables.create_external_table(source_node)
+        ] %}
     {% else %}
         {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %}
     {% endif %}
diff --git a/macros/plugins/snowflake/create_external_table.sql b/macros/plugins/snowflake/create_external_table.sql
index 721611c2..7bb46291 100644
--- a/macros/plugins/snowflake/create_external_table.sql
+++ b/macros/plugins/snowflake/create_external_table.sql
@@ -33,4 +33,5 @@
     {% if external.pattern -%} pattern = '{{external.pattern}}' {%- endif %}
     {% if external.integration -%} integration = '{{external.integration}}' {%- endif %}
     file_format = {{external.file_format}}
+    {% if external.table_format -%} table_format = '{{external.table_format}}' {%- endif %}
 {% endmacro %}
diff --git a/macros/plugins/snowflake/get_external_build_plan.sql b/macros/plugins/snowflake/get_external_build_plan.sql
index 0f73a0b2..807cbb5d 100644
--- a/macros/plugins/snowflake/get_external_build_plan.sql
+++ b/macros/plugins/snowflake/get_external_build_plan.sql
@@ -14,6 +14,7 @@

         {% if create_or_replace %}
             {% set build_plan = build_plan + [
+                dbt_external_tables.create_external_schema(source_node),
                 dbt_external_tables.snowflake_create_empty_table(source_node),
                 dbt_external_tables.snowflake_get_copy_sql(source_node, explicit_transaction=true),
                 dbt_external_tables.snowflake_create_snowpipe(source_node)
@@ -25,7 +26,10 @@
     {% else %}

         {% if create_or_replace %}
-            {% set build_plan = build_plan + [dbt_external_tables.create_external_table(source_node)] %}
+            {% set build_plan = build_plan + [
+                dbt_external_tables.create_external_schema(source_node),
+                dbt_external_tables.create_external_table(source_node)
+            ] %}
         {% else %}
             {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %}
         {% endif %}
diff --git a/macros/plugins/snowflake/snowpipe/create_snowpipe.sql b/macros/plugins/snowflake/snowpipe/create_snowpipe.sql
index 88385345..a9ba44eb 100644
--- a/macros/plugins/snowflake/snowpipe/create_snowpipe.sql
+++ b/macros/plugins/snowflake/snowpipe/create_snowpipe.sql
@@ -8,6 +8,7 @@
     {% if snowpipe.auto_ingest -%} auto_ingest = {{snowpipe.auto_ingest}} {%- endif %}
     {% if snowpipe.aws_sns_topic -%} aws_sns_topic = '{{snowpipe.aws_sns_topic}}' {%- endif %}
     {% if snowpipe.integration -%} integration = '{{snowpipe.integration}}' {%- endif %}
+    {% if snowpipe.error_integration -%} error_integration = '{{snowpipe.error_integration}}' {%- endif %}
     as {{ dbt_external_tables.snowflake_get_copy_sql(source_node) }}

 {% endmacro %}
diff --git a/macros/plugins/spark/create_external_table.sql b/macros/plugins/spark/create_external_table.sql
index 4232f474..0fc8c47e 100644
--- a/macros/plugins/spark/create_external_table.sql
+++ b/macros/plugins/spark/create_external_table.sql
@@ -5,10 +5,19 @@
     {%- set partitions = external.partitions -%}
    {%- set options = external.options -%}

+    {%- set columns_and_partitions = columns | list -%}
+    {%- if partitions -%}
+        {%- for i in partitions -%}
+            {%- if i.name not in columns_and_partitions | list | map(attribute='name') -%}
+                {%- do columns_and_partitions.append(i) -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+
     {# https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html #}
     create table {{source(source_node.source_name, source_node.name)}}
-    {%- if columns|length > 0 %} (
-        {% for column in columns %}
+    {%- if columns | length > 0 %} (
+        {% for column in columns_and_partitions %}
             {{column.name}} {{column.data_type}}
             {{- ',' if not loop.last -}}
         {% endfor %}
@@ -21,7 +30,7 @@
     ) {%- endif %}
     {% if partitions -%} partitioned by (
         {%- for partition in partitions -%}
-            {{partition.name}} {{partition.data_type}}{{', ' if not loop.last}}
+            {{partition.name}}{{', ' if not loop.last}}
         {%- endfor -%}
     ) {%- endif %}
     {% if external.row_format -%} row format {{external.row_format}} {%- endif %}
diff --git a/macros/plugins/spark/get_external_build_plan.sql b/macros/plugins/spark/get_external_build_plan.sql
index 47ab3247..cab1ca29 100644
--- a/macros/plugins/spark/get_external_build_plan.sql
+++ b/macros/plugins/spark/get_external_build_plan.sql
@@ -12,6 +12,7 @@

     {% if create_or_replace %}
         {% set build_plan = build_plan + [
+            dbt_external_tables.create_external_schema(source_node),
             dbt_external_tables.dropif(source_node),
             dbt_external_tables.create_external_table(source_node)
         ] %}
@@ -19,8 +20,8 @@
         {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %}
     {% endif %}

-    {% set recover_partitions = spark__recover_partitions(source_node) %}
-    {% if recover_partitions|length > 0 %}
+    {% set recover_partitions = dbt_external_tables.recover_partitions(source_node) %}
+    {% if recover_partitions %}
         {% set build_plan = build_plan + [ recover_partitions ] %}
diff --git a/macros/plugins/spark/helpers/recover_partitions.sql b/macros/plugins/spark/helpers/recover_partitions.sql
index a79de3e2..fee174f0 100644
--- a/macros/plugins/spark/helpers/recover_partitions.sql
+++ b/macros/plugins/spark/helpers/recover_partitions.sql
@@ -1,12 +1,28 @@
 {% macro spark__recover_partitions(source_node) %}
     {# https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-table.html #}

-    {% set ddl %}
-        {%- if (source_node.external.partitions or source_node.external.recover_partitions) and source_node.external.using and source_node.external.using|lower != 'delta' -%}
-            ALTER TABLE {{ source(source_node.source_name, source_node.name) }} RECOVER PARTITIONS
+    {%- if (source_node.external.partitions or source_node.external.recover_partitions)
+        and source_node.external.using and source_node.external.using|lower != 'delta' -%}
+        {% set ddl %}
+            ALTER TABLE {{ source(source_node.source_name, source_node.name) }} RECOVER PARTITIONS
+        {% endset %}
+    {%- else -%}
+        {% set ddl = none %}
     {%- endif -%}
-    {% endset %}

     {{return(ddl)}}

 {% endmacro %}
+
+{% macro recover_partitions(source_node) %}
+    {{ return(adapter.dispatch('recover_partitions', 'dbt_external_tables')(source_node)) }}
+{% endmacro %}
+
+{% macro default__recover_partitions(source_node) %}
+    /*{#
+        We're dispatching this macro so that users can override it if required on other adapters
+        but this will work for spark/databricks.
+    #}*/
+
+    {{ exceptions.raise_not_implemented('recover_partitions macro not implemented for adapter ' + adapter.type()) }}
+{% endmacro %}
diff --git a/macros/plugins/sqlserver/create_external_schema.sql b/macros/plugins/sqlserver/create_external_schema.sql
new file mode 100644
index 00000000..c1ef48be
--- /dev/null
+++ b/macros/plugins/sqlserver/create_external_schema.sql
@@ -0,0 +1,13 @@
+{% macro sqlserver__create_external_schema(source_node) %}
+    {# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-schema-transact-sql?view=sql-server-ver16 #}
+
+    {% set ddl %}
+        IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = '{{ source_node.schema }}')
+        BEGIN
+            EXEC('CREATE SCHEMA [{{ source_node.schema }}]')
+        END
+    {% endset %}
+
+    {{return(ddl)}}
+
+{% endmacro %}
diff --git a/macros/plugins/sqlserver/get_external_build_plan.sql b/macros/plugins/sqlserver/get_external_build_plan.sql
index 4e974100..7a161a0d 100644
--- a/macros/plugins/sqlserver/get_external_build_plan.sql
+++ b/macros/plugins/sqlserver/get_external_build_plan.sql
@@ -12,6 +12,7 @@

     {% if create_or_replace %}
         {% set build_plan = build_plan + [
+            dbt_external_tables.create_external_schema(source_node),
             dbt_external_tables.dropif(source_node),
             dbt_external_tables.create_external_table(source_node)
         ] %}
diff --git a/sample_sources/snowflake.yml b/sample_sources/snowflake.yml
index 598d5073..83d710ea 100644
--- a/sample_sources/snowflake.yml
+++ b/sample_sources/snowflake.yml
@@ -60,4 +60,11 @@ sources:

 # # if you do not specify *any* columns for a snowpiped table, dbt will also
 # include `value`, the JSON blob of all file contents.
-
\ No newline at end of file
+
+  - name: delta_tbl
+    description: "External table using Delta files"
+    external:
+      location: "@stage" # reference an existing external stage
+      file_format: "( type = parquet )" # fully specified here, or reference an existing file format
+      table_format: delta # specify the table format
+      auto_refresh: false # requires configuring an event notification from Amazon S3 or Azure
diff --git a/sample_sources/spark.yml b/sample_sources/spark.yml
index 5e49b6fa..a7c9b95b 100644
--- a/sample_sources/spark.yml
+++ b/sample_sources/spark.yml
@@ -12,6 +12,13 @@ sources:
           sep: '|'
           header: 'true'
           timestampFormat: 'yyyy-MM-dd HH:mm'
+        partitions:
+          - name: year
+            data_type: int
+          - name: month
+            data_type: int
+          - name: day
+            data_type: int
         columns:
           - name: app_id