Commit 7f38fd6: merged-latest
Jarno Rajala committed Apr 13, 2023
2 parents d9e0ac1 + 34c1cc4
Showing 17 changed files with 108 additions and 28 deletions.
25 changes: 15 additions & 10 deletions .circleci/config.yml
@@ -1,11 +1,9 @@

version: 2.1

orbs:
azure-cli: circleci/[email protected]

jobs:

integration-redshift:
docker:
- image: cimg/python:3.9.9
@@ -16,7 +14,7 @@ jobs:
command: ./run_test.sh redshift
- store_artifacts:
path: ./logs

integration-snowflake:
docker:
- image: cimg/python:3.9.9
@@ -27,7 +25,7 @@ jobs:
command: ./run_test.sh snowflake
- store_artifacts:
path: ./logs

integration-bigquery:
environment:
BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json"
@@ -102,16 +100,23 @@ jobs:
- store_artifacts:
path: ./logs


workflows:
version: 2
test-all:
jobs:
- integration-redshift
- integration-snowflake
- integration-bigquery
- integration-databricks
#- integration-synapse
- integration-redshift:
context: profile-redshift
- integration-snowflake:
context: profile-snowflake
- integration-bigquery:
context: profile-bigquery
- integration-databricks:
context:
- aws-credentials
- profile-databricks
#- integration-synapse:
# context: profile-synapse
#- integration-azuresql:
# context: profile-azure
# requires:
# - integration-synapse
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -1 +1 @@
* @jtcohen6
* @jeremyyeo
4 changes: 2 additions & 2 deletions README.md
@@ -117,7 +117,7 @@ execute the appropriate `create`, `refresh`, and/or `drop` commands:

If you encounter issues using this package or have questions, please check the [open issues](https://github.com/dbt-labs/dbt-external-tables/issues), as there's a chance it's a known limitation or work in progress. If not, you can:
- open a new issue to report a bug or suggest an enhancement
- post a technical question to [StackOverflow](https://stackoverflow.com/questions/tagged/dbt)
- post a technical question to [the Community Forum](https://discourse.getdbt.com/c/help/19)
- post a conceptual question to the relevant database channel (#db-redshift, #dbt-snowflake, etc) in the [dbt Slack community](https://community.getdbt.com/)

Additional contributions to this package are very welcome! Please create issues or open PRs against `master`. Check out [this post](https://discourse.getdbt.com/t/contributing-to-an-external-dbt-package/657) on the best workflow for contributing to a package.
@@ -37,7 +37,7 @@ sources:
options:
format: csv
skip_leading_rows: 1
hive_partition_uri_prefix: "'gs://dbt-external-tables-testing/csv'"
hive_partition_uri_prefix: 'gs://dbt-external-tables-testing/csv'
partitions: &parts-of-the-people
- name: section
data_type: string
@@ -50,7 +50,7 @@ sources:
options:
format: csv
skip_leading_rows: 1
hive_partition_uri_prefix: "'gs://dbt-external-tables-testing/csv'"
hive_partition_uri_prefix: 'gs://dbt-external-tables-testing/csv'
tests: *equal-to-the-people

- name: people_csv_override_uris
11 changes: 11 additions & 0 deletions macros/common/create_external_schema.sql
@@ -0,0 +1,11 @@
{% macro create_external_schema(source_node) %}
{{ adapter.dispatch('create_external_schema', 'dbt_external_tables')(source_node) }}
{% endmacro %}

{% macro default__create_external_schema(source_node) %}
{% set ddl %}
create schema if not exists {{ source_node.schema }}
{% endset %}

{{return(ddl)}}
{% endmacro %}
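Because the macro is dispatched through `adapter.dispatch`, an adapter package or user project can override the default DDL. A minimal sketch of a hypothetical Snowflake override that also qualifies the schema with the source database (illustration only, not part of this commit):

    {% macro snowflake__create_external_schema(source_node) %}
        {# hypothetical override: qualify the schema with the source's database #}
        {% set ddl %}
            create schema if not exists {{ source_node.database }}.{{ source_node.schema }}
        {% endset %}
        {{ return(ddl) }}
    {% endmacro %}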
3 changes: 2 additions & 1 deletion macros/plugins/bigquery/create_external_table.sql
@@ -15,7 +15,8 @@
create or replace external table {{source(source_node.source_name, source_node.name)}}
{%- if columns -%}(
{% for column in columns %}
{{column.name}} {{column.data_type}} {{- ',' if not loop.last -}}
{%- set column_quoted = adapter.quote(column.name) if column.quote else column.name %}
{{column_quoted}} {{column.data_type}} {{- ',' if not loop.last -}}
{%- endfor -%}
)
{% endif %}
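With the quoting change above, a column can opt into identifier quoting through the standard `quote` column property. A hedged illustration of the corresponding source YAML (column names are assumptions, chosen to show a reserved word):

    columns:
      - name: group          # reserved word; quoted via adapter.quote when quote is true
        data_type: string
        quote: true
      - name: id
        data_type: int64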
5 changes: 4 additions & 1 deletion macros/plugins/bigquery/get_external_build_plan.sql
@@ -11,7 +11,10 @@
{% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %}

{% if create_or_replace %}
{% set build_plan = build_plan + [dbt_external_tables.create_external_table(source_node)] %}
{% set build_plan = build_plan + [
dbt_external_tables.create_external_schema(source_node),
dbt_external_tables.create_external_table(source_node)
] %}
{% else %}
{% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %}
{% endif %}
1 change: 1 addition & 0 deletions macros/plugins/snowflake/create_external_table.sql
@@ -33,4 +33,5 @@
{% if external.pattern -%} pattern = '{{external.pattern}}' {%- endif %}
{% if external.integration -%} integration = '{{external.integration}}' {%- endif %}
file_format = {{external.file_format}}
{% if external.table_format -%} table_format = '{{external.table_format}}' {%- endif %}
{% endmacro %}
6 changes: 5 additions & 1 deletion macros/plugins/snowflake/get_external_build_plan.sql
@@ -14,6 +14,7 @@

{% if create_or_replace %}
{% set build_plan = build_plan + [
dbt_external_tables.create_external_schema(source_node),
dbt_external_tables.snowflake_create_empty_table(source_node),
dbt_external_tables.snowflake_get_copy_sql(source_node, explicit_transaction=true),
dbt_external_tables.snowflake_create_snowpipe(source_node)
@@ -25,7 +26,10 @@
{% else %}

{% if create_or_replace %}
{% set build_plan = build_plan + [dbt_external_tables.create_external_table(source_node)] %}
{% set build_plan = build_plan + [
dbt_external_tables.create_external_schema(source_node),
dbt_external_tables.create_external_table(source_node)
] %}
{% else %}
{% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %}
{% endif %}
1 change: 1 addition & 0 deletions macros/plugins/snowflake/snowpipe/create_snowpipe.sql
@@ -8,6 +8,7 @@
{% if snowpipe.auto_ingest -%} auto_ingest = {{snowpipe.auto_ingest}} {%- endif %}
{% if snowpipe.aws_sns_topic -%} aws_sns_topic = '{{snowpipe.aws_sns_topic}}' {%- endif %}
{% if snowpipe.integration -%} integration = '{{snowpipe.integration}}' {%- endif %}
{% if snowpipe.error_integration -%} error_integration = '{{snowpipe.error_integration}}' {%- endif %}
as {{ dbt_external_tables.snowflake_get_copy_sql(source_node) }}

{% endmacro %}
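With `error_integration` supported, a snowpiped source can route load errors to an existing Snowflake notification integration. A hedged sketch of the corresponding YAML (the stage, topic, and integration names are assumptions and must already exist):

    external:
      location: '@my_stage'
      file_format: '( type = json )'
      snowpipe:
        auto_ingest: true
        aws_sns_topic: 'arn:aws:sns:us-east-1:123456789012:my-topic'
        error_integration: my_error_integration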
15 changes: 12 additions & 3 deletions macros/plugins/spark/create_external_table.sql
@@ -5,10 +5,19 @@
{%- set partitions = external.partitions -%}
{%- set options = external.options -%}

{%- set columns_and_partitions = columns | list -%}
{%- if partitions -%}
{%- for i in partitions -%}
{%- if i.name not in columns_and_partitions | list | map(attribute='name') -%}
{%- do columns_and_partitions.append(i) -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}

{# https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html #}
create table {{source(source_node.source_name, source_node.name)}}
{%- if columns|length > 0 %} (
{% for column in columns %}
{%- if columns | length > 0 %} (
{% for column in columns_and_partitions %}
{{column.name}} {{column.data_type}}
{{- ',' if not loop.last -}}
{% endfor %}
@@ -21,7 +30,7 @@
) {%- endif %}
{% if partitions -%} partitioned by (
{%- for partition in partitions -%}
{{partition.name}} {{partition.data_type}}{{', ' if not loop.last}}
{{partition.name}}{{', ' if not loop.last}}
{%- endfor -%}
) {%- endif %}
{% if external.row_format -%} row format {{external.row_format}} {%- endif %}
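After this change, partition columns are appended to the column list when not already declared, and the `partitioned by` clause emits column names only, matching Spark's syntax for `using`-style data source tables, where partition columns are declared in the column list and referenced by name. For a source partitioned by `year`/`month`/`day` (as in the spark sample further down), the macro would render roughly the following sketch (table, column, and location names are assumptions):

    create table my_source.my_events (
        app_id string,
        event_ts timestamp,
        year int,
        month int,
        day int
    )
    using csv
    options (header 'true')
    partitioned by (year, month, day)
    location '/mnt/external/events'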
5 changes: 3 additions & 2 deletions macros/plugins/spark/get_external_build_plan.sql
@@ -12,15 +12,16 @@

{% if create_or_replace %}
{% set build_plan = build_plan + [
dbt_external_tables.create_external_schema(source_node),
dbt_external_tables.dropif(source_node),
dbt_external_tables.create_external_table(source_node)
] %}
{% else %}
{% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %}
{% endif %}

{% set recover_partitions = spark__recover_partitions(source_node) %}
{% if recover_partitions|length > 0 %}
{% set recover_partitions = dbt_external_tables.recover_partitions(source_node) %}
{% if recover_partitions %}
{% set build_plan = build_plan + [
recover_partitions
] %}
24 changes: 20 additions & 4 deletions macros/plugins/spark/helpers/recover_partitions.sql
@@ -1,12 +1,28 @@
{% macro spark__recover_partitions(source_node) %}
{# https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-table.html #}

{% set ddl %}
{%- if (source_node.external.partitions or source_node.external.recover_partitions) and source_node.external.using and source_node.external.using|lower != 'delta' -%}
ALTER TABLE {{ source(source_node.source_name, source_node.name) }} RECOVER PARTITIONS
{%- if (source_node.external.partitions or source_node.external.recover_partitions)
and source_node.external.using and source_node.external.using|lower != 'delta' -%}
{% set ddl %}
ALTER TABLE {{ source(source_node.source_name, source_node.name) }} RECOVER PARTITIONS
{% endset %}
{%- else -%}
{% set ddl = none %}
{%- endif -%}
{% endset %}

{{return(ddl)}}

{% endmacro %}

{% macro recover_partitions(source_node) %}
{{ return(adapter.dispatch('recover_partitions', 'dbt_external_tables')(source_node)) }}
{% endmacro %}

{% macro default__recover_partitions(source_node) %}
/*{#
We're dispatching this macro so that users can override it if required on other adapters
but this will work for spark/databricks.
#}*/

{{ exceptions.raise_not_implemented('recover_partitions macro not implemented for adapter ' + adapter.type()) }}
{% endmacro %}
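Since `recover_partitions` is dispatched, an adapter whose engine has no `RECOVER PARTITIONS` statement can supply its own implementation instead of hitting the not-implemented error. A purely hypothetical sketch for an engine that repairs partitions via `MSCK REPAIR TABLE` (adapter name and syntax are assumptions, not part of this commit):

    {% macro hive__recover_partitions(source_node) %}
        {# hypothetical override for an engine that uses MSCK REPAIR TABLE #}
        {% set ddl %}
            msck repair table {{ source(source_node.source_name, source_node.name) }}
        {% endset %}
        {{ return(ddl) }}
    {% endmacro %}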
13 changes: 13 additions & 0 deletions macros/plugins/sqlserver/create_external_schema.sql
@@ -0,0 +1,13 @@
{% macro sqlserver__create_external_schema(source_node) %}
{# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-schema-transact-sql?view=sql-server-ver16 #}

{% set ddl %}
IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = '{{ source_node.schema }}')
BEGIN
EXEC('CREATE SCHEMA [{{ source_node.schema }}]')
END
{% endset %}

{{return(ddl)}}

{% endmacro %}
1 change: 1 addition & 0 deletions macros/plugins/sqlserver/get_external_build_plan.sql
@@ -12,6 +12,7 @@

{% if create_or_replace %}
{% set build_plan = build_plan + [
dbt_external_tables.create_external_schema(source_node),
dbt_external_tables.dropif(source_node),
dbt_external_tables.create_external_table(source_node)
] %}
9 changes: 8 additions & 1 deletion sample_sources/snowflake.yml
@@ -60,4 +60,11 @@ sources:
#
# if you do not specify *any* columns for a snowpiped table, dbt will also
# include `value`, the JSON blob of all file contents.


- name: delta_tbl
description: "External table using Delta files"
external:
location: "@stage" # reference an existing external stage
file_format: "( type = parquet )" # fully specified here, or reference an existing file format
table_format: delta # specify the table format
auto_refresh: false # requires configuring an event notification from Amazon S3 or Azure
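Given a source like `delta_tbl` above, the Snowflake create-table macro would render DDL roughly like the following sketch (database and schema names are assumptions; clause order follows the macro only approximately):

    create or replace external table analytics.external_sources.delta_tbl
        location = @stage
        auto_refresh = false
        file_format = ( type = parquet )
        table_format = 'delta'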
7 changes: 7 additions & 0 deletions sample_sources/spark.yml
@@ -12,6 +12,13 @@ sources:
sep: '|'
header: 'true'
timestampFormat: 'yyyy-MM-dd HH:mm'
partitions:
- name: year
data_type: int
- name: month
data_type: int
- name: day
data_type: int

columns:
- name: app_id
