Commit
* create sandbox-dag
* fix comment
* update variables
* fix: debug dag for test project
* fix: dataset name changed
* fix: set check for replace dataset and project name for test
* feat: inserted a check for existing tables and change views source to dbt dataset
* fix: inserted credentials in client for bigquery
* fix: insert gcp authentication in ci test
* dag update for following airflow best practices
* update sandbox dataset variable and schedule interval
Showing 9 changed files with 190 additions and 2 deletions.
@@ -0,0 +1,7 @@
create or replace table `{project_id}.{target_dataset}.{table_id}`
partition by date_trunc(batch_run_date, month)
options (partition_expiration_days = 180) as (
    select *
    from `{project_id}.{dataset_id}.{table_id}`
    where batch_run_date >= date_sub(current_date(), interval 6 month)
)
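The curly-brace fields in these query files are Python str.format placeholders rather than BigQuery syntax; the create DAG further down renders them before submitting the job. A minimal sketch of that rendering step, with hypothetical values for the parameters:

# Minimal sketch of how the query template is rendered (all values are hypothetical).
create_table_template = """
create or replace table `{project_id}.{target_dataset}.{table_id}`
partition by date_trunc(batch_run_date, month)
options (partition_expiration_days = 180) as (
    select *
    from `{project_id}.{dataset_id}.{table_id}`
    where batch_run_date >= date_sub(current_date(), interval 6 month)
)
"""

rendered = create_table_template.format(
    project_id="my-gcp-project",              # hypothetical
    dataset_id="crypto_stellar",              # hypothetical
    target_dataset="crypto_stellar_sandbox",  # hypothetical
    table_id="history_transactions",          # hypothetical
)
print(rendered)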
@@ -0,0 +1,3 @@
create or replace view `{project_id}.{target_dataset}.{table_id}` as (
    select * from `{project_id}.{dataset_id}.{table_id}`
)
@@ -0,0 +1,4 @@
insert into `{project_id}.{target_dataset}.{table_id}`
select *
from `{project_id}.{dataset_id}.{table_id}`
where date_trunc(batch_run_date, month) = date_trunc(current_date() - interval 1 month, month)
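The where clause restricts the insert to the previous calendar month, which matches the monthly schedule of the update DAG below. As a quick sanity check, the same boundary can be computed in Python (the dates are illustrative):

# Sketch of the partition the update query targets, assuming the DAG runs
# shortly after the start of a month (values are illustrative).
import datetime

def previous_month_start(today: datetime.date) -> datetime.date:
    """Python equivalent of date_trunc(current_date() - interval 1 month, month)."""
    first_of_this_month = today.replace(day=1)
    last_month_end = first_of_this_month - datetime.timedelta(days=1)
    return last_month_end.replace(day=1)

# A run on 2023-07-01 copies the June 2023 partition.
assert previous_month_start(datetime.date(2023, 7, 1)) == datetime.date(2023, 6, 1)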
@@ -0,0 +1,88 @@
""" | ||
This DAG creates the sandbox dataset with transactions tables, state tables with history and views. | ||
""" | ||
import datetime | ||
import json | ||
|
||
from airflow import DAG | ||
from airflow.models.variable import Variable | ||
from airflow.operators.empty import EmptyOperator | ||
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator | ||
from stellar_etl_airflow.build_bq_insert_job_task import ( | ||
file_to_string, | ||
get_query_filepath, | ||
) | ||
from stellar_etl_airflow.default import ( | ||
alert_after_max_retries, | ||
get_default_dag_args, | ||
init_sentry, | ||
) | ||
|
||
init_sentry() | ||
|
||
with DAG( | ||
"sandbox_create_dag", | ||
default_args=get_default_dag_args(), | ||
start_date=datetime.datetime(2023, 1, 1), | ||
description="This DAG creates a sandbox", | ||
schedule_interval="@once", | ||
params={"alias": "sandbox_dataset"}, | ||
user_defined_filters={ | ||
"fromjson": lambda s: json.loads(s), | ||
}, | ||
catchup=False, | ||
) as dag: | ||
PROJECT = Variable.get("bq_project") | ||
DATASET = Variable.get("bq_dataset") | ||
SANDBOX_DATASET = Variable.get("sandbox_dataset") | ||
DBT_DATASET = Variable.get("dbt_mart_dataset") | ||
TABLES_ID = Variable.get("table_ids", deserialize_json=True) | ||
DBT_TABLES = Variable.get("dbt_tables", deserialize_json=True) | ||
|
||
start_tables_task = EmptyOperator(task_id="start_tables_task") | ||
start_views_task = EmptyOperator(task_id="start_views_task") | ||
|
||
query_path = get_query_filepath("create_table") | ||
query = file_to_string(query_path) | ||
for table_id in TABLES_ID: | ||
sql_params = { | ||
"project_id": PROJECT, | ||
"dataset_id": DATASET, | ||
"table_id": TABLES_ID[table_id], | ||
"target_dataset": SANDBOX_DATASET, | ||
} | ||
query = query.format(**sql_params) | ||
tables_create_task = BigQueryInsertJobOperator( | ||
task_id=f"create_{table_id}", | ||
configuration={ | ||
"query": { | ||
"query": query, | ||
"useLegacySql": False, | ||
} | ||
}, | ||
on_failure_callback=alert_after_max_retries, | ||
) | ||
|
||
start_tables_task >> tables_create_task | ||
|
||
query_path = get_query_filepath("create_view") | ||
query = file_to_string(query_path) | ||
for dbt_table in DBT_TABLES: | ||
sql_params = { | ||
"project_id": PROJECT, | ||
"dataset_id": DBT_DATASET, | ||
"table_id": DBT_TABLES[dbt_table], | ||
"target_dataset": SANDBOX_DATASET, | ||
} | ||
query = query.format(**sql_params) | ||
dbt_tables_create_task = BigQueryInsertJobOperator( | ||
task_id=f"create_{dbt_table}", | ||
configuration={ | ||
"query": { | ||
"query": query, | ||
"useLegacySql": False, | ||
} | ||
}, | ||
on_failure_callback=alert_after_max_retries, | ||
) | ||
start_views_task >> dbt_tables_create_task |
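The DAG reads its configuration from Airflow Variables (bq_project, bq_dataset, sandbox_dataset, dbt_mart_dataset, table_ids, dbt_tables); the last two are JSON mappings from a task-id suffix to a table name. A hypothetical sketch of seeding those variables from Python; the variable names come from the DAG above, the values are placeholders:

# Hypothetical example of seeding the Airflow Variables this DAG expects.
# Variable names are taken from the DAG; all values are placeholders.
import json
from airflow.models import Variable

Variable.set("bq_project", "my-gcp-project")
Variable.set("bq_dataset", "crypto_stellar")
Variable.set("sandbox_dataset", "crypto_stellar_sandbox")
Variable.set("dbt_mart_dataset", "stellar_dbt_marts")
Variable.set("table_ids", json.dumps({"history_transactions": "history_transactions"}))
Variable.set("dbt_tables", json.dumps({"mart_example": "mart_example_table"}))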
@@ -0,0 +1,61 @@
""" | ||
This DAG update the Canvas sandbox dataset with transactions tables, state tables with history once a month. | ||
""" | ||
import datetime | ||
import json | ||
|
||
from airflow import DAG | ||
from airflow.models.variable import Variable | ||
from airflow.operators.empty import EmptyOperator | ||
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator | ||
from stellar_etl_airflow.build_bq_insert_job_task import ( | ||
file_to_string, | ||
get_query_filepath, | ||
) | ||
from stellar_etl_airflow.default import ( | ||
alert_after_max_retries, | ||
get_default_dag_args, | ||
init_sentry, | ||
) | ||
|
||
init_sentry() | ||
|
||
with DAG( | ||
"sandbox_update_dag", | ||
default_args=get_default_dag_args(), | ||
start_date=datetime.datetime(2023, 1, 1), | ||
description="This DAG updates a sandbox", | ||
schedule_interval="0 6 1 * *", | ||
params={"alias": "sandbox_dataset"}, | ||
user_defined_filters={"fromjson": lambda s: json.loads(s)}, | ||
catchup=False, | ||
) as dag: | ||
TABLES_ID = Variable.get("table_ids", deserialize_json=True) | ||
PROJECT = Variable.get("bq_project") | ||
BQ_DATASET = Variable.get("bq_dataset") | ||
SANDBOX_DATASET = Variable.get("sandbox_dataset") | ||
|
||
start_tables_task = EmptyOperator(task_id="start_tables_task") | ||
|
||
query_path = get_query_filepath("update_table") | ||
query = file_to_string(query_path) | ||
for table_id in TABLES_ID: | ||
sql_params = { | ||
"project_id": PROJECT, | ||
"dataset_id": BQ_DATASET, | ||
"table_id": TABLES_ID[table_id], | ||
"target_dataset": SANDBOX_DATASET, | ||
} | ||
query = query.format(**sql_params) | ||
tables_update_task = BigQueryInsertJobOperator( | ||
task_id=f"update_{table_id}", | ||
configuration={ | ||
"query": { | ||
"query": query, | ||
"useLegacySql": False, | ||
} | ||
}, | ||
on_failure_callback=alert_after_max_retries, | ||
) | ||
|
||
start_tables_task >> tables_update_task |
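The update DAG's schedule_interval of 0 6 1 * * fires at 06:00 UTC on the first day of every month, so each run appends the partition for the month that just ended, while the 180-day partition expiration set at create time ages out the oldest data. A small sketch using croniter (a library Airflow depends on) to show the next fire times:

# Sketch of when the monthly update runs, using croniter (illustrative dates).
import datetime
from croniter import croniter

schedule = "0 6 1 * *"  # 06:00 UTC on the first day of every month
it = croniter(schedule, datetime.datetime(2023, 6, 15))
print(it.get_next(datetime.datetime))  # 2023-07-01 06:00:00
print(it.get_next(datetime.datetime))  # 2023-08-01 06:00:00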