Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HUBBLE 444 - Refactor Elementary monitoring to run every 30 min #379

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions dags/dbt_data_quality_alerts_dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from datetime import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from kubernetes.client import models as k8s
from stellar_etl_airflow.build_dbt_task import dbt_task
from stellar_etl_airflow.build_elementary_slack_alert_task import elementary_task
from stellar_etl_airflow.default import get_default_dag_args, init_sentry

init_sentry()

# Runs dbt singular tests and forwards their results to Slack via Elementary
# on a half-hourly cadence.
dag = DAG(
    "dbt_data_quality_alerts",
    default_args=get_default_dag_args(),
    start_date=datetime(2024, 6, 11, 0, 0),
    description="This DAG runs dbt tests and Elementary alerts at a half-hourly cadence",
    # Fire at :15 and :45 so the quality checks run in between the dbt DAGs
    # (review-agreed change from the original "*/30 * * * *"; note that
    # "*/15,*/45" would be wrong — "*/15" alone already matches :00/:30 too).
    schedule_interval="15,45 * * * *",
    user_defined_filters={
        # Turns a task's resource-request dict into a k8s resource spec.
        "container_resources": lambda s: k8s.V1ResourceRequirements(requests=s),
    },
    # Overlapping runs would re-test and re-alert on the same results.
    max_active_runs=1,
    catchup=False,
    tags=["dbt-data-quality", "dbt-elementary-alerts"],
)


# DBT tests to run: only the singular tests, via `dbt test`.
dbt_unit_tests = dbt_task(
    dag,
    command_type="test",
    tag="singular_test",
)

# Elementary reads the dbt test results and sends Slack alerts.
unit_tests_elementary_alerts = elementary_task(dag, "dbt_data_quality")

# No-op anchor task marking the start of the run.
start_tests = EmptyOperator(task_id="start_tests_task")

# DAG task graph
start_tests >> dbt_unit_tests >> unit_tests_elementary_alerts
11 changes: 3 additions & 8 deletions dags/dbt_enriched_base_tables_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from kubernetes.client import models as k8s
from stellar_etl_airflow.build_cross_dependency_task import build_cross_deps
from stellar_etl_airflow.build_dbt_task import dbt_task
from stellar_etl_airflow.build_elementary_slack_alert_task import elementary_task
from stellar_etl_airflow.default import get_default_dag_args, init_sentry

init_sentry()
Expand All @@ -30,16 +29,12 @@
wait_on_state_table = build_cross_deps(dag, "wait_on_state_table", "state_table_export")

# DBT models to run
enriched_history_operations_task = dbt_task(dag, tag="enriched_history_operations")
enriched_history_operations_task = dbt_task(
dag, tag="enriched_history_operations", excluded="singular_test"
)
current_state_task = dbt_task(dag, tag="current_state")

elementary = elementary_task(dag, "dbt_enriched_base_tables")

# DAG task graph
wait_on_history_table >> enriched_history_operations_task

wait_on_state_table >> current_state_task

enriched_history_operations_task >> elementary

current_state_task >> elementary
17 changes: 0 additions & 17 deletions dags/dbt_stellar_marts_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from kubernetes.client import models as k8s
from stellar_etl_airflow.build_cross_dependency_task import build_cross_deps
from stellar_etl_airflow.build_dbt_task import dbt_task
from stellar_etl_airflow.build_elementary_slack_alert_task import elementary_task
from stellar_etl_airflow.default import get_default_dag_args, init_sentry

init_sentry()
Expand Down Expand Up @@ -49,8 +48,6 @@
soroban = dbt_task(dag, tag="soroban")
snapshot_state = dbt_task(dag, tag="snapshot_state")

elementary = elementary_task(dag, "dbt_stellar_marts")

# DAG task graph
wait_on_dbt_enriched_base_tables >> ohlc_task >> liquidity_pool_trade_volume_task

Expand All @@ -68,17 +65,3 @@
wait_on_dbt_enriched_base_tables >> history_assets
wait_on_dbt_enriched_base_tables >> soroban
wait_on_dbt_enriched_base_tables >> snapshot_state

mgi_task >> elementary
liquidity_providers_task >> elementary
liquidity_pools_values_task >> elementary
liquidity_pools_value_history_task >> elementary
trade_agg_task >> elementary
fee_stats_agg_task >> elementary
asset_stats_agg_task >> elementary
network_stats_agg_task >> elementary
partnership_assets_task >> elementary
history_assets >> elementary
soroban >> elementary
liquidity_pool_trade_volume_task >> elementary
snapshot_state >> elementary
10 changes: 10 additions & 0 deletions dags/stellar_etl_airflow/build_dbt_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def dbt_task(
flag="select",
operator="",
command_type="build",
excluded=None,
resource_cfg="default",
):
namespace = conf.get("kubernetes", "NAMESPACE")
Expand Down Expand Up @@ -97,6 +98,15 @@ def dbt_task(
args.append(",".join(models))
else:
args.append(models[0])
# --exclude selector added for necessary use cases
# Argument should be string or list of strings
if excluded:
task_name = f"{task_name}_with_exclude"
args.append("--exclude")
if isinstance(excluded, list):
args.append(",".join(excluded))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this supposed to be a comma or a space? I think the comma means it is the intersection of the items in the excluded list whereas space means both are excluded

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, it should be space-separated to provide union for the arguments; I'm fixing it in the next commit.

else:
args.append(excluded)

if Variable.get("dbt_full_refresh_models", deserialize_json=True).get(task_name):
args.append("--full-refresh")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@

from airflow.configuration import conf
from airflow.models import Variable
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (
KubernetesPodOperator,
)
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
from kubernetes import client, config
from kubernetes.client import models as k8s
from stellar_etl_airflow.default import alert_after_max_retries
Expand Down Expand Up @@ -87,4 +85,5 @@ def elementary_task(
on_failure_callback=alert_after_max_retries,
image_pull_policy="IfNotPresent",
image_pull_secrets=[k8s.V1LocalObjectReference("private-docker-auth")],
trigger_rule="all_done",
)
Loading