[HUBBLE 444] Refactor Elementary monitoring to run every 30 min #413

Merged 7 commits on Jul 1, 2024
5 changes: 3 additions & 2 deletions airflow_variables_dev.json
@@ -342,9 +342,9 @@
"create_sandbox": 2400,
"current_state": 720,
"default": 60,
"elementary_dbt_enriched_base_tables": 1080,
"elementary_dbt_stellar_marts": 1620,
"elementary_dbt_data_quality": 1620,
"enriched_history_operations": 780,
"enriched_history_operations_with_exclude": 780,
"fee_stats": 840,
"history_assets": 720,
"liquidity_pool_trade_volume": 1140,
@@ -356,6 +356,7 @@
"ohlc": 720,
"partnership_assets": 660,
"relevant_asset_trades": 1200,
"singular_test": 600,
"snapshot_state": 600,
"soroban": 720,
"trade_agg": 720,
5 changes: 3 additions & 2 deletions airflow_variables_prod.json
@@ -340,9 +340,9 @@
"create_sandbox": 1020,
"current_state": 1200,
"default": 60,
"elementary_dbt_enriched_base_tables": 2100,
"elementary_dbt_stellar_marts": 1560,
"elementary_dbt_data_quality": 2100,
"enriched_history_operations": 1800,
"enriched_history_operations_with_exclude": 1800,
"fee_stats": 360,
"history_assets": 360,
"liquidity_pool_trade_volume": 1200,
@@ -354,6 +354,7 @@
"ohlc": 960,
"partnership_assets": 1380,
"relevant_asset_trades": 1800,
"singular_test": 840,
"snapshot_state": 840,
"soroban": 420,
"trade_agg": 1020,
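These timing maps appear to be per-task durations keyed by task name (presumably seconds; the diff does not show where they are read), which is why the rename matters: once `build_dbt_task` appends the `_with_exclude` suffix (see the diff further down), the task no longer matches its old key. A minimal sketch of the lookup pattern that the keys and the `"default"` entry suggest; the variable name `task_sla_seconds` here is hypothetical, only the key/default structure comes from this diff:

```python
from airflow.models import Variable

# Hypothetical Airflow Variable name; the diff shows only the map contents.
sla_map = Variable.get("task_sla_seconds", deserialize_json=True)

# Tasks without an explicit entry would fall back to "default" (60), so the
# renamed enriched_history_operations_with_exclude task needs its own key
# to keep its 780 (dev) / 1800 (prod) setting.
seconds = sla_map.get("enriched_history_operations_with_exclude", sla_map["default"])
```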
41 changes: 41 additions & 0 deletions dags/dbt_data_quality_alerts_dag.py
@@ -0,0 +1,41 @@
from datetime import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from kubernetes.client import models as k8s
from stellar_etl_airflow.build_dbt_task import dbt_task
from stellar_etl_airflow.build_elementary_slack_alert_task import elementary_task
from stellar_etl_airflow.default import (
    alert_sla_miss,
    get_default_dag_args,
    init_sentry,
)

init_sentry()

with DAG(
    "dbt_data_quality_alerts",
    default_args=get_default_dag_args(),
    start_date=datetime(2024, 6, 25, 0, 0),
    description="This DAG runs dbt tests and Elementary alerts at a half-hourly cadence",
    schedule="15,45 * * * *",  # Runs at minute 15 and minute 45 of every hour
    user_defined_filters={
        "container_resources": lambda s: k8s.V1ResourceRequirements(requests=s),
    },
    max_active_runs=1,
    catchup=False,
    tags=["dbt-data-quality", "dbt-elementary-alerts"],
    # sla_miss_callback=alert_sla_miss,
) as dag:

    # DBT tests to run
    singular_tests = dbt_task(
        dag,
        command_type="test",
        tag="singular_test",
    )
    singular_tests_elementary_alerts = elementary_task(dag, "dbt_data_quality")
    start_tests = EmptyOperator(task_id="start_tests_task")

    # DAG task graph
    start_tests >> singular_tests >> singular_tests_elementary_alerts
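The schedule is meant to fire at minutes 15 and 45, i.e. every 30 minutes. Note that the step form `"*/15,*/45 * * * *"` would expand to minutes 0, 15, 30, and 45 (an every-15-minute schedule), so the plain list form is required for a true half-hourly cadence. A quick sanity check with croniter, the cron library Airflow itself relies on (illustrative only, not part of this PR):

```python
from datetime import datetime

from croniter import croniter

# "15,45 * * * *" should yield a strict 30-minute cadence.
it = croniter("15,45 * * * *", datetime(2024, 6, 25, 0, 0))
for _ in range(4):
    print(it.get_next(datetime))
# -> 00:15, 00:45, 01:15, 01:45 on 2024-06-25
```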
11 changes: 3 additions & 8 deletions dags/dbt_enriched_base_tables_dag.py
@@ -4,7 +4,6 @@
from kubernetes.client import models as k8s
from stellar_etl_airflow.build_cross_dependency_task import build_cross_deps
from stellar_etl_airflow.build_dbt_task import dbt_task
- from stellar_etl_airflow.build_elementary_slack_alert_task import elementary_task
from stellar_etl_airflow.default import (
alert_sla_miss,
get_default_dag_args,
@@ -35,16 +34,12 @@
wait_on_state_table = build_cross_deps(dag, "wait_on_state_table", "state_table_export")

# DBT models to run
- enriched_history_operations_task = dbt_task(dag, tag="enriched_history_operations")
+ enriched_history_operations_task = dbt_task(
+     dag, tag="enriched_history_operations", excluded="singular_test"
+ )
current_state_task = dbt_task(dag, tag="current_state")

Contributor:
Does this also need excluded="singular_test"?

Contributor Author:
There's no need. I checked with the analytics team: the excluded singular_test tag is relevant only to EHO (enriched_history_operations), and the exclusion mainly avoids testing the same thing in two different workflows at a similar cadence.

- elementary = elementary_task(dag, "dbt_enriched_base_tables")

# DAG task graph
wait_on_history_table >> enriched_history_operations_task

wait_on_state_table >> current_state_task

- enriched_history_operations_task >> elementary

- current_state_task >> elementary
18 changes: 0 additions & 18 deletions dags/dbt_stellar_marts_dag.py
@@ -4,7 +4,6 @@
from kubernetes.client import models as k8s
from stellar_etl_airflow.build_cross_dependency_task import build_cross_deps
from stellar_etl_airflow.build_dbt_task import dbt_task
- from stellar_etl_airflow.build_elementary_slack_alert_task import elementary_task
from stellar_etl_airflow.default import (
alert_sla_miss,
get_default_dag_args,
@@ -55,8 +54,6 @@
snapshot_state = dbt_task(dag, tag="snapshot_state")
relevant_asset_trades = dbt_task(dag, tag="relevant_asset_trades")

Contributor:
Same question for the dbt_tasks in here: do they need excluded="singular_test" as well?

Contributor Author:
Same response as the first comment: there's no need.

- elementary = elementary_task(dag, "dbt_stellar_marts")

# DAG task graph
wait_on_dbt_enriched_base_tables >> ohlc_task >> liquidity_pool_trade_volume_task

@@ -75,18 +72,3 @@
wait_on_dbt_enriched_base_tables >> soroban
wait_on_dbt_enriched_base_tables >> snapshot_state
wait_on_dbt_enriched_base_tables >> relevant_asset_trades

- mgi_task >> elementary
- liquidity_providers_task >> elementary
- liquidity_pools_values_task >> elementary
- liquidity_pools_value_history_task >> elementary
- trade_agg_task >> elementary
- fee_stats_agg_task >> elementary
- asset_stats_agg_task >> elementary
- network_stats_agg_task >> elementary
- partnership_assets_task >> elementary
- history_assets >> elementary
- soroban >> elementary
- liquidity_pool_trade_volume_task >> elementary
- snapshot_state >> elementary
- relevant_asset_trades >> elementary
10 changes: 10 additions & 0 deletions dags/stellar_etl_airflow/build_dbt_task.py
@@ -64,6 +64,7 @@ def dbt_task(
    flag="select",
    operator="",
    command_type="build",
+   excluded=None,
    resource_cfg="default",
):
    namespace = conf.get("kubernetes", "NAMESPACE")
@@ -97,6 +98,15 @@
        args.append(",".join(models))
    else:
        args.append(models[0])
+   # --exclude selector added for necessary use cases
+   # Argument should be a string or a list of strings
+   if excluded:
+       task_name = f"{task_name}_with_exclude"
+       args.append("--exclude")
+       if isinstance(excluded, list):
+           args.append(" ".join(excluded))
+       else:
+           args.append(excluded)

    if Variable.get("dbt_full_refresh_models", deserialize_json=True).get(task_name):
        args.append("--full-refresh")
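For a concrete picture of what the new branch emits, a sketch under the assumption that `models` is built from the task's tag with dbt's `tag:` selector method (the selector format is not shown in this hunk):

```python
# Assumed prior state for dbt_task(dag, tag="enriched_history_operations",
#                                  excluded="singular_test"):
args = ["build", "--select", "tag:enriched_history_operations"]  # selector format assumed
task_name = "enriched_history_operations"
excluded = "singular_test"

if excluded:
    task_name = f"{task_name}_with_exclude"  # matches the new airflow_variables keys
    args.append("--exclude")
    args.append(" ".join(excluded) if isinstance(excluded, list) else excluded)

# args -> ["build", "--select", "tag:enriched_history_operations",
#          "--exclude", "singular_test"]
# The dbt run skips the singular tests, which the half-hourly
# dbt_data_quality_alerts DAG now owns.
```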
dags/stellar_etl_airflow/build_elementary_slack_alert_task.py
@@ -4,9 +4,7 @@

from airflow.configuration import conf
from airflow.models import Variable
- from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (
-     KubernetesPodOperator,
- )
+ from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
from kubernetes import client, config
from kubernetes.client import models as k8s
from stellar_etl_airflow.default import alert_after_max_retries
@@ -93,4 +91,5 @@ def elementary_task(
f"elementary_{task_name}"
]
),
+   trigger_rule="all_done",
)
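One plausible reading of the trigger_rule addition (only the call site is visible in this diff): with Airflow's default all_success rule, a failing dbt test would cause the downstream Elementary alert task to be skipped, which is precisely when an alert matters most; all_done fires it once upstream tasks finish in any state. A minimal sketch:

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator

with DAG("trigger_rule_sketch", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    # Stand-in tasks; EmptyOperator is used only to illustrate the rule.
    tests = EmptyOperator(task_id="tests")
    alerts = EmptyOperator(task_id="alerts", trigger_rule="all_done")

    # "all_done" runs `alerts` whether `tests` succeeds or fails;
    # the default "all_success" would skip it on failure.
    tests >> alerts
```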