From b932c11a480cbce0dd725c1c1ac0eabcfdeb77c1 Mon Sep 17 00:00:00 2001 From: Laysa de Sousa Bitencourt <laysa.bitencourt@indicium.tech> Date: Thu, 20 Jun 2024 13:55:04 -0300 Subject: [PATCH 1/2] pre-commit CI finished --- .github/workflows/lint.yml | 30 ++ .pre-commit-config.yaml | 43 +- airflow_variables_dev.json | 367 +++++++++-------- airflow_variables_prod.json | 368 ++++++++++-------- dags/audit_log_dag.py | 1 + dags/cleanup_metadata_dag.py | 23 +- dags/dataset_reset_dag.py | 1 + dags/ddls/create_default_value_field.sh | 4 +- dags/ddls/create_partitioned_tables.sh | 13 +- dags/ddls/create_state_tables.sh | 9 +- dags/ddls/create_view.sh | 6 +- dags/ddls/delete_gcs_files.sh | 3 +- dags/ddls/update_table_schema.sh | 1 - dags/history_tables_dag.py | 1 + dags/sandbox_create_dag.py | 1 + dags/sandbox_update_dag.py | 1 + dags/state_table_dag.py | 1 + .../build_apply_gcs_changes_to_bq_task.py | 1 + dags/stellar_etl_airflow/build_export_task.py | 1 + .../build_gcs_to_bq_task.py | 1 + dags/stellar_etl_airflow/build_time_task.py | 1 + poststart.sh | 24 +- 22 files changed, 533 insertions(+), 368 deletions(-) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..7427b4c4 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,30 @@ +name: CI Linting + +on: + pull_request: + branches: + - master + +jobs: + pre-commit: + runs-on: ubuntu-latest + if: >- + github.event.pull_request.merged == false && + github.event.pull_request.state == 'open' + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + + - id: file_changes + uses: trilom/file-changes-action@v1.2.3 + with: + output: " " + + - uses: pre-commit/action@v3.0.0 + env: + extra_args: --color=always --files ${{ steps.file_changes.outputs.files}} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e155bae0..8bdf6090 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,6 @@ -# ci: -# autofix_commit_msg: "[skip ci] `pre-commit` auto fix" -# autofix_prs: true -# autoupdate_branch: "" -# autoupdate_commit_msg: "[skip ci] `pre-commit` autoupdate" -# autoupdate_schedule: weekly -# skip: [] -# submodules: false - repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-added-large-files # prevents giant files from being committed. - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems. @@ -21,25 +12,47 @@ repos: - id: requirements-txt-fixer # sorts entries in requirements.txt. - id: trailing-whitespace # trims trailing whitespace. + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: pretty-format-json # pretty formats json files. + args: ["--autofix"] + + - repo: https://github.com/scop/pre-commit-shfmt + rev: v3.8.0-1 + hooks: + - id: shfmt + args: + - -w + # List files that will be formatted + - --list + # Indent by four spaces + - --indent + - "4" + # Binary operators may start a line + - --binary-next-line + # Switch cases are indented + - --case-indent + - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.3 + rev: v3.1.0 hooks: - id: prettier - files: \.(json|markdown|md|yaml|yml)$ + files: \.(markdown|md|yaml|yml)$ language_version: 14.21.3 - repo: https://github.com/hadialqattan/pycln - rev: v2.3.0 + rev: v2.4.0 hooks: - id: pycln - repo: https://github.com/psf/black - rev: 23.11.0 + rev: 24.4.2 hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort args: ["--profile=black", "--magic-placement"] diff --git a/airflow_variables_dev.json b/airflow_variables_dev.json index cf44f13a..40bd041a 100644 --- a/airflow_variables_dev.json +++ b/airflow_variables_dev.json @@ -1,34 +1,33 @@ { "api_key_path": "/home/airflow/gcs/data/apiKey.json", "bq_dataset": 
"test_crypto_stellar_internal", - "bq_project": "test-hubble-319619", "bq_dataset_audit_log": "audit_log", - "date_for_resets": { - "date": ["2023-03-15", "2023-06-14", "2023-09-13", "2023-12-13"] - }, + "bq_project": "test-hubble-319619", "cluster_fields": { - "accounts": ["account_id", "last_modified_ledger"], - "account_signers": ["account_id", "signer", "last_modified_ledger"], - "claimable_balances": ["asset_id", "last_modified_ledger"], - "history_assets": ["asset_code", "asset_issuer", "asset_type"], - "history_effects": ["address", "operation_id", "type"], - "history_ledgers": ["sequence", "closed_at"], - "history_operations": ["transaction_id", "source_account", "type"], - "history_trades": ["selling_asset_id", "buying_asset_id", "trade_type"], - "history_transactions": ["account", "ledger_sequence", "successful"], - "offers": ["selling_asset_id", "buying_asset_id", "last_modified_ledger"], - "liquidity_pools": [ - "liquidity_pool_id", - "asset_a_id", - "asset_b_id", + "account_signers": [ + "account_id", + "signer", "last_modified_ledger" ], - "trust_lines": [ + "accounts": [ "account_id", + "last_modified_ledger" + ], + "claimable_balances": [ "asset_id", - "liquidity_pool_id", "last_modified_ledger" ], + "config_settings": [ + "last_modified_ledger" + ], + "contract_code": [ + "last_modified_ledger", + "contract_code_hash" + ], + "contract_data": [ + "last_modified_ledger", + "contract_id" + ], "enriched_history_operations": [ "ledger_sequence", "transaction_id", @@ -41,22 +40,107 @@ "account", "type" ], - "contract_data": ["last_modified_ledger", "contract_id"], - "contract_code": ["last_modified_ledger", "contract_code_hash"], - "config_settings": ["last_modified_ledger"], - "ttl": ["last_modified_ledger", "key_hash"] + "history_assets": [ + "asset_code", + "asset_issuer", + "asset_type" + ], + "history_effects": [ + "address", + "operation_id", + "type" + ], + "history_ledgers": [ + "sequence", + "closed_at" + ], + "history_operations": [ + 
"transaction_id", + "source_account", + "type" + ], + "history_trades": [ + "selling_asset_id", + "buying_asset_id", + "trade_type" + ], + "history_transactions": [ + "account", + "ledger_sequence", + "successful" + ], + "liquidity_pools": [ + "liquidity_pool_id", + "asset_a_id", + "asset_b_id", + "last_modified_ledger" + ], + "offers": [ + "selling_asset_id", + "buying_asset_id", + "last_modified_ledger" + ], + "trust_lines": [ + "account_id", + "asset_id", + "liquidity_pool_id", + "last_modified_ledger" + ], + "ttl": [ + "last_modified_ledger", + "key_hash" + ] + }, + "currency_bucket": "currencies_ohlc", + "currency_ohlc": { + "columns_ohlc_currency": [ + "time", + "open", + "high", + "low", + "close" + ], + "currency": "euro_ohlc", + "endpoint": "https://api.coingecko.com/api/v3/coins/tether-eurt/ohlc?vs_currency=usd&days=1", + "table_name": "euro_usd_ohlc" + }, + "date_for_resets": { + "date": [ + "2023-03-15", + "2023-06-14", + "2023-09-13", + "2023-12-13" + ] }, "dbt_dataset_for_test": "test", + "dbt_elementary_dataset": "test_elementary", + "dbt_elementary_secret": "slack-token-elementary", "dbt_full_refresh_models": { "partnership_assets__account_holders_activity_fact": false, "partnership_assets__asset_activity_fact": false }, "dbt_image_name": "stellar/stellar-dbt:b7e0010", + "dbt_internal_source_db": "test-hubble-319619", + "dbt_internal_source_schema": "test_crypto_stellar_internal", "dbt_job_execution_timeout_seconds": 300, "dbt_job_retries": 1, "dbt_mart_dataset": "test_crypto_stellar_dbt", "dbt_maximum_bytes_billed": 250000000000, "dbt_project": "test-hubble-319619", + "dbt_public_source_db": "test-hubble-319619", + "dbt_public_source_schema": "test_crypto_stellar", + "dbt_slack_elementary_channel": "stellar-elementary-alerts", + "dbt_tables": { + "accounts_current": "accounts_current", + "config_settings_current": "config_settings_current", + "contract_code_current": "contract_code_current", + "contract_data_current": "contract_data_current", + 
"liquidity_pools_current": "liquidity_pools_current", + "offers_current": "offers_current", + "signers_current": "account_signers_current", + "trustlines_current": "trust_lines_current", + "ttl_current": "ttl_current" + }, "dbt_target": "test", "dbt_threads": 12, "gcs_exported_data_bucket_name": "us-central1-test-hubble-2-5f1f2dbf-bucket", @@ -64,16 +148,21 @@ "image_name": "stellar/stellar-etl:274b138", "image_output_path": "/etl/exported_data/", "image_pull_policy": "IfNotPresent", - "kube_config_location": "", - "kubernetes_sidecar_image": "alpine", "k8s_namespace": "hubble-composer", "k8s_service_account": "hubble-composer-service-account", + "kube_config_location": "", + "kubernetes_sidecar_image": "alpine", "local_output_path": "/home/airflow/etlData/", + "max_db_entry_age_in_days": 90, "output_file_names": { "accounts": "accounts.txt", "assets": "assets.txt", "changes": "changes_folder", "claimable_balances": "claimable_balances.txt", + "config_settings": "config_settings.txt", + "contract_code": "contract_code.txt", + "contract_data": "contract_data.txt", + "diagnostic_events": "diagnostic_events.txt", "dimAccounts": "dimAccounts.txt", "dimMarkets": "dimMarkets.txt", "dimOffers": "dimOffers.txt", @@ -88,85 +177,81 @@ "trades": "trades.txt", "transactions": "transactions.txt", "trustlines": "trustlines.txt", - "contract_data": "contract_data.txt", - "contract_code": "contract_code.txt", - "config_settings": "config_settings.txt", - "ttl": "ttl.txt", - "diagnostic_events": "diagnostic_events.txt" + "ttl": "ttl.txt" }, "output_path": "/home/airflow/gcs/data/", "owner": "SDF", "partition_fields": { - "enriched_history_operations": { - "type": "MONTH", - "field": "closed_at" + "account_signers": { + "field": "batch_run_date", + "type": "MONTH" }, - "enriched_meaningful_history_operations": { - "type": "MONTH", - "field": "closed_at" + "accounts": { + "field": "batch_run_date", + "type": "MONTH" }, - "history_assets": { - "type": "MONTH", - "field": 
"batch_run_date" + "claimable_balances": { + "field": "batch_run_date", + "type": "MONTH" }, - "accounts": { - "type": "MONTH", - "field": "batch_run_date" + "config_settings": { + "field": "closed_at", + "type": "MONTH" }, - "account_signers": { - "type": "MONTH", - "field": "batch_run_date" + "contract_code": { + "field": "closed_at", + "type": "MONTH" }, - "claimable_balances": { - "type": "MONTH", - "field": "batch_run_date" + "contract_data": { + "field": "closed_at", + "type": "MONTH" + }, + "enriched_history_operations": { + "field": "closed_at", + "type": "MONTH" + }, + "enriched_meaningful_history_operations": { + "field": "closed_at", + "type": "MONTH" + }, + "history_assets": { + "field": "batch_run_date", + "type": "MONTH" }, "history_effects": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "history_ledgers": { - "type": "MONTH", - "field": "closed_at" + "field": "closed_at", + "type": "MONTH" }, "history_operations": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "history_trades": { - "type": "MONTH", - "field": "ledger_closed_at" + "field": "ledger_closed_at", + "type": "MONTH" }, "history_transactions": { - "type": "MONTH", - "field": "batch_run_date" - }, - "offers": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "liquidity_pools": { - "type": "MONTH", - "field": "batch_run_date" - }, - "trust_lines": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "mgi": { "field": "tran_evnt_date", "type": "MONTH" }, - "contract_data": { - "field": "closed_at", - "type": "MONTH" - }, - "contract_code": { - "field": "closed_at", + "offers": { + "field": "batch_run_date", "type": "MONTH" }, - "config_settings": { - "field": "closed_at", + "trust_lines": { + "field": "batch_run_date", "type": "MONTH" }, "ttl": { @@ -174,6 +259,7 @@ "type": "MONTH" } }, + 
"partners_bucket": "ext-partner-sftp", "partners_data": { "mgi": { "prefix_folder": "mgi", @@ -181,47 +267,51 @@ "table": "raw_mgi_stellar_transactions" } }, - "partners_bucket": "ext-partner-sftp", "public_dataset": "test_crypto_stellar", "public_project": "test-hubble-319619", "resources": { - "default": { + "cc": { "requests": { "cpu": "0.3", - "memory": "900Mi", - "ephemeral-storage": "1Gi" + "ephemeral-storage": "1Gi", + "memory": "900Mi" } }, - "cc": { + "default": { "requests": { "cpu": "0.3", - "memory": "900Mi", - "ephemeral-storage": "1Gi" + "ephemeral-storage": "1Gi", + "memory": "900Mi" } }, - "wocc": { + "state": { "requests": { "cpu": "0.3", - "memory": "900Mi", - "ephemeral-storage": "1Gi" + "ephemeral-storage": "1Gi", + "memory": "900Mi" } }, - "state": { + "wocc": { "requests": { "cpu": "0.3", - "memory": "900Mi", - "ephemeral-storage": "1Gi" + "ephemeral-storage": "1Gi", + "memory": "900Mi" } } }, + "sandbox_dataset": "crypto_stellar_internal_sandbox", + "schema_filepath": "/home/airflow/gcs/dags/schemas/", "sentry_dsn": "https://9e0a056541c3445083329b072f2df690@o14203.ingest.us.sentry.io/6190849", "sentry_environment": "development", - "schema_filepath": "/home/airflow/gcs/dags/schemas/", "table_ids": { "accounts": "accounts", "assets": "history_assets", "claimable_balances": "claimable_balances", + "config_settings": "config_settings", + "contract_code": "contract_code", + "contract_data": "contract_data", "effects": "history_effects", + "enriched_history_operations": "enriched_history_operations", "ledgers": "history_ledgers", "liquidity_pools": "liquidity_pools", "offers": "offers", @@ -230,83 +320,52 @@ "trades": "history_trades", "transactions": "history_transactions", "trustlines": "trust_lines", - "enriched_history_operations": "enriched_history_operations", - "contract_data": "contract_data", - "contract_code": "contract_code", - "config_settings": "config_settings", "ttl": "ttl" }, - "task_timeout": { - "build_batch_stats": 180, - 
"build_bq_insert_job": 180, - "build_copy_table": 180, - "build_dbt_task": 960, - "build_delete_data_task": 180, - "build_export_task": 420, - "build_gcs_to_bq_task": 300, - "build_time_task": 480 - }, "task_sla": { - "default": 60, - "build_time_task": 480, + "asset_stats": 720, + "build_batch_stats": 840, + "build_bq_insert_job": 1080, + "build_delete_data_task": 1020, "build_export_task": 840, - "enriched_history_operations": 540, + "build_gcs_to_bq_task": 960, + "build_time_task": 480, + "cleanup_metadata": 60, + "create_sandbox": 2400, "current_state": 540, + "default": 60, "elementary_dbt_enriched_base_tables": 840, - "ohlc": 720, + "elementary_dbt_stellar_marts": 1620, + "enriched_history_operations": 540, + "fee_stats": 840, + "history_assets": 720, "liquidity_pool_trade_volume": 1140, - "mgi": 660, - "liquidity_providers": 720, "liquidity_pools_value": 840, "liquidity_pools_value_history": 600, - "trade_agg": 720, - "fee_stats": 840, - "asset_stats": 720, + "liquidity_providers": 720, + "mgi": 660, "network_stats": 720, + "ohlc": 720, "partnership_assets": 660, - "history_assets": 720, - "soroban": 720, "snapshot_state": 600, - "elementary_dbt_stellar_marts": 1620, - "create_sandbox": 2400, - "update_sandbox": 60, - "cleanup_metadata": 60, - "build_delete_data_task": 1020, - "build_batch_stats": 840, - "build_bq_insert_job": 1080, - "build_gcs_to_bq_task": 960 + "soroban": 720, + "trade_agg": 720, + "update_sandbox": 60 }, - "dbt_tables": { - "signers_current": "account_signers_current", - "accounts_current": "accounts_current", - "config_settings_current": "config_settings_current", - "contract_code_current": "contract_code_current", - "contract_data_current": "contract_data_current", - "liquidity_pools_current": "liquidity_pools_current", - "offers_current": "offers_current", - "trustlines_current": "trust_lines_current", - "ttl_current": "ttl_current" + "task_timeout": { + "build_batch_stats": 180, + "build_bq_insert_job": 180, + "build_copy_table": 
180, + "build_dbt_task": 960, + "build_delete_data_task": 180, + "build_export_task": 420, + "build_gcs_to_bq_task": 300, + "build_time_task": 480 }, + "txmeta_datastore_path": "sdf-ledger-close-meta/ledgers", + "use_captive_core": "False", + "use_futurenet": "False", "use_testnet": "True", - "sandbox_dataset": "crypto_stellar_internal_sandbox", "volume_config": {}, - "volume_name": "etl-data", - "use_futurenet": "False", - "currency_ohlc": { - "currency": "euro_ohlc", - "table_name": "euro_usd_ohlc", - "endpoint": "https://api.coingecko.com/api/v3/coins/tether-eurt/ohlc?vs_currency=usd&days=1", - "columns_ohlc_currency": ["time", "open", "high", "low", "close"] - }, - "currency_bucket": "currencies_ohlc", - "max_db_entry_age_in_days": 90, - "dbt_internal_source_db": "test-hubble-319619", - "dbt_internal_source_schema": "test_crypto_stellar_internal", - "dbt_public_source_db": "test-hubble-319619", - "dbt_public_source_schema": "test_crypto_stellar", - "dbt_slack_elementary_channel": "stellar-elementary-alerts", - "dbt_elementary_secret": "slack-token-elementary", - "dbt_elementary_dataset": "test_elementary", - "use_captive_core": "False", - "txmeta_datastore_path": "sdf-ledger-close-meta/ledgers" + "volume_name": "etl-data" } diff --git a/airflow_variables_prod.json b/airflow_variables_prod.json index c4c14653..3509763b 100644 --- a/airflow_variables_prod.json +++ b/airflow_variables_prod.json @@ -1,30 +1,33 @@ { "api_key_path": "/home/airflow/gcs/data/apiKey.json", "bq_dataset": "crypto_stellar_internal_2", + "bq_dataset_audit_log": "audit_log", "bq_project": "hubble-261722", "cluster_fields": { - "accounts": ["account_id", "last_modified_ledger"], - "account_signers": ["account_id", "signer", "last_modified_ledger"], - "claimable_balances": ["asset_id", "last_modified_ledger"], - "history_assets": ["asset_code", "asset_issuer", "asset_type"], - "history_effects": ["address", "operation_id", "type"], - "history_ledgers": ["sequence", "closed_at"], - 
"history_operations": ["transaction_id", "source_account", "type"], - "history_trades": ["selling_asset_id", "buying_asset_id", "trade_type"], - "history_transactions": ["account", "ledger_sequence", "successful"], - "offers": ["selling_asset_id", "buying_asset_id", "last_modified_ledger"], - "liquidity_pools": [ - "liquidity_pool_id", - "asset_a_id", - "asset_b_id", + "account_signers": [ + "account_id", + "signer", "last_modified_ledger" ], - "trust_lines": [ + "accounts": [ "account_id", + "last_modified_ledger" + ], + "claimable_balances": [ "asset_id", - "liquidity_pool_id", "last_modified_ledger" ], + "config_settings": [ + "last_modified_ledger" + ], + "contract_code": [ + "last_modified_ledger", + "contract_code_hash" + ], + "contract_data": [ + "last_modified_ledger", + "contract_id" + ], "enriched_history_operations": [ "ledger_sequence", "transaction_id", @@ -37,12 +40,73 @@ "account", "type" ], - "contract_data": ["last_modified_ledger", "contract_id"], - "contract_code": ["last_modified_ledger", "contract_code_hash"], - "config_settings": ["last_modified_ledger"], - "ttl": ["last_modified_ledger", "key_hash"] + "history_assets": [ + "asset_code", + "asset_issuer", + "asset_type" + ], + "history_effects": [ + "address", + "operation_id", + "type" + ], + "history_ledgers": [ + "sequence", + "closed_at" + ], + "history_operations": [ + "transaction_id", + "source_account", + "type" + ], + "history_trades": [ + "selling_asset_id", + "buying_asset_id", + "trade_type" + ], + "history_transactions": [ + "account", + "ledger_sequence", + "successful" + ], + "liquidity_pools": [ + "liquidity_pool_id", + "asset_a_id", + "asset_b_id", + "last_modified_ledger" + ], + "offers": [ + "selling_asset_id", + "buying_asset_id", + "last_modified_ledger" + ], + "trust_lines": [ + "account_id", + "asset_id", + "liquidity_pool_id", + "last_modified_ledger" + ], + "ttl": [ + "last_modified_ledger", + "key_hash" + ] + }, + "currency_bucket": "ext-asset-pricing", + 
"currency_ohlc": { + "columns_ohlc_currency": [ + "time", + "open", + "high", + "low", + "close" + ], + "currency": "euro_ohlc", + "endpoint": "https://api.coingecko.com/api/v3/coins/tether-eurt/ohlc?vs_currency=usd&days=1", + "table_name": "euro_usd_ohlc" }, "dbt_dataset_for_test": "", + "dbt_elementary_dataset": "elementary", + "dbt_elementary_secret": "slack-token-elementary", "dbt_full_refresh_models": { "history_assets": false, "int_partnership_assets__account_holders_activity": false, @@ -57,11 +121,27 @@ "trade_agg": false }, "dbt_image_name": "stellar/stellar-dbt:b7e0010", + "dbt_internal_source_db": "hubble-261722", + "dbt_internal_source_schema": "crypto_stellar_internal_2", "dbt_job_execution_timeout_seconds": 1800, "dbt_job_retries": 1, "dbt_mart_dataset": "crypto_stellar_dbt", "dbt_maximum_bytes_billed": 100000000000000, "dbt_project": "hubble-261722", + "dbt_public_source_db": "crypto-stellar", + "dbt_public_source_schema": "crypto_stellar", + "dbt_slack_elementary_channel": "alerts-hubble-data-quality", + "dbt_tables": { + "accounts_current": "accounts_current", + "config_settings_current": "config_settings_current", + "contract_code_current": "contract_code_current", + "contract_data_current": "contract_data_current", + "liquidity_pools_current": "liquidity_pools_current", + "offers_current": "offers_current", + "signers_current": "account_signers_current", + "trustlines_current": "trust_lines_current", + "ttl_current": "ttl_current" + }, "dbt_target": "prod", "dbt_threads": 12, "gcs_exported_data_bucket_name": "us-central1-hubble-14c4ca64-bucket", @@ -69,17 +149,22 @@ "image_name": "stellar/stellar-etl:274b138", "image_output_path": "/etl/exported_data/", "image_pull_policy": "IfNotPresent", - "kube_config_location": "", - "kubernetes_sidecar_image": "alpine", "k8s_namespace": "hubble-composer", "k8s_service_account": "hubble-composer-service-account", + "kube_config_location": "", + "kubernetes_sidecar_image": "alpine", "local_output_path": 
"/home/airflow/etlData/", + "max_db_entry_age_in_days": 180, "namespace": "default", "output_file_names": { "accounts": "accounts.txt", "assets": "assets.txt", "changes": "changes_folder", "claimable_balances": "claimable_balances.txt", + "config_settings": "config_settings.txt", + "contract_code": "contract_code.txt", + "contract_data": "contract_data.txt", + "diagnostic_events": "diagnostic_events.txt", "dimAccounts": "dimAccounts.txt", "dimMarkets": "dimMarkets.txt", "dimOffers": "dimOffers.txt", @@ -94,81 +179,77 @@ "trades": "trades.txt", "transactions": "transactions.txt", "trustlines": "trustlines.txt", - "contract_data": "contract_data.txt", - "contract_code": "contract_code.txt", - "config_settings": "config_settings.txt", - "ttl": "ttl.txt", - "diagnostic_events": "diagnostic_events.txt" + "ttl": "ttl.txt" }, "output_path": "/home/airflow/gcs/data/", "owner": "SDF", "partition_fields": { - "enriched_history_operations": { - "type": "MONTH", - "field": "closed_at" + "account_signers": { + "field": "batch_run_date", + "type": "MONTH" }, - "enriched_meaningful_history_operations": { - "type": "MONTH", - "field": "closed_at" + "accounts": { + "field": "batch_run_date", + "type": "MONTH" }, - "history_assets": { - "type": "MONTH", - "field": "batch_run_date" + "claimable_balances": { + "field": "batch_run_date", + "type": "MONTH" }, - "accounts": { - "type": "MONTH", - "field": "batch_run_date" + "config_settings": { + "field": "closed_at", + "type": "MONTH" }, - "account_signers": { - "type": "MONTH", - "field": "batch_run_date" + "contract_code": { + "field": "closed_at", + "type": "MONTH" }, - "claimable_balances": { - "type": "MONTH", - "field": "batch_run_date" + "contract_data": { + "field": "closed_at", + "type": "MONTH" + }, + "enriched_history_operations": { + "field": "closed_at", + "type": "MONTH" + }, + "enriched_meaningful_history_operations": { + "field": "closed_at", + "type": "MONTH" + }, + "history_assets": { + "field": "batch_run_date", + 
"type": "MONTH" }, "history_effects": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "history_ledgers": { - "type": "MONTH", - "field": "closed_at" + "field": "closed_at", + "type": "MONTH" }, "history_operations": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "history_trades": { - "type": "MONTH", - "field": "ledger_closed_at" + "field": "ledger_closed_at", + "type": "MONTH" }, "history_transactions": { - "type": "MONTH", - "field": "batch_run_date" - }, - "offers": { - "type": "MONTH", - "field": "batch_run_date" + "field": "batch_run_date", + "type": "MONTH" }, "liquidity_pools": { - "type": "MONTH", - "field": "batch_run_date" - }, - "trust_lines": { - "type": "MONTH", - "field": "batch_run_date" - }, - "contract_data": { - "field": "closed_at", + "field": "batch_run_date", "type": "MONTH" }, - "contract_code": { - "field": "closed_at", + "offers": { + "field": "batch_run_date", "type": "MONTH" }, - "config_settings": { - "field": "closed_at", + "trust_lines": { + "field": "batch_run_date", "type": "MONTH" }, "ttl": { @@ -176,47 +257,59 @@ "type": "MONTH" } }, + "partners_bucket": "ext-partner-sftp", + "partners_data": { + "mgi": { + "prefix_folder": "mgi", + "prefix_id": "stellar_transaction_extract", + "table": "raw_mgi_stellar_transactions" + } + }, "public_dataset": "crypto_stellar", "public_project": "crypto-stellar", - "bq_dataset_audit_log": "audit_log", "resources": { - "default": { + "cc": { "requests": { "cpu": "3.5", - "memory": "5Gi", - "ephemeral-storage": "1Gi" + "ephemeral-storage": "10Gi", + "memory": "15Gi" } }, - "cc": { + "default": { "requests": { "cpu": "3.5", - "memory": "15Gi", - "ephemeral-storage": "10Gi" + "ephemeral-storage": "1Gi", + "memory": "5Gi" } }, - "wocc": { + "state": { "requests": { "cpu": "3.5", - "memory": "15Gi", - "ephemeral-storage": "10Gi" + "ephemeral-storage": "12Gi", + "memory": "20Gi" } }, - "state": { + 
"wocc": { "requests": { "cpu": "3.5", - "memory": "20Gi", - "ephemeral-storage": "12Gi" + "ephemeral-storage": "10Gi", + "memory": "15Gi" } } }, + "sandbox_dataset": "crypto_stellar_internal_sandbox", + "schema_filepath": "/home/airflow/gcs/dags/schemas/", "sentry_dsn": "https://94027cdcc4c9470f9dafa2c0b456c2c9@o14203.ingest.us.sentry.io/5806618", "sentry_environment": "production", - "schema_filepath": "/home/airflow/gcs/dags/schemas/", "table_ids": { "accounts": "accounts", "assets": "history_assets", "claimable_balances": "claimable_balances", + "config_settings": "config_settings", + "contract_code": "contract_code", + "contract_data": "contract_data", "effects": "history_effects", + "enriched_history_operations": "enriched_history_operations", "ledgers": "history_ledgers", "liquidity_pools": "liquidity_pools", "offers": "offers", @@ -225,91 +318,52 @@ "trades": "history_trades", "transactions": "history_transactions", "trustlines": "trust_lines", - "enriched_history_operations": "enriched_history_operations", - "contract_data": "contract_data", - "contract_code": "contract_code", - "config_settings": "config_settings", "ttl": "ttl" }, - "task_timeout": { - "build_batch_stats": 180, - "build_bq_insert_job": 180, - "build_copy_table": 180, - "build_dbt_task": 1800, - "build_delete_data_task": 180, - "build_export_task": 300, - "build_gcs_to_bq_task": 300, - "build_time_task": 360 - }, "task_sla": { - "default": 60, - "build_time_task": 300, + "asset_stats": 420, + "build_batch_stats": 600, + "build_bq_insert_job": 840, + "build_delete_data_task": 780, "build_export_task": 600, - "enriched_history_operations": 960, + "build_gcs_to_bq_task": 660, + "build_time_task": 300, + "cleanup_metadata": 60, + "create_sandbox": 1020, "current_state": 600, + "default": 60, "elementary_dbt_enriched_base_tables": 1080, - "ohlc": 960, + "elementary_dbt_stellar_marts": 1560, + "enriched_history_operations": 960, + "fee_stats": 360, + "history_assets": 360, 
"liquidity_pool_trade_volume": 1200, - "mgi": 1020, - "liquidity_providers": 720, "liquidity_pools_value": 360, "liquidity_pools_value_history": 360, - "trade_agg": 1020, - "fee_stats": 360, - "asset_stats": 420, + "liquidity_providers": 720, + "mgi": 1020, "network_stats": 360, + "ohlc": 960, "partnership_assets": 1380, - "history_assets": 360, - "soroban": 420, "snapshot_state": 840, - "elementary_dbt_stellar_marts": 1560, - "create_sandbox": 1020, - "update_sandbox": 5460, - "cleanup_metadata": 60, - "build_delete_data_task": 780, - "build_batch_stats": 600, - "build_bq_insert_job": 840, - "build_gcs_to_bq_task": 660 + "soroban": 420, + "trade_agg": 1020, + "update_sandbox": 5460 }, - "dbt_tables": { - "signers_current": "account_signers_current", - "accounts_current": "accounts_current", - "config_settings_current": "config_settings_current", - "contract_code_current": "contract_code_current", - "contract_data_current": "contract_data_current", - "liquidity_pools_current": "liquidity_pools_current", - "offers_current": "offers_current", - "trustlines_current": "trust_lines_current", - "ttl_current": "ttl_current" + "task_timeout": { + "build_batch_stats": 180, + "build_bq_insert_job": 180, + "build_copy_table": 180, + "build_dbt_task": 1800, + "build_delete_data_task": 180, + "build_export_task": 300, + "build_gcs_to_bq_task": 300, + "build_time_task": 360 }, + "txmeta_datastore_path": "sdf-ledger-close-metas/ledgers", + "use_captive_core": "False", + "use_futurenet": "False", "use_testnet": "False", - "sandbox_dataset": "crypto_stellar_internal_sandbox", "volume_config": "{}", - "volume_name": "etl-data", - "partners_data": { - "mgi": { - "prefix_folder": "mgi", - "prefix_id": "stellar_transaction_extract", - "table": "raw_mgi_stellar_transactions" - } - }, - "partners_bucket": "ext-partner-sftp", - "use_futurenet": "False", - "currency_ohlc": { - "currency": "euro_ohlc", - "table_name": "euro_usd_ohlc", - "endpoint": 
"https://api.coingecko.com/api/v3/coins/tether-eurt/ohlc?vs_currency=usd&days=1", - "columns_ohlc_currency": ["time", "open", "high", "low", "close"] - }, - "currency_bucket": "ext-asset-pricing", - "max_db_entry_age_in_days": 180, - "dbt_internal_source_db": "hubble-261722", - "dbt_internal_source_schema": "crypto_stellar_internal_2", - "dbt_public_source_db": "crypto-stellar", - "dbt_public_source_schema": "crypto_stellar", - "dbt_slack_elementary_channel": "alerts-hubble-data-quality", - "dbt_elementary_secret": "slack-token-elementary", - "dbt_elementary_dataset": "elementary", - "use_captive_core": "False", - "txmeta_datastore_path": "sdf-ledger-close-metas/ledgers" + "volume_name": "etl-data" } diff --git a/dags/audit_log_dag.py b/dags/audit_log_dag.py index ffa75a60..db45bd47 100644 --- a/dags/audit_log_dag.py +++ b/dags/audit_log_dag.py @@ -1,6 +1,7 @@ """ This DAG runs an audit log SQL to update the audit log dashboard. """ + from datetime import datetime from json import loads diff --git a/dags/cleanup_metadata_dag.py b/dags/cleanup_metadata_dag.py index 5a058a94..e3794890 100644 --- a/dags/cleanup_metadata_dag.py +++ b/dags/cleanup_metadata_dag.py @@ -3,6 +3,7 @@ out the DagRun, TaskInstance, Log, XCom, Job DB and SlaMiss entries to avoid having too much data in your Airflow MetaStore. 
""" + import logging from datetime import timedelta @@ -61,9 +62,11 @@ }, { "airflow_db_model": TaskInstance, - "age_check_column": TaskInstance.execution_date - if AIRFLOW_VERSION < ["2", "2", "0"] - else TaskInstance.start_date, + "age_check_column": ( + TaskInstance.execution_date + if AIRFLOW_VERSION < ["2", "2", "0"] + else TaskInstance.start_date + ), "keep_last": False, "keep_last_filters": None, "keep_last_group_by": None, @@ -77,9 +80,9 @@ }, { "airflow_db_model": XCom, - "age_check_column": XCom.execution_date - if AIRFLOW_VERSION < ["2", "2", "5"] - else XCom.timestamp, + "age_check_column": ( + XCom.execution_date if AIRFLOW_VERSION < ["2", "2", "5"] else XCom.timestamp + ), "keep_last": False, "keep_last_filters": None, "keep_last_group_by": None, @@ -107,9 +110,11 @@ DATABASE_OBJECTS.append( { "airflow_db_model": TaskReschedule, - "age_check_column": TaskReschedule.execution_date - if AIRFLOW_VERSION < ["2", "2", "0"] - else TaskReschedule.start_date, + "age_check_column": ( + TaskReschedule.execution_date + if AIRFLOW_VERSION < ["2", "2", "0"] + else TaskReschedule.start_date + ), "keep_last": False, "keep_last_filters": None, "keep_last_group_by": None, diff --git a/dags/dataset_reset_dag.py b/dags/dataset_reset_dag.py index 5632b339..424a1868 100644 --- a/dags/dataset_reset_dag.py +++ b/dags/dataset_reset_dag.py @@ -1,6 +1,7 @@ """ When the Test net server is reset, the dataset reset DAG deletes all the datasets in the test Hubble. 
""" + from ast import literal_eval from datetime import datetime from json import loads diff --git a/dags/ddls/create_default_value_field.sh b/dags/ddls/create_default_value_field.sh index 09c4d3fe..d0edcb2c 100755 --- a/dags/ddls/create_default_value_field.sh +++ b/dags/ddls/create_default_value_field.sh @@ -21,9 +21,9 @@ FIELD=batch_insert_ts echo "Creating default value field $FIELD in $TABLE in $DATASET_ID" bq query --use_legacy_sql=false \ -"ALTER TABLE \`$PROJECT_ID.$DATASET_ID.$TABLE\` \ + "ALTER TABLE \`$PROJECT_ID.$DATASET_ID.$TABLE\` \ ADD COLUMN $FIELD TIMESTAMP;" bq query --use_legacy_sql=false \ -"ALTER TABLE \`$PROJECT_ID.$DATASET_ID.$TABLE\` \ + "ALTER TABLE \`$PROJECT_ID.$DATASET_ID.$TABLE\` \ ALTER COLUMN $FIELD SET DEFAULT CURRENT_TIMESTAMP();" diff --git a/dags/ddls/create_partitioned_tables.sh b/dags/ddls/create_partitioned_tables.sh index d407a776..0c484aa7 100755 --- a/dags/ddls/create_partitioned_tables.sh +++ b/dags/ddls/create_partitioned_tables.sh @@ -24,8 +24,7 @@ SCHEMA_DIR=$WORKDIR/schemas/ PARTITION_TABLES=(history_operations history_transactions history_ledgers history_assets history_trades history_effects accounts claimable_balances offers liquidity_pools account_signers trust_lines) # make partitioned tables -for table in ${PARTITION_TABLES[@]} -do +for table in ${PARTITION_TABLES[@]}; do echo "Creating partitioned table $table in $DATASET_ID" if [ "$table" = "history_operations" ]; then cluster=transaction_id,source_account,type @@ -65,9 +64,9 @@ do partition=closed_at fi bq mk --table \ - --schema $SCHEMA_DIR${table}_schema.json \ - --time_partitioning_field $partition \ - --time_partitioning_type MONTH \ - --clustering_fields $cluster \ - $PROJECT_ID:$DATASET_ID.$table + --schema $SCHEMA_DIR${table}_schema.json \ + --time_partitioning_field $partition \ + --time_partitioning_type MONTH \ + --clustering_fields $cluster \ + $PROJECT_ID:$DATASET_ID.$table done diff --git a/dags/ddls/create_state_tables.sh 
b/dags/ddls/create_state_tables.sh index 2a1002cc..d6bf043b 100755 --- a/dags/ddls/create_state_tables.sh +++ b/dags/ddls/create_state_tables.sh @@ -26,11 +26,10 @@ SCHEMA_DIR=$WORKDIR/schemas/ STATE_TABLES=(accounts liquidity_pools offers trust_lines) # make state tables -for table in ${STATE_TABLES[@]} -do +for table in ${STATE_TABLES[@]}; do echo "Creating state table $table in $DATASET_ID" bq mk --table \ - --schema $SCHEMA_DIR${table}_schema.json \ - --clustering_fields last_modified_ledger \ - $PROJECT_ID:$DATASET_ID.$table + --schema $SCHEMA_DIR${table}_schema.json \ + --clustering_fields last_modified_ledger \ + $PROJECT_ID:$DATASET_ID.$table done diff --git a/dags/ddls/create_view.sh b/dags/ddls/create_view.sh index cfc74b53..0ae3fad2 100755 --- a/dags/ddls/create_view.sh +++ b/dags/ddls/create_view.sh @@ -30,8 +30,8 @@ fi # read view sql # query=$(<$QUERY_DIR$view.sql) -query=`cat $QUERY_DIR$view.sql` -if [ ${#query} <= 0 ]; then +query=$(cat $QUERY_DIR$view.sql) +if [ ${#query} -le 0 ]; then echo "$QUERY_DIR$view.sql is empty. Please provide a valid .sql file." 
exit 1 fi @@ -42,7 +42,7 @@ query=${query//"DATASET"/$DATASET_ID} echo "Creating view $view in $DATASET_ID" bq mk \ - --use_legacy_sql=false \ + --use_legacy_sql=false \ --view "$query" \ --project_id $PROJECT_ID \ $DATASET_ID.$view diff --git a/dags/ddls/delete_gcs_files.sh b/dags/ddls/delete_gcs_files.sh index 52fa6d0e..52a7bcb5 100755 --- a/dags/ddls/delete_gcs_files.sh +++ b/dags/ddls/delete_gcs_files.sh @@ -24,8 +24,7 @@ END_DAY=$5 GCS_BUCKET=gs://us-central1-hubble-2-d948d67b-bucket/dag-exported/scheduled__ # make state tables -for day in $(seq $START_DAY $END_DAY) -do +for day in $(seq $START_DAY $END_DAY); do echo "Removing files matching pattern $PATTERN for month $MONTH and day $day" if [ $day -lt 10 ]; then gsutil -m rm -rf $GCS_BUCKET$YEAR-$MONTH-0$day*/$PATTERN* diff --git a/dags/ddls/update_table_schema.sh b/dags/ddls/update_table_schema.sh index 0556fbb9..a82d982c 100755 --- a/dags/ddls/update_table_schema.sh +++ b/dags/ddls/update_table_schema.sh @@ -26,5 +26,4 @@ if [ -z "${table}" ]; then exit 1 fi - bq update ${PROJECT_ID}:${DATASET_ID}."${table}" schemas/"${table}"_schema.json diff --git a/dags/history_tables_dag.py b/dags/history_tables_dag.py index 3dcda1f8..ece0e4ed 100644 --- a/dags/history_tables_dag.py +++ b/dags/history_tables_dag.py @@ -2,6 +2,7 @@ The history_archive_export DAG exports operations and trades from the history archives. It is scheduled to export information to BigQuery at regular intervals. """ + from ast import literal_eval from datetime import datetime from json import loads diff --git a/dags/sandbox_create_dag.py b/dags/sandbox_create_dag.py index de047517..5b6b76da 100644 --- a/dags/sandbox_create_dag.py +++ b/dags/sandbox_create_dag.py @@ -1,6 +1,7 @@ """ This DAG creates the sandbox dataset with transactions tables, state tables with history and views. 
""" + from datetime import timedelta from json import loads diff --git a/dags/sandbox_update_dag.py b/dags/sandbox_update_dag.py index afd04525..1e877998 100644 --- a/dags/sandbox_update_dag.py +++ b/dags/sandbox_update_dag.py @@ -1,6 +1,7 @@ """ This DAG update the Canvas sandbox dataset with transactions tables, state tables with history once a month. """ + from datetime import datetime from json import loads diff --git a/dags/state_table_dag.py b/dags/state_table_dag.py index b0aa7350..bc4d3d80 100644 --- a/dags/state_table_dag.py +++ b/dags/state_table_dag.py @@ -2,6 +2,7 @@ The state_table_export DAG exports ledger entry changes (accounts, offers, and trustlines) within a bounded range using stellar-core. This DAG should be triggered manually if it is required to export entry changes within a specified time range. """ + from ast import literal_eval from datetime import datetime from json import loads diff --git a/dags/stellar_etl_airflow/build_apply_gcs_changes_to_bq_task.py b/dags/stellar_etl_airflow/build_apply_gcs_changes_to_bq_task.py index 5eddeb09..e4bb08e3 100644 --- a/dags/stellar_etl_airflow/build_apply_gcs_changes_to_bq_task.py +++ b/dags/stellar_etl_airflow/build_apply_gcs_changes_to_bq_task.py @@ -2,6 +2,7 @@ This file contains functions for creating Airflow tasks to merge data on ledger entry changes from a file in Google Cloud storage into a BigQuery table. """ + import logging from datetime import timedelta from json import loads diff --git a/dags/stellar_etl_airflow/build_export_task.py b/dags/stellar_etl_airflow/build_export_task.py index 79f82ace..9c7e3503 100644 --- a/dags/stellar_etl_airflow/build_export_task.py +++ b/dags/stellar_etl_airflow/build_export_task.py @@ -1,6 +1,7 @@ """ This file contains functions for creating Airflow tasks to run stellar-etl export functions. 
""" + import logging import os from datetime import datetime, timedelta diff --git a/dags/stellar_etl_airflow/build_gcs_to_bq_task.py b/dags/stellar_etl_airflow/build_gcs_to_bq_task.py index 0ebe3d74..9e4a824a 100644 --- a/dags/stellar_etl_airflow/build_gcs_to_bq_task.py +++ b/dags/stellar_etl_airflow/build_gcs_to_bq_task.py @@ -1,6 +1,7 @@ """ This file contains functions for creating Airflow tasks to load files from Google Cloud Storage into BigQuery. """ + from datetime import timedelta from airflow.models import Variable diff --git a/dags/stellar_etl_airflow/build_time_task.py b/dags/stellar_etl_airflow/build_time_task.py index 6f5dd471..eab9ff6d 100644 --- a/dags/stellar_etl_airflow/build_time_task.py +++ b/dags/stellar_etl_airflow/build_time_task.py @@ -1,6 +1,7 @@ """ This file contains functions for creating Airflow tasks to convert from a time range to a ledger range. """ + import logging from datetime import timedelta diff --git a/poststart.sh b/poststart.sh index effa28e4..6501dc84 100644 --- a/poststart.sh +++ b/poststart.sh @@ -15,37 +15,35 @@ # on the airflow-worker nodes. move_file_if_closed() { - preFileSize=$(wc -c < "$1") - sleep 10; - postFileSize=$(wc -c < "$1") - lsof "$1"; + preFileSize=$(wc -c <"$1") + sleep 10 + postFileSize=$(wc -c <"$1") + lsof "$1" # if lsof returns an error code of 1, it means the file is not opened by any # other processes, allowing us to move it safely. Confirm there is data first. - if [[ $? == 1 && $preFileSize -gt 0]]; then + if [[ $? == 1 && $preFileSize -gt 0 ]]; then # File size is measured 10 seconds apart to confirm any batch process finished # writing to the file. If the sizes do not match, the file is still in use. 
if [[ $preFileSize == $postFileSize ]]; then - mv "$1" /home/airflow/gcs/data/; + mv "$1" /home/airflow/gcs/data/ fi fi } -sudo apt-get update && sudo apt-get install lsof; +sudo apt-get update && sudo apt-get install lsof while true; do - for file in /home/airflow/etlData/* - do + for file in /home/airflow/etlData/*; do if [[ -f "$file" ]]; then move_file_if_closed "$file" else - for changeFile in "$file"/* - do + for changeFile in "$file"/*; do if [[ -f "$changeFile" ]]; then move_file_if_closed "$changeFile" - fi + fi done fi done - sleep 1; + sleep 1 done >/dev/null 2>&1 & disown disown -a From 3b376972c20e665787641a3009b650e9276ff5a8 Mon Sep 17 00:00:00 2001 From: Laysa de Sousa Bitencourt <laysa.bitencourt@indicium.tech> Date: Thu, 20 Jun 2024 14:39:34 -0300 Subject: [PATCH 2/2] fixed CIs --- .github/workflows/ci.yml | 12 ++++++++++++ .github/workflows/lint.yml | 30 ------------------------------ 2 files changed, 12 insertions(+), 30 deletions(-) delete mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 60ace9ed..3da3025d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,12 +20,24 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + + - id: file_changes + uses: trilom/file-changes-action@v1.2.3 + with: + output: " " + - name: Setup Python uses: actions/setup-python@v4 with: python-version: 3.8 - uses: pre-commit/action@v3.0.0 + env: + extra_args: --color=always --files ${{ steps.file_changes.outputs.files}} tests: runs-on: ubuntu-latest diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index 7427b4c4..00000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: CI Linting - -on: - pull_request: - branches: - - master - -jobs: - pre-commit: - runs-on: ubuntu-latest - if: >- - github.event.pull_request.merged == false && - 
github.event.pull_request.state == 'open' - - steps: - - uses: actions/checkout@v3 - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - - id: file_changes - uses: trilom/file-changes-action@v1.2.3 - with: - output: " " - - - uses: pre-commit/action@v3.0.0 - env: - extra_args: --color=always --files ${{ steps.file_changes.outputs.files}}