-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Gabriele Diener
committed
Apr 7, 2020
1 parent
3ef431e
commit 8c1e1bc
Showing
5 changed files
with
101 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,24 @@ | ||
# BigQuery logger handler for Airflow | ||
|
||
pip install airflow-bigquerylogger | ||
## Installation | ||
|
||
`pip install airflow-bigquerylogger` | ||
|
||
## Configuration | ||
|
||
```bash | ||
AIRFLOW__CORE__REMOTE_LOGGING='true' | ||
AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER='gs://bucket/path' | ||
AIRFLOW__CORE__REMOTE_LOG_CONN_ID='gcs_log' | ||
AIRFLOW__CORE__LOGGING_CONFIG_CLASS='bigquerylogger.config.LOGGING_CLASS' | ||
AIRFLOW__CORE__LOG_BIGQUERY_DATASET='dataset.table' | ||
AIRFLOW__CORE__LOG_BIGQUERY_LIMIT=50 | ||
``` | ||
|
||
### Google Cloud BigQuery | ||
|
||
Rows that were written to a table recently via streaming (using the tabledata.insertAll method) cannot be modified with UPDATE, DELETE, or MERGE statements. I recommend setting up a table retention policy. | ||
|
||
## Credits | ||
|
||
Thanks to the Bluecore engineering team for [this useful article](https://medium.com/bluecore-engineering/kubernetes-pod-logging-in-the-airflow-ui-ed9ca6f37e9d). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +0,0 @@ | ||
import os | ||
|
||
from airflow.configuration import conf | ||
from airflow.utils.module_loading import import_string | ||
|
||
|
||
def get_default_logging_config():
    """Import and return Airflow's stock DEFAULT_LOGGING_CONFIG dict."""
    dotted_path = (
        'airflow.config_templates.'
        'airflow_local_settings.DEFAULT_LOGGING_CONFIG'
    )
    return import_string(dotted_path)
|
||
|
||
def set_bigquery_handler(default_logging_config):
    """Install the BigQuery task-log handler into an Airflow logging config.

    When remote logging is enabled and the remote base log folder is a GCS
    bucket (``gs://``), the ``task`` handler is replaced with
    ``bigquerylogger.BQTaskHandler``; otherwise the config is returned
    untouched.

    :param default_logging_config: Airflow logging config dict; mutated
        in place when the handler is installed.
    :return: the same dict, possibly with the ``task`` handler replaced.
    """
    remote_logging = conf.getboolean('core', 'remote_logging')
    remote_base_log_folder = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # Only take over task logging when logs are shipped to Google Cloud
    # Storage; any other remote backend keeps Airflow's default handler.
    if not (remote_logging and remote_base_log_folder.startswith('gs://')):
        return default_logging_config

    base_log_folder = conf.get('core', 'BASE_LOG_FOLDER')
    filename_template = conf.get('core', 'LOG_FILENAME_TEMPLATE')
    bigquery_dataset = conf.get('core', 'LOG_BIGQUERY_DATASET')
    # getint (not get): conf.get returns a *string* from the config file,
    # which would be inconsistent with the integer fallback value.
    bigquery_limit = conf.getint('core', 'LOG_BIGQUERY_LIMIT', fallback=0)

    bigquery_remote_handlers = {
        'task': {
            'class': 'bigquerylogger.BQTaskHandler',
            'formatter': 'airflow',
            'base_log_folder': os.path.expanduser(base_log_folder),
            'gcs_log_folder': remote_base_log_folder,
            'filename_template': filename_template,
            'dataset_name': bigquery_dataset,
            'query_limit': bigquery_limit
        }
    }

    default_logging_config['handlers'].update(bigquery_remote_handlers)

    return default_logging_config
|
||
|
||
# Built at import time so AIRFLOW__CORE__LOGGING_CONFIG_CLASS can point
# directly at this module's CONFIG_CLASS attribute.
default_logging_config = get_default_logging_config()

CONFIG_CLASS = set_bigquery_handler(default_logging_config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import os

from airflow.configuration import conf
from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG as LOGGING_CLASS


# Dotted path of the handler we install. It is also used as the re-import
# guard: the original code compared against 'bigquerylogger.BQTaskHandler'
# while installing 'bigquerylogger.BQTaskHandler.BQTaskHandler', so the
# guard never matched and the config was re-patched on every import.
_BQ_HANDLER_CLASS = 'bigquerylogger.BQTaskHandler.BQTaskHandler'

# Patch the default logging config only once per process.
if LOGGING_CLASS['handlers']['task']['class'] != _BQ_HANDLER_CLASS:

    remote_logging = conf.getboolean('core', 'remote_logging')
    remote_base_log_folder = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # Only take over the 'task' handler when logs go to Google Cloud Storage.
    if remote_logging and remote_base_log_folder.startswith('gs://'):

        base_log_folder = conf.get('core', 'BASE_LOG_FOLDER')
        filename_template = conf.get('core', 'LOG_FILENAME_TEMPLATE')
        bigquery_dataset = conf.get('core', 'LOG_BIGQUERY_DATASET')
        # getint (not get): conf.get returns a *string* from the config
        # file, which would be inconsistent with the integer fallback.
        bigquery_limit = conf.getint('core', 'LOG_BIGQUERY_LIMIT',
                                     fallback=200)

        bigquery_remote_handlers = {
            'task': {
                'class': _BQ_HANDLER_CLASS,
                'formatter': 'airflow',
                'base_log_folder': os.path.expanduser(base_log_folder),
                'gcs_log_folder': remote_base_log_folder,
                'filename_template': filename_template,
                'dataset_name': bigquery_dataset,
                'query_limit': bigquery_limit
            }
        }

        LOGGING_CLASS['handlers'].update(bigquery_remote_handlers)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters