
Commit

Improve logging spark merge query results using logger
nrccua-timr committed Apr 5, 2024
1 parent 029017f commit ae35b40
Showing 3 changed files with 12 additions and 3 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,11 @@ History
 =======
 
 
+v0.20.17 (2024-04-05)
+
+* Improve logging spark merge query results using logger instead of show.
+
+
 v0.20.16 (2024-03-19)
 
 * Remove databricks session spark object and use original method to convert spark to polars df.
8 changes: 6 additions & 2 deletions aioradio/ds_utils.py
@@ -124,7 +124,9 @@ def merge_spark_df_in_db(df, target, on, partition_by=None, stage_table=None, pa
     match_clause = ', '.join(f'{target}.{col} = {stage}.{col}' for col in df.columns if col != 'CREATED_DATETIME')
 
     try:
-        spark.sql(f'MERGE INTO {target} USING {stage} ON {on_clause} WHEN MATCHED THEN UPDATE SET {match_clause} WHEN NOT MATCHED THEN INSERT *').show()
+        sql = f'MERGE INTO {target} USING {stage} ON {on_clause} WHEN MATCHED THEN UPDATE SET {match_clause} WHEN NOT MATCHED THEN INSERT *'
+        stats = spark.sql(sql).toPandas()
+        logger.info(f"New records: {stats['num_inserted_rows'][0]:,} | Updated records: {stats['num_updated_rows'][0]:,}")
         spark.sql(f'DROP TABLE {stage}')
     except Exception:
         spark.sql(f'DROP TABLE {stage}')
@@ -160,7 +162,9 @@ def merge_pandas_df_in_db(df, target, on, partition_by=None, stage_table=None):
     match_clause = ', '.join(f'{target}.{col} = {stage}.{col}' for col in df.columns if col != 'CREATED_DATETIME')
 
     try:
-        spark.sql(f'MERGE INTO {target} USING {stage} ON {on_clause} WHEN MATCHED THEN UPDATE SET {match_clause} WHEN NOT MATCHED THEN INSERT *').show()
+        sql = f'MERGE INTO {target} USING {stage} ON {on_clause} WHEN MATCHED THEN UPDATE SET {match_clause} WHEN NOT MATCHED THEN INSERT *'
+        stats = spark.sql(sql).toPandas()
+        logger.info(f"New records: {stats['num_inserted_rows'][0]:,} | Updated records: {stats['num_updated_rows'][0]:,}")
         spark.sql(f'DROP TABLE {stage}')
     except Exception:
         spark.sql(f'DROP TABLE {stage}')
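
For reference, a minimal standalone sketch of the pattern this commit introduces: on Databricks/Delta Lake, the DataFrame returned by spark.sql('MERGE INTO ...') carries merge metrics (including the num_inserted_rows and num_updated_rows columns used above), which can be collected with toPandas() and reported through a logger instead of printed with .show(). The helper name merge_with_logged_stats and the try/finally cleanup below are illustrative, not code from the repository.

import logging

from pyspark.sql import SparkSession

logger = logging.getLogger(__name__)

def merge_with_logged_stats(spark: SparkSession, target: str, stage: str, on_clause: str, match_clause: str) -> None:
    """Illustrative helper: run a Delta MERGE and log inserted/updated row counts."""
    sql = (
        f'MERGE INTO {target} USING {stage} ON {on_clause} '
        f'WHEN MATCHED THEN UPDATE SET {match_clause} '
        f'WHEN NOT MATCHED THEN INSERT *'
    )
    try:
        # Delta MERGE returns a one-row metrics DataFrame; pull it to the driver to log counts.
        stats = spark.sql(sql).toPandas()
        logger.info(f"New records: {stats['num_inserted_rows'][0]:,} | Updated records: {stats['num_updated_rows'][0]:,}")
    finally:
        # The commit drops the staging table on both the success and failure paths;
        # a finally block is used here as a compact equivalent.
        spark.sql(f'DROP TABLE {stage}')
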
2 changes: 1 addition & 1 deletion setup.py
@@ -7,7 +7,7 @@
     long_description = fileobj.read()
 
 setup(name='aioradio',
-      version='0.20.16',
+      version='0.20.17',
       description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
       long_description=long_description,
       long_description_content_type="text/markdown",
