Skip to content

Commit

Permalink
BigQuery copy method can convert dict column to JSON string
Browse files Browse the repository at this point in the history
Without this change, a dict column causes the copy
method to fail by default.
  • Loading branch information
austinweisgrau committed Oct 10, 2024
1 parent 22c3175 commit 7ce3800
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions parsons/google/google_bigquery.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
import json
import pickle
import random
import uuid
Expand Down Expand Up @@ -745,6 +746,7 @@ def copy(
allow_jagged_rows: bool = True,
quote: Optional[str] = None,
schema: Optional[List[dict]] = None,
convert_dict_columns_to_json: bool = True,
**load_kwargs,
):
"""
Expand Down Expand Up @@ -774,6 +776,8 @@ def copy(
template_table: str
Table name to be used as the load schema. Load operation will use the same
columns and data types as the template table.
convert_dict_columns_to_json: bool
If set to True, will convert any dict columns (which cannot by default be successfully loaded to BigQuery) to JSON strings
**load_kwargs: kwargs
Arguments to pass to the underlying load_table_from_uri call on the BigQuery
client.
Expand All @@ -796,6 +800,19 @@ def copy(
else:
csv_delimiter = ","

if convert_dict_columns_to_json:
# Convert dict columns to JSON strings
for field in tbl.get_columns_type_stats():
if "dict" in field["type"]:
new_petl = tbl.table.addfield(
field["name"] + "_replace", lambda row: json.dumps(row[field["name"]])
)
new_tbl = Table(new_petl)
new_tbl.remove_column(field["name"])
new_tbl.rename_column(field["name"] + "_replace", field["name"])
new_tbl.materialize()
tbl = new_tbl

job_config = self._process_job_config(
job_config=job_config,
destination_table_name=table_name,
Expand Down

0 comments on commit 7ce3800

Please sign in to comment.