From 1642d6e84bd36c20908ed2918f89a1393bd29d81 Mon Sep 17 00:00:00 2001
From: Austin Weisgrau <62900254+austinweisgrau@users.noreply.github.com>
Date: Thu, 10 Oct 2024 08:32:55 -0700
Subject: [PATCH] BigQuery copy method can convert dict column to JSON string
 (#1143)

Without this change, a dict column will cause the copy method to fail.
---
 parsons/google/google_bigquery.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index 93e78a474a..49c17ed23a 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -1,5 +1,6 @@
 import datetime
 import logging
+import json
 import pickle
 import random
 import uuid
@@ -745,6 +746,7 @@ def copy(
         allow_jagged_rows: bool = True,
         quote: Optional[str] = None,
         schema: Optional[List[dict]] = None,
+        convert_dict_columns_to_json: bool = True,
         **load_kwargs,
     ):
         """
@@ -774,6 +776,8 @@
             template_table: str
                 Table name to be used as the load schema. Load operation wil use the same
                 columns and data types as the template table.
+            convert_dict_columns_to_json: bool
+                If set to True, will convert any dict columns (which cannot by default be successfully loaded to BigQuery) to JSON strings.
             **load_kwargs: kwargs
                 Arguments to pass to the underlying load_table_from_uri call on the BigQuery
                 client.
@@ -796,6 +800,19 @@
         else:
             csv_delimiter = ","
 
+        if convert_dict_columns_to_json:
+            # Convert dict columns to JSON strings
+            for field in tbl.get_columns_type_stats():
+                if "dict" in field["type"]:
+                    new_petl = tbl.table.addfield(
+                        field["name"] + "_replace", lambda row: json.dumps(row[field["name"]])
+                    )
+                    new_tbl = Table(new_petl)
+                    new_tbl.remove_column(field["name"])
+                    new_tbl.rename_column(field["name"] + "_replace", field["name"])
+                    new_tbl.materialize()
+                    tbl = new_tbl
+
         job_config = self._process_job_config(
             job_config=job_config,
             destination_table_name=table_name,
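
For reference, a minimal usage sketch of the new flag (not part of the patch). It assumes parsons is installed and BigQuery credentials are configured (e.g. via GOOGLE_APPLICATION_CREDENTIALS); the dataset, table, and column names below are illustrative placeholders, not taken from the change itself.

    # Illustrative usage of the new convert_dict_columns_to_json flag.
    # Assumes BigQuery credentials are configured; dataset and column
    # names are made up for this sketch.
    from parsons import Table
    from parsons.google.google_bigquery import GoogleBigQuery

    # A Parsons Table with a dict-typed column, which previously made copy() fail.
    tbl = Table(
        [
            {"id": 1, "metadata": {"source": "web", "tags": ["a", "b"]}},
            {"id": 2, "metadata": {"source": "sms"}},
        ]
    )

    bq = GoogleBigQuery()

    # With the new default (convert_dict_columns_to_json=True), the dict-typed
    # "metadata" column is serialized with json.dumps before the load job runs.
    bq.copy(tbl, "my_dataset.my_table", if_exists="append")

    # Passing False preserves the previous behavior, where a dict column
    # causes the load to fail.
    # bq.copy(tbl, "my_dataset.my_table", if_exists="append",
    #         convert_dict_columns_to_json=False)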
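
And a standalone sketch of the conversion technique the patch applies inside copy(): the dict column is rebuilt as a JSON-string column via petl's addfield plus json.dumps, then swapped in for the original. The column name here is hypothetical; the patch itself derives column names from tbl.get_columns_type_stats().

    import json

    from parsons import Table

    tbl = Table([{"id": 1, "metadata": {"source": "web"}}])

    # Mirror of the loop body for a single, hard-coded column name.
    column = "metadata"
    new_petl = tbl.table.addfield(
        column + "_replace", lambda row: json.dumps(row[column])
    )
    new_tbl = Table(new_petl)
    new_tbl.remove_column(column)
    new_tbl.rename_column(column + "_replace", column)
    new_tbl.materialize()

    # "metadata" now holds the string '{"source": "web"}', which BigQuery can load.
    print(new_tbl)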