From b74727251e71270394cbfddf10106e40ae87764f Mon Sep 17 00:00:00 2001 From: Tim Reichard Date: Fri, 15 Mar 2024 17:05:44 -0500 Subject: [PATCH] Use spark df.toPandas() function instead of private function --- HISTORY.rst | 5 +++++ aioradio/ds_utils.py | 4 ++-- setup.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index adaf059..29377c1 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,11 @@ History ======= +v0.20.13 (2024-03-15) + +* Use the public spark df.toPandas() function instead of a private function, converting spark df -> pandas -> polars. + + v0.20.12 (2024-03-12) * Avoid instantiating spark if databricks-connect installed. diff --git a/aioradio/ds_utils.py b/aioradio/ds_utils.py index 6497ec2..41e76b2 100644 --- a/aioradio/ds_utils.py +++ b/aioradio/ds_utils.py @@ -5,6 +5,7 @@ # pylint: disable=invalid-name # pylint: disable=logging-fstring-interpolation # pylint: disable=no-member +# pylint: disable=not-an-iterable # pylint: disable=protected-access # pylint: disable=too-many-arguments # pylint: disable=too-many-boolean-expressions @@ -27,7 +28,6 @@ import boto3 import numpy as np -import pyarrow as pa import pandas as pd import polars as pl from haversine import haversine, Unit @@ -86,7 +86,7 @@ def ese_db_catalog(env): def sql_to_polars_df(sql): """Get polars DataFrame from SQL query results.""" - return pl.from_arrow(pa.Table.from_batches(spark.sql(sql)._collect_as_arrow())) + return pl.from_pandas(spark.sql(sql).toPandas()) def does_db_table_exists(name): diff --git a/setup.py b/setup.py index ee0b699..3f8618f 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ long_description = fileobj.read() setup(name='aioradio', - version='0.20.12', + version='0.20.13', description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more', long_description=long_description, long_description_content_type="text/markdown",