Skip to content

Commit

Permalink
Remove databricks session spark object
Browse files Browse the repository at this point in the history
  • Loading branch information
nrccua-timr committed Mar 19, 2024
1 parent 08a439f commit 5035c1e
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 11 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ History
=======


v0.20.16 (2024-03-19)

* Remove databricks session spark object and use original method to convert spark to polars df.


v0.20.15 (2024-03-16)

* Explicitly add databricks config to DatabricksSession.
Expand Down
13 changes: 3 additions & 10 deletions aioradio/ds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from haversine import haversine, Unit
from mlflow.entities.model_registry.model_version_status import ModelVersionStatus
from mlflow.tracking.client import MlflowClient
Expand All @@ -48,15 +49,7 @@
c_handler.setFormatter(c_format)
logger.addHandler(c_handler)

try:
from databricks.connect import DatabricksSession
spark = DatabricksSession.builder.remote(
host=os.environ['DATABRICKS_HOST'],
token=os.environ['DATABRICKS_TOKEN'],
cluster_id=os.environ['DATABRICKS_CLUSTER_ID']
).getOrCreate()
except Exception:
spark = SparkSession.builder.getOrCreate()
spark = SparkSession.builder.getOrCreate()


############################### Databricks functions ################################
Expand Down Expand Up @@ -91,7 +84,7 @@ def ese_db_catalog(env):
def sql_to_polars_df(sql):
"""Get polars DataFrame from SQL query results."""

return pl.from_pandas(spark.sql(sql).toPandas())
return pl.from_arrow(pa.Table.from_batches(spark.sql(sql)._collect_as_arrow()))


def does_db_table_exists(name):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
long_description = fileobj.read()

setup(name='aioradio',
version='0.20.15',
version='0.20.16',
description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 5035c1e

Please sign in to comment.