Skip to content

Commit

Permalink
Merge pull request #139 from nrccua/DS-528-add-capability-to-alter-co…
Browse files Browse the repository at this point in the history
…lumns-in-databricks-tabl

Add convenience funtion alter_db_table_column to add/drop databricks table columns
  • Loading branch information
nrccua-timr authored Oct 10, 2024
2 parents ed1b749 + 6c4b3e5 commit a5bcf15
Show file tree
Hide file tree
Showing 11 changed files with 43 additions and 21 deletions.
13 changes: 13 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@ History
=======


v0.21.1 (2024-10-10)

* Add convenience funtion alter_db_table_column to add/drop databricks table columns.
* No longer require installing haversine.
* Update fakeredis==2.25.1.
* Update mlflow==2.16.2.
* Update pandas==2.2.3.
* Update polars==1.9.0.
* Update pylint==3.3.1.
* Update pysmb==1.2.10.
* Update redis==5.1.1.


v0.21.0 (2024-09-17)

* Update cython==3.0.11.
Expand Down
1 change: 1 addition & 0 deletions aioradio/aws/dynamodb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Generic async AWS functions for DynamoDB."""

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments

from typing import Any, Dict, List

Expand Down
1 change: 1 addition & 0 deletions aioradio/aws/sqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# pylint: disable=dangerous-default-value
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments

import logging
from typing import Any, Dict, List
Expand Down
21 changes: 13 additions & 8 deletions aioradio/ds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# pylint: disable=protected-access
# pylint: disable=too-many-arguments
# pylint: disable=too-many-boolean-expressions
# pylint: disable=too-many-positional-arguments
# pylint: disable=unnecessary-comprehension
# pylint: disable=unused-argument
# pylint: disable=unused-import
Expand All @@ -32,7 +33,6 @@
import polars as pl
import pyarrow as pa
import pyarrow.dataset as ds
from haversine import haversine, Unit
from mlflow.entities.model_registry.model_version_status import ModelVersionStatus
from mlflow.tracking.client import MlflowClient
from pyspark.sql import SparkSession
Expand All @@ -56,6 +56,17 @@
############################### Databricks functions ################################


def alter_db_table_column(table: str, column: str, cmd: str, dtype: str=''):
"""Convenience function to either add or drop a single column in a
databricks table."""

cmd = cmd.upper()
if cmd == 'ADD':
spark.sql(f'ALTER TABLE {table} ADD COLUMN ({column} {dtype})')
elif cmd == 'DROP':
spark.sql(f'ALTER TABLE {table} DROP COLUMN IF EXISTS ({column})')


def db_catalog(env):
"""Return the DataBricks catalog based on the passed in environment."""

Expand Down Expand Up @@ -237,7 +248,7 @@ def promote_model_to_production(model_name, tags):
logger.info(f"Model status: {ModelVersionStatus.to_string(status)}")
if status == ModelVersionStatus.READY:
break
time.sleep(1)
sleep(1)

registered_model = client.get_registered_model(model_name)
logger.info(f"registered_model: {registered_model}")
Expand Down Expand Up @@ -479,12 +490,6 @@ def apply_bearing(dataframe, latitude, longitude):
return dataframe.apply(lambda x: bearing(x.LATITUDE, latitude, x.LONGITUDE, longitude), axis=1)


def apply_haversine(dataframe, latitude, longitude):
"""Apply haversine function on split dataframe."""

return dataframe.apply(lambda x: haversine((x.LATITUDE, x.LONGITUDE), (latitude, longitude), unit=Unit.MILES), axis=1)


def logit(x, a, b, c, d):
"""Logit function."""

Expand Down
1 change: 1 addition & 0 deletions aioradio/file_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# pylint: disable=too-many-lines
# pylint: disable=too-many-locals
# pylint: disable=too-many-nested-blocks
# pylint: disable=too-many-positional-arguments
# pylint: disable=too-many-public-methods

import asyncio
Expand Down
1 change: 1 addition & 0 deletions aioradio/psycopg2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# pylint: disable=c-extension-no-member
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments

import psycopg2

Expand Down
1 change: 1 addition & 0 deletions aioradio/pyodbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# pylint: disable=c-extension-no-member
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
# pylint: disable=unsubscriptable-object

import os
Expand Down
1 change: 1 addition & 0 deletions aioradio/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable=no-member
# pylint: disable=too-many-arguments
# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-positional-arguments
# pylint: disable=unsubscriptable-object

import hashlib
Expand Down
16 changes: 8 additions & 8 deletions aioradio/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,36 @@ botocore==1.34.131
cython==3.0.11
databricks-connect==14.3.1
ddtrace==2.6.5
fakeredis==2.24.1
fakeredis==2.25.1
faust-cchardet==2.1.19
flask==3.0.3
flask-cors==4.0.1
grpcio==1.62.2
grpcio-status==1.62.2
haversine==2.8.1
httpx==0.27.2
importlib-metadata==8.4.0
mandrill==1.0.60
mlflow==2.14.3
mlflow==2.16.2
moto==4.2.14
numpy==1.26.4
openpyxl==3.0.10
orjson==3.9.15
pandas==2.2.2
pandas==2.2.3
pkginfo==1.10.0
polars==1.7.1
polars==1.9.0
pre-commit==3.8.0
protobuf==4.25.4
psycopg2-binary==2.9.9
pyarrow==15.0.2
pylint==3.2.7
pylint==3.3.1
pyodbc==5.1.0 --no-binary=pyodbc
pysmb==1.2.9.1
pysmb==1.2.10
pyspark==3.4.3
pytest==8.1.2
pytest-asyncio==0.21.1
pytest-cov==5.0.0
python-json-logger==2.0.7
redis==5.0.8
redis==5.1.1
twine==5.1.1
typing_extensions==4.11.0
werkzeug==3.0.4
Expand Down
5 changes: 2 additions & 3 deletions aioradio/tests/file_ingestion_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,10 @@ async def func():


@pytest.mark.asyncio
async def test_async_db_wrapper(user):
async def test_async_db_wrapper():
"""Test async_db_wrapper with database connections."""

if user != 'tim.reichard':
pytest.skip('Skip test_async_db_wrapper since user is not Tim Reichard')
pytest.skip('Skip test_async_db_wrapper since we no longer have access to aws secrets locally.')

db_info=[{
'db': 'pyodbc',
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
long_description = fileobj.read()

setup(name='aioradio',
version='0.21.0',
version='0.21.1',
description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
long_description=long_description,
long_description_content_type="text/markdown",
Expand All @@ -31,7 +31,6 @@
'fakeredis>=2.20.0',
'grpcio==1.62.2',
'grpcio-status==1.62.2',
'haversine>=2.8.0',
'httpx>=0.23.0',
'mandrill>=1.0.60',
'mlflow>=2.10.2',
Expand Down

0 comments on commit a5bcf15

Please sign in to comment.