From 6c4b3e5d2e237bce31ede77d02189b633d70972b Mon Sep 17 00:00:00 2001 From: Tim Reichard Date: Thu, 10 Oct 2024 09:30:26 -0500 Subject: [PATCH] Add convenience function alter_db_table_column to add/drop databricks table columns --- HISTORY.rst | 13 +++++++++++++ aioradio/aws/dynamodb.py | 1 + aioradio/aws/sqs.py | 1 + aioradio/ds_utils.py | 21 +++++++++++++-------- aioradio/file_ingestion.py | 1 + aioradio/psycopg2.py | 1 + aioradio/pyodbc.py | 1 + aioradio/redis.py | 1 + aioradio/requirements.txt | 16 ++++++++-------- aioradio/tests/file_ingestion_test.py | 5 ++--- setup.py | 3 +-- 11 files changed, 43 insertions(+), 21 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 8c45af6..980a6c9 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,19 @@ History ======= +v0.21.1 (2024-10-10) + +* Add convenience function alter_db_table_column to add/drop databricks table columns. +* No longer require installing haversine. +* Update fakeredis==2.25.1. +* Update mlflow==2.16.2. +* Update pandas==2.2.3. +* Update polars==1.9.0. +* Update pylint==3.3.1. +* Update pysmb==1.2.10. +* Update redis==5.1.1. + + v0.21.0 (2024-09-17) * Update cython==3.0.11. 
diff --git a/aioradio/aws/dynamodb.py b/aioradio/aws/dynamodb.py index 31f0f15..e17952f 100644 --- a/aioradio/aws/dynamodb.py +++ b/aioradio/aws/dynamodb.py @@ -1,6 +1,7 @@ """Generic async AWS functions for DynamoDB.""" # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments from typing import Any, Dict, List diff --git a/aioradio/aws/sqs.py b/aioradio/aws/sqs.py index 92dc4d4..61072d0 100644 --- a/aioradio/aws/sqs.py +++ b/aioradio/aws/sqs.py @@ -2,6 +2,7 @@ # pylint: disable=dangerous-default-value # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments import logging from typing import Any, Dict, List diff --git a/aioradio/ds_utils.py b/aioradio/ds_utils.py index 2dc0f12..227fae8 100644 --- a/aioradio/ds_utils.py +++ b/aioradio/ds_utils.py @@ -9,6 +9,7 @@ # pylint: disable=protected-access # pylint: disable=too-many-arguments # pylint: disable=too-many-boolean-expressions +# pylint: disable=too-many-positional-arguments # pylint: disable=unnecessary-comprehension # pylint: disable=unused-argument # pylint: disable=unused-import @@ -32,7 +33,6 @@ import polars as pl import pyarrow as pa import pyarrow.dataset as ds -from haversine import haversine, Unit from mlflow.entities.model_registry.model_version_status import ModelVersionStatus from mlflow.tracking.client import MlflowClient from pyspark.sql import SparkSession @@ -56,6 +56,17 @@ ############################### Databricks functions ################################ +def alter_db_table_column(table: str, column: str, cmd: str, dtype: str=''): + """Convenience function to either add or drop a single column in a + databricks table.""" + + cmd = cmd.upper() + if cmd == 'ADD': + spark.sql(f'ALTER TABLE {table} ADD COLUMN ({column} {dtype})') + elif cmd == 'DROP': + spark.sql(f'ALTER TABLE {table} DROP COLUMN IF EXISTS ({column})') + + def db_catalog(env): """Return the DataBricks catalog based on the passed in environment.""" @@ -237,7 +248,7 @@ def 
promote_model_to_production(model_name, tags): logger.info(f"Model status: {ModelVersionStatus.to_string(status)}") if status == ModelVersionStatus.READY: break - time.sleep(1) + sleep(1) registered_model = client.get_registered_model(model_name) logger.info(f"registered_model: {registered_model}") @@ -479,12 +490,6 @@ def apply_bearing(dataframe, latitude, longitude): return dataframe.apply(lambda x: bearing(x.LATITUDE, latitude, x.LONGITUDE, longitude), axis=1) -def apply_haversine(dataframe, latitude, longitude): - """Apply haversine function on split dataframe.""" - - return dataframe.apply(lambda x: haversine((x.LATITUDE, x.LONGITUDE), (latitude, longitude), unit=Unit.MILES), axis=1) - - def logit(x, a, b, c, d): """Logit function.""" diff --git a/aioradio/file_ingestion.py b/aioradio/file_ingestion.py index 639539a..fdceaa1 100644 --- a/aioradio/file_ingestion.py +++ b/aioradio/file_ingestion.py @@ -12,6 +12,7 @@ # pylint: disable=too-many-lines # pylint: disable=too-many-locals # pylint: disable=too-many-nested-blocks +# pylint: disable=too-many-positional-arguments # pylint: disable=too-many-public-methods import asyncio diff --git a/aioradio/psycopg2.py b/aioradio/psycopg2.py index 1705a52..1fe7c76 100644 --- a/aioradio/psycopg2.py +++ b/aioradio/psycopg2.py @@ -2,6 +2,7 @@ # pylint: disable=c-extension-no-member # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments import psycopg2 diff --git a/aioradio/pyodbc.py b/aioradio/pyodbc.py index 6392ad9..e5b42fc 100644 --- a/aioradio/pyodbc.py +++ b/aioradio/pyodbc.py @@ -2,6 +2,7 @@ # pylint: disable=c-extension-no-member # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unsubscriptable-object import os diff --git a/aioradio/redis.py b/aioradio/redis.py index 66f17b5..c866e99 100644 --- a/aioradio/redis.py +++ b/aioradio/redis.py @@ -4,6 +4,7 @@ # pylint: disable=no-member # pylint: disable=too-many-arguments # pylint: 
disable=too-many-instance-attributes +# pylint: disable=too-many-positional-arguments # pylint: disable=unsubscriptable-object import hashlib diff --git a/aioradio/requirements.txt b/aioradio/requirements.txt index 83e96ab..a9adaf7 100644 --- a/aioradio/requirements.txt +++ b/aioradio/requirements.txt @@ -6,36 +6,36 @@ botocore==1.34.131 cython==3.0.11 databricks-connect==14.3.1 ddtrace==2.6.5 -fakeredis==2.24.1 +fakeredis==2.25.1 faust-cchardet==2.1.19 flask==3.0.3 flask-cors==4.0.1 grpcio==1.62.2 grpcio-status==1.62.2 -haversine==2.8.1 httpx==0.27.2 +importlib-metadata==8.4.0 mandrill==1.0.60 -mlflow==2.14.3 +mlflow==2.16.2 moto==4.2.14 numpy==1.26.4 openpyxl==3.0.10 orjson==3.9.15 -pandas==2.2.2 +pandas==2.2.3 pkginfo==1.10.0 -polars==1.7.1 +polars==1.9.0 pre-commit==3.8.0 protobuf==4.25.4 psycopg2-binary==2.9.9 pyarrow==15.0.2 -pylint==3.2.7 +pylint==3.3.1 pyodbc==5.1.0 --no-binary=pyodbc -pysmb==1.2.9.1 +pysmb==1.2.10 pyspark==3.4.3 pytest==8.1.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 python-json-logger==2.0.7 -redis==5.0.8 +redis==5.1.1 twine==5.1.1 typing_extensions==4.11.0 werkzeug==3.0.4 diff --git a/aioradio/tests/file_ingestion_test.py b/aioradio/tests/file_ingestion_test.py index c905989..bb7d81a 100644 --- a/aioradio/tests/file_ingestion_test.py +++ b/aioradio/tests/file_ingestion_test.py @@ -202,11 +202,10 @@ async def func(): @pytest.mark.asyncio -async def test_async_db_wrapper(user): +async def test_async_db_wrapper(): """Test async_db_wrapper with database connections.""" - if user != 'tim.reichard': - pytest.skip('Skip test_async_db_wrapper since user is not Tim Reichard') + pytest.skip('Skip test_async_db_wrapper since we no longer have access to aws secrets locally.') db_info=[{ 'db': 'pyodbc', diff --git a/setup.py b/setup.py index 6c79249..2534f53 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ long_description = fileobj.read() setup(name='aioradio', - version='0.21.0', + version='0.21.1', description='Generic asynchronous i/o python 
utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more', long_description=long_description, long_description_content_type="text/markdown", @@ -31,7 +31,6 @@ 'fakeredis>=2.20.0', 'grpcio==1.62.2', 'grpcio-status==1.62.2', - 'haversine>=2.8.0', 'httpx>=0.23.0', 'mandrill>=1.0.60', 'mlflow>=2.10.2',