Merge pull request #44 from gismart/depupd
Update dependencies
maxim-lisovsky-gismart authored Jan 25, 2024
2 parents 279d640 + 79f93c5 commit 307abcd
Showing 9 changed files with 25 additions and 14 deletions.
README.md (2 changes: 1 addition & 1 deletion)
@@ -15,7 +15,7 @@ Add `--upgrade` option to update existing package to a new version
Specify package link in your `requirements.txt`:

```txt
-git+https://github.com/gismart/[email protected]#egg=bi-utils-gismart
+git+https://github.com/gismart/[email protected]#egg=bi-utils-gismart
```

### Usage
bi_utils/aws/db.py (11 changes: 8 additions & 3 deletions)
@@ -6,8 +6,8 @@
import posixpath
import pandas as pd
import datetime as dt
-import fastparquet as fp
from typing import Any, Iterable, Iterator, Sequence, Optional, Union
+import pyarrow.parquet as pp

from .. import files, sql
from . import connection
@@ -44,7 +44,7 @@ def upload_file(
copy_options.append("PARQUET")
separator = None
if not columns:
-            columns = pp.ParquetFile(file_path).columns
+            columns = pp.ParquetFile(file_path).schema.names
else:
raise ValueError(f"{os.path.basename(file_path)} file extension is not supported")
table_name = f"{schema}.{table}"
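The hunk above swaps fastparquet's column lookup for pyarrow's schema accessor. A minimal sketch of the new call, assuming pyarrow >= 15 is installed (the file path is hypothetical):

```python
import pyarrow.parquet as pp

# schema.names returns the column names as a list of strings, the same
# shape of result that fastparquet's ParquetFile(...).columns provided.
columns = pp.ParquetFile("/tmp/example.parquet").schema.names
print(columns)
```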
@@ -175,7 +175,12 @@ def upload_data(
logger.warning(f"Partitions are not supported for csv files: {filename}")
data.to_csv(file_path, index=False, sep=separator)
elif file_path.lower().endswith(".parquet"):
-        data.to_parquet(file_path, partition_cols=partition_cols, times="int96", index=False)
+        data.to_parquet(
+            file_path,
+            partition_cols=partition_cols,
+            coerce_timestamps="us",
+            index=False,
+        )
else:
raise ValueError(f"{filename} file extension is not supported")
logger.info(f"Data is saved to {filename} ({len(data)} rows)")
bi_utils/queue_exporter.py (7 changes: 6 additions & 1 deletion)
@@ -172,7 +172,12 @@ def _export_df(
elif ".parquet" in file_path.lower():
if partition_cols:
logger.warning(f"Partitions are not supported for csv files: {filename}")
-        df.to_parquet(file_path, partition_cols=partition_cols, times="int96", index=False)
+        df.to_parquet(
+            file_path,
+            partition_cols=partition_cols,
+            coerce_timestamps="us",
+            index=False,
+        )
else:
df.to_pickle(file_path)
logger.info(f"Saved df to {filename} ({len(df)} rows)")
requirements.txt (6 changes: 3 additions & 3 deletions)
@@ -1,8 +1,8 @@
numpy<2.0.0,>=1.19.2
-pandas<2.0.0,>=1.1.0
+pandas<3.0.0,>=1.1.0
psutil<6.0.0,>=5.7.0
psycopg2-binary<3.0.0,>=2.9.0
scikit-learn<2.0.0,>=0.23.1
SQLAlchemy<2.0.0,>=1.4.46
-fastparquet==2023.2.0
-locopy==0.5.1
+pyarrow>=15.0.0
+locopy==0.5.7
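A quick way to check that an environment already satisfies the new pins, using only the standard library (package names as pinned above):

```python
from importlib.metadata import version

# Print the installed versions of the packages this commit re-pins.
for pkg in ["pandas", "pyarrow", "locopy"]:
    print(pkg, version(pkg))
```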
setup.py (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@

setuptools.setup(
name="bi-utils-gismart",
version="0.16.0",
version="0.16.1",
author="gismart",
author_email="[email protected]",
description="Utils for BI team",
tests/aws/test_db.py (5 changes: 3 additions & 2 deletions)
@@ -21,7 +21,7 @@ def test_delete_wo_conditions():
def test_upload_download_delete(file_format):
version = 1
db.delete(table, schema=schema, version=version)
-    timestamp = pd.Timestamp.now()
+    timestamp = pd.Timestamp.now().as_unit("ns")
data = pd.DataFrame(
{
"text": ["hello", "bye"],
@@ -31,6 +31,7 @@ def test_upload_download_delete(file_format):
}
)
data.predict_dt = pd.to_datetime(data.predict_dt)
+    data.load_dttm = pd.to_datetime(data.load_dttm)
db.upload_data(data, f"/tmp/data.{file_format}", schema=schema, table=table)
query = f"""
SELECT text, predict_dt, version, load_dttm
@@ -59,7 +60,7 @@ def test_upload_update_download(file_format):
new_version = 2
db.delete(table, schema=schema, version=version)
db.delete(table, schema=schema, version=new_version)
-    timestamp = pd.Timestamp.now()
+    timestamp = pd.Timestamp.now().as_unit("ns")
data = pd.DataFrame(
{
"text": ["hello", "bye"],
tests/transformers/test_hierarchical_encoder.py (2 changes: 1 addition & 1 deletion)
@@ -11,7 +11,7 @@
def test_hierarchical_encoder(cols, C, data):
data = data.dropna()
target_data = pd.read_csv(utils.data_path("hierarchical_encoder.csv"))
-    target_data = target_data[(target_data.cols == str(cols)) & (target_data.C == C)]
+    target_data = target_data[(target_data.cols.fillna("None") == str(cols)) & (target_data.C == C)]
clipper = transformers.HierarchicalEncoder(cols=cols, C=C)
X = data.drop(["conversion", "conversion_predict"], axis=1)
y = data["conversion"]
tests/transformers/test_quantile_clipper.py (2 changes: 1 addition & 1 deletion)
@@ -11,7 +11,7 @@
def test_quantile_clipper(cols, q, data):
data = data.dropna()
target_data = pd.read_csv(utils.data_path("quantile_clipper.csv"))
-    target_data = target_data[(target_data.cols == str(cols)) & (target_data.q == q)]
+    target_data = target_data[(target_data.cols.fillna("None") == str(cols)) & (target_data.q == q)]
clipper = transformers.QuantileClipper(cols=cols, q=q)
X = data.drop(["conversion", "conversion_predict"], axis=1)
y = data["conversion"]
tests/transformers/test_target_encoder.py (2 changes: 1 addition & 1 deletion)
@@ -11,7 +11,7 @@
def test_target_encoder(cols, C, data):
data = data.dropna()
target_data = pd.read_csv(utils.data_path("target_encoder.csv"))
-    target_data = target_data[(target_data.cols == str(cols)) & (target_data.C == C)]
+    target_data = target_data[(target_data.cols.fillna("None") == str(cols)) & (target_data.C == C)]
clipper = transformers.TargetEncoder(cols=cols, C=C)
X = data.drop(["conversion", "conversion_predict"], axis=1)
y = data["conversion"]
