Skip to content

Commit

Permalink
add max_chunk_size_mb arg
Browse files Browse the repository at this point in the history
  • Loading branch information
maxim-lisovsky-gismart committed Mar 18, 2024
1 parent 307abcd commit 08d1660
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Add `--upgrade` option to update existing package to a new version
Specify package link in your `requirements.txt`:

```txt
git+https://github.com/gismart/bi-utils@0.16.1#egg=bi-utils-gismart
git+https://github.com/gismart/bi-utils@0.16.2#egg=bi-utils-gismart
```

### Usage
Expand Down
9 changes: 6 additions & 3 deletions bi_utils/aws/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,17 @@ def download_files(
database: Optional[str] = None,
host: Optional[str] = None,
retries: int = 0,
max_chunk_size_mb: int = 6000,
add_timestamp_dir: bool = True,
add_s3_timestamp_dir: bool = True,
) -> Sequence[str]:
"""Copy data from RedShift to S3 and download csv or parquet files up to 6.2 GB"""

max_chunk_size_opt = f"MAXFILESIZE {max_chunk_size_mb} MB"
if file_format.lower() == "csv":
unload_options = ["CSV", "HEADER", "GZIP", "PARALLEL ON"]
unload_options = ["CSV", "HEADER", "GZIP", "PARALLEL ON", max_chunk_size_opt]
elif file_format.lower() == "parquet":
separator = None
unload_options = ["PARQUET", "PARALLEL ON"]
unload_options = ["PARQUET", "PARALLEL ON", max_chunk_size_opt]
else:
raise ValueError(f"{file_format} file format is not supported")
if delete_s3_before:
Expand Down Expand Up @@ -220,6 +221,7 @@ def download_data(
database: Optional[str] = None,
host: Optional[str] = None,
retries: int = 0,
max_chunk_size_mb: int = 6000,
remove_files: bool = True,
delete_s3_before: bool = False,
delete_s3_after: bool = True,
Expand All @@ -238,6 +240,7 @@ def download_data(
database=database,
host=host,
retries=retries,
max_chunk_size_mb=max_chunk_size_mb,
delete_s3_before=delete_s3_before,
delete_s3_after=delete_s3_after,
add_timestamp_dir=add_timestamp_dir,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setuptools.setup(
name="bi-utils-gismart",
version="0.16.1",
version="0.16.2",
author="gismart",
author_email="[email protected]",
description="Utils for BI team",
Expand Down

0 comments on commit 08d1660

Please sign in to comment.