From 13201611a85dcc63500a343b0d9a4ab68f2e5c78 Mon Sep 17 00:00:00 2001 From: "tim.reichard" Date: Fri, 20 Jan 2023 05:49:41 -0600 Subject: [PATCH] Raise errors instead of printing in file_ingestion function zipfile_to_tsv --- .pre-commit-config.yaml | 6 +-- HISTORY.rst | 7 ++++ aioradio/file_ingestion.py | 76 ++++++++++++++++------------------- aioradio/long_running_jobs.py | 1 - aioradio/requirements.txt | 8 ++-- aioradio/tests/pyodbc_test.py | 4 +- setup.py | 2 +- 7 files changed, 52 insertions(+), 52 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e2ad10..148d094 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_language_version: python: python3.10 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-added-large-files - id: check-ast @@ -15,10 +15,10 @@ repos: - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 5.11.4 hooks: - id: isort - repo: https://github.com/myint/docformatter - rev: v1.5.0 + rev: v1.5.1 hooks: - id: docformatter diff --git a/HISTORY.rst b/HISTORY.rst index 98b05ab..4514f73 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,13 @@ History ======= +v0.17.27 (2023-01-20) + +* Raise errors instead of printing in file_ingestion function zipfile_to_tsv. +* Update dominodatalab==1.2.2. +* Update pysmb==1.2.9.1. + + v0.17.26 (2023-01-12) * Add option to pass aws_creds argument in get_secret function. diff --git a/aioradio/file_ingestion.py b/aioradio/file_ingestion.py index ce08872..7ec46c8 100644 --- a/aioradio/file_ingestion.py +++ b/aioradio/file_ingestion.py @@ -57,7 +57,6 @@ class EFIParse: entry_year_filter: dict = dc_field(default_factory=dict) def __post_init__(self): - if not self.fice_enrolled_logic: self.fice_enrolled_logic = { "001100", @@ -1681,49 +1680,44 @@ async def zipfile_to_tsv( Union[str, None]: Error message during process else None """ - try: - extensions = ['xlsx', 'txt', 'csv', 'tsv'] - records = [] - header = None - with NamedTemporaryFile(suffix='.zip') as tmp: - await download_file(bucket=s3_source_bucket, filepath=tmp.name, s3_key=s3_source_key) - with TemporaryDirectory() as tmp_directory: - for path in await unzip_file_get_filepaths(tmp.name, tmp_directory, include_extensions=extensions): - ext = os.path.splitext(path)[1].lower() - if ext == '.xlsx': - records_from_path, header = xlsx_to_records(path, header) - records.extend(records_from_path) + extensions = ['xlsx', 'txt', 'csv', 'tsv'] + records = [] + header = None + + with NamedTemporaryFile(suffix='.zip') as tmp: + await download_file(bucket=s3_source_bucket, filepath=tmp.name, s3_key=s3_source_key) + with TemporaryDirectory() as tmp_directory: + for path in await unzip_file_get_filepaths(tmp.name, tmp_directory, include_extensions=extensions): + ext = os.path.splitext(path)[1].lower() + if ext == '.xlsx': + records_from_path, header = xlsx_to_records(path, header) + records.extend(records_from_path) + else: + encoding = detect_encoding(path) + if encoding is None: + raise IOError(f"Failed to detect proper encoding for {path}") + encodings = [encoding] + [i for i in ['UTF-8', 'LATIN-1', 'UTF-16'] if i != encoding] + for encoding in encodings: + try: + detected_delimiter = detect_delimiter(path, encoding) + if detected_delimiter: + try: + records_from_path, header = tsv_to_records(path, encoding, detected_delimiter, header) + records.extend(records_from_path) + break + except Exception as err: + if str(err) == 'Every file must contain the exact same header': + raise ValueError('Every file must contain the exact same header') from err + continue + except Exception as err: + if str(err) == 'Every file must contain the exact same header': + raise ValueError('Every file must contain the exact same header') from err + continue else: - encoding = detect_encoding(path) - if encoding is None: - raise IOError(f"Failed to detect proper encoding for {path}") - encodings = [encoding] + [i for i in ['UTF-8', 'LATIN-1', 'UTF-16'] if i != encoding] - for encoding in encodings: - try: - detected_delimiter = detect_delimiter(path, encoding) - if detected_delimiter: - try: - records_from_path, header = tsv_to_records(path, encoding, detected_delimiter, header) - records.extend(records_from_path) - break - except Exception as err: - if str(err) == 'Every file must contain the exact same header': - raise ValueError('Every file must contain the exact same header') from err - continue - except Exception as err: - if str(err) == 'Every file must contain the exact same header': - raise ValueError('Every file must contain the exact same header') from err - continue - else: - raise IOError(f"Failed to detect proper encoding for {path}") - + raise IOError(f"Failed to detect proper encoding for {path}") - await tsv_to_s3(records, delimiter, s3_destination_bucket, s3_destination_key) - - except Exception as err: - print(err) - return str(err) + await tsv_to_s3(records, delimiter, s3_destination_bucket, s3_destination_key) return None diff --git a/aioradio/long_running_jobs.py b/aioradio/long_running_jobs.py index 473015b..2419302 100644 --- a/aioradio/long_running_jobs.py +++ b/aioradio/long_running_jobs.py @@ -64,7 +64,6 @@ class LongRunningJobs: httpx_client: httpx.AsyncClient = httpx.AsyncClient() def __post_init__(self): - self.queue_service = self.queue_service.lower() if self.queue_service not in ['sqs', 'redis']: raise ValueError("queue_service must be either 'sqs' or 'redis'.") diff --git a/aioradio/requirements.txt b/aioradio/requirements.txt index b880e42..38ef1f2 100644 --- a/aioradio/requirements.txt +++ b/aioradio/requirements.txt @@ -5,7 +5,7 @@ boto3==1.24.59 botocore==1.27.59 cchardet==2.1.7 ddtrace==1.3.6 -dominodatalab==1.2.1 +dominodatalab==1.2.2 fakeredis==1.10.1 flask==2.1.2 flask-cors==3.0.10 @@ -17,10 +17,10 @@ orjson==3.8.5 pandas==1.4.4 pre-commit==2.21.0 psycopg2-binary==2.9.5 -pylint==2.15.9 +pylint==2.15.10 pyodbc==4.0.35 -pysmb==1.2.8 -pytest==7.2.0 +pysmb==1.2.9.1 +pytest==7.2.1 pytest-asyncio==0.20.3 pytest-cov==4.0.0 python-json-logger==2.0.4 diff --git a/aioradio/tests/pyodbc_test.py b/aioradio/tests/pyodbc_test.py index 8bebd96..a98dbf6 100644 --- a/aioradio/tests/pyodbc_test.py +++ b/aioradio/tests/pyodbc_test.py @@ -17,8 +17,8 @@ async def test_bad_unixodbc_driver(): async def test_pyodbc_query_fetchone_and_fetchall(): """Test pyodbc_query_fetchone. - Make sure you have unixodbc and freetds installed; - see here: http://www.christophers.tips/pages/pyodbc_mac.html. + Make sure you have unixodbc and freetds installed; see here: + http://www.christophers.tips/pages/pyodbc_mac.html. """ pytest.skip('Skip test_pyodbc_query_fetchone_and_fetchall since it contains sensitive info') diff --git a/setup.py b/setup.py index 8a370e1..42b9835 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ long_description = fileobj.read() setup(name='aioradio', - version='0.17.26', + version='0.17.27', description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more', long_description=long_description, long_description_content_type="text/markdown",