Skip to content

Commit

Permalink
Merge pull request #92 from nrccua/DS-342-make-efi-transformation-code-raise-exception-inste…
Browse files Browse the repository at this point in the history

Raise errors instead of printing in file_ingestion function zipfile_to_tsv
  • Loading branch information
nrccua-timr authored Jan 20, 2023
2 parents 4f38dc4 + 1320161 commit 31d2a21
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 52 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ default_language_version:
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.4.0
hooks:
- id: check-added-large-files
- id: check-ast
Expand All @@ -15,10 +15,10 @@ repos:
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
rev: 5.11.4
hooks:
- id: isort
- repo: https://github.com/myint/docformatter
rev: v1.5.0
rev: v1.5.1
hooks:
- id: docformatter
7 changes: 7 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ History
=======


v0.17.27 (2023-01-20)

* Raise errors instead of printing in file_ingestion function zipfile_to_tsv.
* Update dominodatalab==1.2.2.
* Update pysmb==1.2.9.1.


v0.17.26 (2023-01-12)

* Add option to pass aws_creds argument in get_secret function.
Expand Down
76 changes: 35 additions & 41 deletions aioradio/file_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class EFIParse:
entry_year_filter: dict = dc_field(default_factory=dict)

def __post_init__(self):

if not self.fice_enrolled_logic:
self.fice_enrolled_logic = {
"001100",
Expand Down Expand Up @@ -1681,49 +1680,44 @@ async def zipfile_to_tsv(
Union[str, None]: Error message during process else None
"""

try:
extensions = ['xlsx', 'txt', 'csv', 'tsv']
records = []
header = None

with NamedTemporaryFile(suffix='.zip') as tmp:
await download_file(bucket=s3_source_bucket, filepath=tmp.name, s3_key=s3_source_key)
with TemporaryDirectory() as tmp_directory:
for path in await unzip_file_get_filepaths(tmp.name, tmp_directory, include_extensions=extensions):
ext = os.path.splitext(path)[1].lower()
if ext == '.xlsx':
records_from_path, header = xlsx_to_records(path, header)
records.extend(records_from_path)
extensions = ['xlsx', 'txt', 'csv', 'tsv']
records = []
header = None

with NamedTemporaryFile(suffix='.zip') as tmp:
await download_file(bucket=s3_source_bucket, filepath=tmp.name, s3_key=s3_source_key)
with TemporaryDirectory() as tmp_directory:
for path in await unzip_file_get_filepaths(tmp.name, tmp_directory, include_extensions=extensions):
ext = os.path.splitext(path)[1].lower()
if ext == '.xlsx':
records_from_path, header = xlsx_to_records(path, header)
records.extend(records_from_path)
else:
encoding = detect_encoding(path)
if encoding is None:
raise IOError(f"Failed to detect proper encoding for {path}")
encodings = [encoding] + [i for i in ['UTF-8', 'LATIN-1', 'UTF-16'] if i != encoding]
for encoding in encodings:
try:
detected_delimiter = detect_delimiter(path, encoding)
if detected_delimiter:
try:
records_from_path, header = tsv_to_records(path, encoding, detected_delimiter, header)
records.extend(records_from_path)
break
except Exception as err:
if str(err) == 'Every file must contain the exact same header':
raise ValueError('Every file must contain the exact same header') from err
continue
except Exception as err:
if str(err) == 'Every file must contain the exact same header':
raise ValueError('Every file must contain the exact same header') from err
continue
else:
encoding = detect_encoding(path)
if encoding is None:
raise IOError(f"Failed to detect proper encoding for {path}")
encodings = [encoding] + [i for i in ['UTF-8', 'LATIN-1', 'UTF-16'] if i != encoding]
for encoding in encodings:
try:
detected_delimiter = detect_delimiter(path, encoding)
if detected_delimiter:
try:
records_from_path, header = tsv_to_records(path, encoding, detected_delimiter, header)
records.extend(records_from_path)
break
except Exception as err:
if str(err) == 'Every file must contain the exact same header':
raise ValueError('Every file must contain the exact same header') from err
continue
except Exception as err:
if str(err) == 'Every file must contain the exact same header':
raise ValueError('Every file must contain the exact same header') from err
continue
else:
raise IOError(f"Failed to detect proper encoding for {path}")

raise IOError(f"Failed to detect proper encoding for {path}")

await tsv_to_s3(records, delimiter, s3_destination_bucket, s3_destination_key)

except Exception as err:
print(err)
return str(err)
await tsv_to_s3(records, delimiter, s3_destination_bucket, s3_destination_key)

return None

Expand Down
1 change: 0 additions & 1 deletion aioradio/long_running_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ class LongRunningJobs:
httpx_client: httpx.AsyncClient = httpx.AsyncClient()

def __post_init__(self):

self.queue_service = self.queue_service.lower()
if self.queue_service not in ['sqs', 'redis']:
raise ValueError("queue_service must be either 'sqs' or 'redis'.")
Expand Down
8 changes: 4 additions & 4 deletions aioradio/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ boto3==1.24.59
botocore==1.27.59
cchardet==2.1.7
ddtrace==1.3.6
dominodatalab==1.2.1
dominodatalab==1.2.2
fakeredis==1.10.1
flask==2.1.2
flask-cors==3.0.10
Expand All @@ -17,10 +17,10 @@ orjson==3.8.5
pandas==1.4.4
pre-commit==2.21.0
psycopg2-binary==2.9.5
pylint==2.15.9
pylint==2.15.10
pyodbc==4.0.35
pysmb==1.2.8
pytest==7.2.0
pysmb==1.2.9.1
pytest==7.2.1
pytest-asyncio==0.20.3
pytest-cov==4.0.0
python-json-logger==2.0.4
Expand Down
4 changes: 2 additions & 2 deletions aioradio/tests/pyodbc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ async def test_bad_unixodbc_driver():
async def test_pyodbc_query_fetchone_and_fetchall():
"""Test pyodbc_query_fetchone.
Make sure you have unixodbc and freetds installed;
see here: http://www.christophers.tips/pages/pyodbc_mac.html.
Make sure you have unixodbc and freetds installed; see here:
http://www.christophers.tips/pages/pyodbc_mac.html.
"""

pytest.skip('Skip test_pyodbc_query_fetchone_and_fetchall since it contains sensitive info')
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
long_description = fileobj.read()

setup(name='aioradio',
version='0.17.26',
version='0.17.27',
description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 31d2a21

Please sign in to comment.