Skip to content

Commit

Permalink
Merge pull request #56 from nrccua/EFI-check-date-defaults
Browse files Browse the repository at this point in the history
Efi check date defaults
  • Loading branch information
nrccua-timr authored Nov 1, 2021
2 parents 7a06248 + c0120c6 commit ea4ff74
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 37 deletions.
6 changes: 6 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
History
=======

v0.15.4 (2021-11-01)

* Date and year checks now take their min/max ranges from per-field constants, so min/max values no longer need to be passed into the corresponding functions:
* check_date()
* check_year()


v0.15.3 (2021-10-27)

Expand Down
94 changes: 58 additions & 36 deletions aioradio/file_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import field as dc_field
from datetime import datetime, timezone, tzinfo
from datetime import datetime, timezone, tzinfo, timedelta
from pathlib import Path
from types import coroutine
from typing import Any, Dict, List
Expand Down Expand Up @@ -80,6 +80,26 @@ def __post_init__(self):
"end": "2025"
}

now = datetime.now()
self.filed_date_min_max = {
"BirthDate": (now - timedelta(days=80 * 365), now - timedelta(days=10 * 365)),
"SrcDate": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Inquired": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Applied": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Completed": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Admitted": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Confirmed": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Enrolled": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Canceled": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Dropped": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
"Graduated": (now - timedelta(days=50 * 365), now + timedelta(days=365))
}

self.filed_year_min_max = {
"EntryYear": ((now - timedelta(days=50 * 365)).year, (now + timedelta(days=10 * 365)).year),
"HSGradYear": ((now - timedelta(days=50 * 365)).year, (now + timedelta(days=10 * 365)).year)
}

self.field_to_max_widths = {
"StudentID": 50,
"LastName": 64,
Expand Down Expand Up @@ -438,14 +458,12 @@ def check_statecode(self, value: str, field: str, row_idx: int) -> str:

return value

def check_date(self, value: str, field: str, past: datetime, future: datetime, row_idx: int) -> str:
def check_date(self, value: str, field: str, row_idx: int) -> str:
"""Check date conforms to expected date within time range.
Args:
value (str): Date value
field (str): Column header field value
past (datetime): Past datetime threshold
future (datetime): Future datetime threshold
row_idx (int): Row number in file
Returns:
Expand All @@ -468,15 +486,19 @@ def check_date(self, value: str, field: str, past: datetime, future: datetime, r
val = datetime.strptime(value, pattern)
if idx != 0:
self.date_formats[0], self.date_formats[idx] = self.date_formats[idx], self.date_formats[0]
if past <= val <= future:
val = val.strftime('%Y/%m/%d')
self.cache['date'][value] = val
self.cache['sort_date'][val] = f"{val[5:7]}/{val[8:10]}/{val[:4]}"
value = val
else:
LOG.warning(f"[{self.filename}] [row:{row_idx}] [{field}] - {val.date()}"
f" not between range of {past.date()} to {future.date()}")
value = ''
if field in self.filed_date_min_max:
# we have date field with defined min/max range.
dmin = self.filed_date_min_max[field][0]
dmax = self.filed_date_min_max[field][1]
if dmin <= val <= dmax:
val = val.strftime('%Y/%m/%d')
self.cache['date'][value] = val
self.cache['sort_date'][val] = f"{val[5:7]}/{val[8:10]}/{val[:4]}"
value = val
else:
LOG.warning(f"[{self.filename}] [row:{row_idx}] [{field}] - {val.date()}"
f" not between range of {dmin.date()} to {dmax.date()}")
value = ''
break
except ValueError:
pass
Expand All @@ -486,14 +508,12 @@ def check_date(self, value: str, field: str, past: datetime, future: datetime, r

return value

def check_year(self, value: str, field: str, past: datetime, future: datetime, row_idx: int) -> str:
def check_year(self, value: str, field: str, row_idx: int) -> str:
"""Check year conforms to expected year within time range.
Args:
value (str): Year value
field (str): Column header field value
past (datetime): Past datetime threshold
future (datetime): Future datetime threshold
row_idx (int): Row number in file
Returns:
Expand All @@ -509,14 +529,18 @@ def check_year(self, value: str, field: str, past: datetime, future: datetime, r
val = datetime.strptime(value, pattern).year
if idx != 0:
self.year_formats[0], self.year_formats[idx] = self.year_formats[idx], self.year_formats[0]
if past <= val <= future:
val = str(val)
self.cache['year'][value] = val
value = val
else:
LOG.warning(f"[{self.filename}] [row:{row_idx}] [{field}] - {val} not between range of {past} to {future}")
self.cache['year'][value] = ''
value = ''
if field in self.filed_year_min_max:
# we have year field with defined min/max range.
ymin = self.filed_year_min_max[field][0]
ymax = self.filed_year_min_max[field][1]
if ymin <= val <= ymax:
val = str(val)
self.cache['year'][value] = val
value = val
else:
LOG.warning(f"[{self.filename}] [row:{row_idx}] [{field}] - {val} not between range of {ymin} to {ymax}")
self.cache['year'][value] = ''
value = ''
break
except ValueError:
pass
Expand Down Expand Up @@ -659,7 +683,7 @@ def check_no_spaces_address(self, value: str) -> str:
for item in self.addr_suffix_list:
if value_lower.endswith(item):
# add space before item
value = f"{value[:len(value)-2]} {value[len(value)-2:]}"
value = f"{value[:len(value) - 2]} {value[len(value) - 2:]}"
break

return value
Expand Down Expand Up @@ -737,8 +761,8 @@ def check_entry_fields(self, entryyear, entryterm) -> tuple:
word_length = len(word)
if word_length >= 4:
words.append(word[:4])
if word[:4] != word[word_length-4:]:
words.append(word[word_length-4:])
if word[:4] != word[word_length - 4:]:
words.append(word[word_length - 4:])

for word in words:
if self.entry_year_filter['start'] <= self.season_year_map[word] <= self.entry_year_filter['end']:
Expand Down Expand Up @@ -786,27 +810,27 @@ def check_year_efi(self, records: list[str], field: str, past: datetime, future:
Args:
records (list[str]): List of a specific columns values
field (str): Column header field value
past (datetime): Past datetime threshold
future (datetime): Future datetime threshold
past (datetime): Past datetime threshold --> Deprecated, remove
future (datetime): Future datetime threshold --> Deprecated, remove
row_idx (int): Row number in file
"""

for idx in range(len(records)):
records[idx] = self.check_year(records[idx], field, past, future, row_idx + idx)
records[idx] = self.check_year(records[idx], field, row_idx + idx)

def check_date_efi(self, records: list[str], field: str, past: datetime, future: datetime, row_idx: int):
"""Check date conforms to expected date within time range.
Args:
records (list[str]): List of a specific columns values
field (str): Column header field value
past (datetime): Past datetime threshold
future (datetime): Future datetime threshold
past (datetime): Past datetime threshold --> Deprecated, remove
future (datetime): Future datetime threshold --> Deprecated, remove
row_idx (int): Row number in file
"""

for idx in range(len(records)):
records[idx] = self.check_date(records[idx], field, past, future, row_idx + idx)
records[idx] = self.check_date(records[idx], field, row_idx + idx)
if field in self.non_prospect_fields and records[idx]:
self.non_prospect_row_idxs.add(idx)

Expand Down Expand Up @@ -1121,7 +1145,6 @@ async def unzip_file(filepath: str, directory: str) -> List[str]:

filenames = []
with zipfile.ZipFile(filepath) as zipped:

# exclude __MACOSX directory that could be added when creating zip on macs
filenames = [i for i in zipped.namelist() if '__MACOSX' not in i]
zipped.extractall(directory)
Expand Down Expand Up @@ -1194,8 +1217,7 @@ async def send_emails_via_mandrill(
subject: str,
global_merge_vars: List[Dict[str, Any]],
template_name: str,
template_content: List[Dict[str, Any]] = None
) -> Any:
template_content: List[Dict[str, Any]] = None) -> Any:
"""Send emails via Mailchimp mandrill API.
Args:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
long_description = fileobj.read()

setup(name='aioradio',
version='0.15.3',
version='0.15.4',
description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit ea4ff74

Please sign in to comment.