From 7fa11e66a2f76088332f93ded26cae10096a4116 Mon Sep 17 00:00:00 2001
From: "tim.reichard" <tim.reichard@nrccua.org>
Date: Tue, 11 Jan 2022 14:36:49 -0600
Subject: [PATCH] Adding new EL3 field parsing logic

---
 .pre-commit-config.yaml    |   4 +-
 HISTORY.rst                |  15 ++
 Makefile                   |   2 +-
 aioradio/file_ingestion.py | 292 +++++++++++++++++++++++++++++++++++--
 aioradio/requirements.txt  |  20 +--
 setup.py                   |  16 +-
 6 files changed, 315 insertions(+), 34 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3eb679b..0915436 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_language_version:
     python: python3.9
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.4.0
+    rev: v4.1.0
     hooks:
     -   id: check-added-large-files
     -   id: check-ast
@@ -15,7 +15,7 @@ repos:
     -   id: requirements-txt-fixer
     -   id: trailing-whitespace
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.8.0
+    rev: 5.10.1
     hooks:
     -   id: isort
 -   repo: https://github.com/myint/docformatter
diff --git a/HISTORY.rst b/HISTORY.rst
index 231627b..fce12e1 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,6 +3,21 @@ History
 =======
 
 
+v0.16.0 (2022-01-11)
+
+* Update ddtrace==0.57.0.
+* Update moto==2.3.1.
+* Update numpy==1.22.0.
+* Update orjson==3.6.5.
+* Update pre-commit==2.16.0.
+* Update psycopg2-binary==2.9.3.
+* Update pylint==2.12.2.
+* Update pytest-asyncio==0.16.0.
+* Update twine==3.7.1.
+* Update wheel==0.37.1.
+* Add support for EL3 field parsing.
+
+
 v0.15.6 (2021-11-30)
 
 * Hard-code redis==3.5.3.
diff --git a/Makefile b/Makefile
index 3ab07a6..9ba52b9 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ lint:
 test:
 	. env/bin/activate; \
 	export AWS_PROFILE=sandbox; \
-	pytest -vss --cov=aioradio  --cov-config=.coveragerc --cov-report=html --cov-fail-under=60
+	pytest -vss --cov=aioradio  --cov-config=.coveragerc --cov-report=html --cov-fail-under=50
 
 pre-commit:
 	. env/bin/activate; \
diff --git a/aioradio/file_ingestion.py b/aioradio/file_ingestion.py
index f08fe71..6092f07 100644
--- a/aioradio/file_ingestion.py
+++ b/aioradio/file_ingestion.py
@@ -75,8 +75,8 @@ def __post_init__(self):
 
         if not self.entry_year_filter:
             self.entry_year_filter = {
-                "start": "2021",
-                "end": "2025"
+                "start": "2022",
+                "end": "2026"
             }
 
         now = datetime.now()
@@ -91,7 +91,14 @@ def __post_init__(self):
             "Enrolled": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
             "Canceled": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
             "Dropped": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
-            "Graduated": (now - timedelta(days=50 * 365), now + timedelta(days=365))
+            "Graduated": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "ProspectDate": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "FAFSASubmitted": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "CustomDate1": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "CustomDate2": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "CustomDate3": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "CustomDate4": (now - timedelta(days=50 * 365), now + timedelta(days=365)),
+            "CustomDate5": (now - timedelta(days=50 * 365), now + timedelta(days=365))
         }
 
         self.filed_year_min_max = {
@@ -128,7 +135,40 @@ def __post_init__(self):
             "AcademicProgram": 256,
             "StudentAthlete": 50,
             "CampusLocation": 50,
-            "Email": 75
+            "Email": 75,
+            "CellPhoneNumber": 10,
+            "TextMessageOptIn": 5,
+            "HomePhone": 10,
+            "Ethnicity": 1,
+            "FirstGenFlag": 1,
+            "EFC": 20,
+            "HSCode": 6,
+            "ACTScore": 2,
+            "SATScore": 4,
+            "ProspectCode": 15,
+            "ProspectDate": 10,
+            "FAFSASubmitted": 10,
+            "ApplicationPlan": 30,
+            "AdmitCode": 20,
+            "College": 30,
+            "AdmittedProgram": 30,
+            "HonorsProgram": 5,
+            "StudentType": 20,
+            "International": 5,
+            "CountryOfOrigin": 30,
+            "StudentStatus": 20,
+            "Territory": 20,
+            "EngagementScore": 10,
+            "CustomFilter1": 20,
+            "CustomFilter2": 20,
+            "CustomFilter3": 20,
+            "CustomFilter4": 20,
+            "CustomFilter5": 20,
+            "CustomDate1": 10,
+            "CustomDate2": 10,
+            "CustomDate3": 10,
+            "CustomDate4": 10,
+            "CustomDate5": 10
         }
 
         self.gender_map = {
@@ -362,6 +402,19 @@ def __post_init__(self):
             'prospects': 0
         }
 
+        self.generic_bool_map = {
+            'YES': 'Y',
+            'NO': 'N',
+            'Y': 'Y',
+            'N': 'N',
+            'TRUE': 'Y',
+            'FALSE': 'N',
+            '1': 'Y',
+            '0': 'N'
+        }
+
+        self.ethnicity_federal_categories = {'1', '2', '3', '4', '5', '6', '7', '8'}
+
     def check_width(self, value: str, field: str, row_idx: int) -> str:
         """Check field value and truncate if it is longer than expected.
 
@@ -411,8 +464,7 @@ def check_gender(self, value: str) -> str:
         """
 
         if value != '':
-            value_upper = value.upper()
-            value = self.gender_map[value_upper] if value_upper in self.gender_map else ''
+            value = self.gender_map.get(value.upper(), '')
 
         return value
 
@@ -432,8 +484,7 @@ def check_gpa(self, value: str, field: str, row_idx: int) -> str:
             try:
                 value = '' if not (0 <= float(value) <= 200) else self.check_width(value, field, row_idx)
             except ValueError:
-                value_upper = value.upper()
-                value = self.grades_map[value_upper] if value_upper in self.grades_map else ''
+                value = self.grades_map.get(value.upper(), '')
 
         return value
 
@@ -450,9 +501,7 @@ def check_statecode(self, value: str, field: str, row_idx: int) -> str:
         """
 
         if value != '':
-            value_upper = value.upper()
-            if value_upper in self.state_to_statecode:
-                value = self.state_to_statecode[value_upper]
+            value = self.state_to_statecode.get(value.upper(), value)
             value = self.check_width(value, field, row_idx)
 
         return value
@@ -576,8 +625,7 @@ def check_athlete(self, value: str) -> str:
         """
 
         if value != '':
-            value_upper = value.upper()
-            value = self.student_athlete_map[value_upper] if value_upper in self.student_athlete_map else 'Y'
+            value = self.student_athlete_map.get(value.upper(), 'Y')
 
         return value
 
@@ -799,10 +847,228 @@ def check_for_prospects(self, row: dict[str, Any]) -> bool:
 
         return skip_record
 
+
+    ###############################################################################################
+    ############################### New EL3 field parsing functions ###############################
+    ###############################################################################################
+    #
+    # CELLPHONENUMBER
+    # TEXTMESSAGEOPTIN
+    # HOMEPHONE
+    # ETHNICITY
+    # FIRSTGENFLAG
+    # EFC
+    # HSCODE
+    # ACTSCORE
+    # SATSCORE
+    # PROSPECTCODE
+    # PROSPECTDATE
+    # FAFSASUBMITTED
+    # APPLICATIONPLAN
+    # ADMITCODE
+    # COLLEGE
+    # ADMITTEDPROGRAM
+    # HONORSPROGRAM
+    # STUDENTTYPE
+    # INTERNATIONAL
+    # COUNTRYOFORIGIN
+    # STUDENTSTATUS
+    # TERRITORY
+    # ENGAGEMENTSCORE
+    # CUSTOMFILTER1, ..., CUSTOMFILTER5
+    # CUSTOMDATE1, ..., CUSTOMDATE5
+    #
+    # Many of these fields are parsed with the check_generic or check_date
+    # functions; the remaining fields use one of the functions below.
+
+    def check_generic_boolean(self, value: str) -> str:
+        """Normalize a generic boolean value through generic_bool_map.
+
+        Args:
+            value (str): Generic Boolean value
+
+        Returns:
+            str: generic_bool_map entry for the upper-cased value; '' when
+                the value is blank or has no entry in the map
+        """
+
+        if value == '':
+            return value
+        return self.generic_bool_map.get(value.upper(), '')
+
+    def check_phone_number(self, value: str, field: str, row_idx: int) -> str:
+        """Reduce a cell/home phone number to its digits and width-check it.
+
+        Args:
+            value (str): Cell/Home phone number value
+            field (str): Column header field value
+            row_idx (int): Row number in file
+
+        Returns:
+            str: Digits of the number after check_width, or '' if blank
+        """
+
+        if value == '':
+            return value
+
+        digits = ''.join(filter(str.isdigit, value))
+        return self.check_width(digits, field, row_idx)
+
+    def check_ethnicity(self, value: str) -> str:
+        """Keep an ethnicity value only if it is a federal category.
+
+        Args:
+            value (str): Ethnicity category
+
+        Returns:
+            str: The value when it is in ethnicity_federal_categories,
+                otherwise an empty string
+        """
+
+        if value == '' or value in self.ethnicity_federal_categories:
+            return value
+        return ''
+
+    @staticmethod
+    def check_act_score(value: str) -> str:
+        """Validate an ACT score as an integer between 1 and 36.
+
+        Args:
+            value (str): ACT score
+
+        Returns:
+            str: The score re-rendered via str(int(value)) when it lies in
+                1..36; '' when blank, out of range or not an integer
+        """
+
+        if value == '':
+            return value
+
+        try:
+            score = int(value)
+        except ValueError:
+            return ''
+        return str(score) if 1 <= score <= 36 else ''
+
+    @staticmethod
+    def check_sat_score(value: str) -> str:
+        """Validate an SAT score as an integer between 400 and 1600.
+
+        Args:
+            value (str): SAT score
+
+        Returns:
+            str: The score re-rendered via str(int(value)) when it lies in
+                400..1600; '' when blank, out of range or not an integer
+        """
+
+        if value == '':
+            return value
+
+        try:
+            score = int(value)
+        except ValueError:
+            return ''
+        return str(score) if 400 <= score <= 1600 else ''
+
+    @staticmethod
+    def check_hscode(value: str) -> str:
+        """Validate a high school code: exactly six ASCII digits.
+
+        int() is deliberately not used to probe the value: it also accepts
+        signed, underscore-separated and whitespace-padded text ('+12345',
+        '12_345', ' 1234 '), none of which is a six digit code.
+
+        Args:
+            value (str): HSCODE value
+
+        Returns:
+            str: The unchanged value (leading zeros kept) when it is six
+                ASCII digits, otherwise an empty string
+        """
+
+        # isascii() rules out non-ASCII digit characters such as '²', for
+        # which isdigit() alone is true.
+        if len(value) == 6 and value.isascii() and value.isdigit():
+            return value
+        return ''
+
     ###############################################################################################
     ################################### Used by EFI exclusively ###################################
     ###############################################################################################
 
+    def check_generic_boolean_efi(self, records: list[str]):
+        """Apply check_generic_boolean to every value of a column in place.
+
+        Args:
+            records (list[str]): List of a specific columns values; each
+                entry is overwritten with its check_generic_boolean result
+        """
+
+        # Rewriting entries by index keeps the caller's list object intact.
+        for idx, record in enumerate(records):
+            records[idx] = self.check_generic_boolean(record)
+
+    def check_phone_number_efi(self, records: list[str], field: str, row_idx: int):
+        """Apply check_phone_number to every value of a column in place.
+
+        Args:
+            records (list[str]): List of a specific columns values
+            field (str): Column header field value
+            row_idx (int): Row number in file of the first entry in records
+        """
+
+        for offset, record in enumerate(records):
+            records[offset] = self.check_phone_number(record, field, row_idx + offset)
+
+    def check_ethnicity_efi(self, records: list[str]):
+        """Apply check_ethnicity to every value of a column in place.
+
+        Args:
+            records (list[str]): List of a specific columns values; each
+                entry is overwritten with its check_ethnicity result
+        """
+
+        # Rewriting entries by index keeps the caller's list object intact.
+        for idx, record in enumerate(records):
+            records[idx] = self.check_ethnicity(record)
+
+    def check_act_score_efi(self, records: list[str]):
+        """Apply check_act_score to every value of a column in place.
+
+        Args:
+            records (list[str]): List of a specific columns values; each
+                entry is overwritten with its check_act_score result
+        """
+
+        # Rewriting entries by index keeps the caller's list object intact.
+        for idx, record in enumerate(records):
+            records[idx] = self.check_act_score(record)
+
+    def check_sat_score_efi(self, records: list[str]):
+        """Apply check_sat_score to every value of a column in place.
+
+        Args:
+            records (list[str]): List of a specific columns values; each
+                entry is overwritten with its check_sat_score result
+        """
+
+        # Rewriting entries by index keeps the caller's list object intact.
+        for idx, record in enumerate(records):
+            records[idx] = self.check_sat_score(record)
+
+    def check_hscode_efi(self, records: list[str]):
+        """Apply check_hscode to every value of a column in place.
+
+        Args:
+            records (list[str]): List of a specific columns values; each
+                entry is overwritten with its check_hscode result
+        """
+
+        # Rewriting entries by index keeps the caller's list object intact.
+        for idx, record in enumerate(records):
+            records[idx] = self.check_hscode(record)
+
     def check_year_efi(self, records: list[str], field: str, row_idx: int):
         """Check year conforms to expected year within time range.
 
diff --git a/aioradio/requirements.txt b/aioradio/requirements.txt
index 7f8763b..93b4279 100644
--- a/aioradio/requirements.txt
+++ b/aioradio/requirements.txt
@@ -2,24 +2,24 @@ aioboto3==9.2.2
 aiobotocore==1.4.2
 aiojobs==0.3.0
 boto3==1.17.106
-ddtrace==0.56.0
+ddtrace==0.57.0
 fakeredis==1.7.0
 flask==2.0.2
 flask-cors==3.0.10
 httpx==0.20.0
 mandrill==1.0.60
-moto==2.2.17
-numpy==1.21.4
-orjson==3.6.4
-pre-commit==2.15.0
-psycopg2-binary==2.9.2
-pylint==2.12.1
+moto==2.3.1
+numpy==1.22.0
+orjson==3.6.5
+pre-commit==2.16.0
+psycopg2-binary==2.9.3
+pylint==2.12.2
 pyodbc==4.0.32
 pysmb==1.2.7
 pytest==6.2.5
-pytest-asyncio==0.15.1
+pytest-asyncio==0.16.0
 pytest-cov==3.0.0
 python-json-logger==2.0.2
 redis==3.5.3
-twine==3.6.0
-wheel==0.37.0
+twine==3.7.1
+wheel==0.37.1
diff --git a/setup.py b/setup.py
index f0ef0ab..2673622 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
     long_description = fileobj.read()
 
 setup(name='aioradio',
-    version='0.15.6',
+    version='0.16.0',
     description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -23,13 +23,13 @@
         'aiobotocore>=1.4.2',
         'aiojobs>=0.3.0',
         'boto3==1.17.106',
-        'ddtrace>=0.56.0',
+        'ddtrace>=0.57.0',
         'fakeredis>=1.7.0',
         'httpx>=0.19.0',
         'mandrill>=1.0.60',
         'numpy>=1.19',
-        'orjson>=3.6.4',
-        'psycopg2-binary==2.9.2',
+        'orjson>=3.6.5',
+        'psycopg2-binary==2.9.3',
         'pysmb>=1.2.7',
         'python-json-logger>=2.0.2',
         'redis==3.5.3',
@@ -39,11 +39,11 @@
     tests_require=[
         'flask>=2.0.2',
         'flask-cors>=3.0.10',
-        'moto>=2.2.9',
-        'pre-commit>=2.15.0',
-        'pylint>=2.11.1',
+        'moto>=2.3.1',
+        'pre-commit>=2.16.0',
+        'pylint>=2.11.2',
         'pytest>=6.2.5',
-        'pytest-asyncio>=0.15.1',
+        'pytest-asyncio>=0.16.0',
         'pytest-cov>=3.0.0'
     ],
     zip_safe=False,