Merge pull request #3086 from GSA-TTS/main
jadudm authored Dec 26, 2023
2 parents a354bc0 + 84b08a1 commit c212fea
Showing 22 changed files with 282 additions and 226 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/historic-data-migrator.yml
@@ -43,4 +43,4 @@ jobs:
cf_password: ${{ secrets.CF_PASSWORD }}
cf_org: gsa-tts-oros-fac
cf_space: ${{ env.space }}
command: cf run-task gsa-fac -k 2G -m 2G --name historic_data_migrator --command "python manage.py historic_data_migrator --dbkeys ${{ inputs.dbkeys }} --years ${{ inputs.years }} --email ${{ inputs.email }}"
command: cf run-task gsa-fac -k 2G -m 2G --name historic_data_migrator --command "python manage.py historic_data_migrator --dbkeys ${{ inputs.dbkeys }} --years ${{ inputs.years }}"
62 changes: 62 additions & 0 deletions backend/census_historical_migration/api_test_helpers.py
@@ -0,0 +1,62 @@
from audit.utils import Util
from .base_field_maps import FormFieldInDissem, WorkbookFieldInDissem
from .sac_general_lib.report_id_generator import xform_dbkey_to_report_id
from .workbooklib.excel_creation_utils import apply_conversion_function


def generate_dissemination_test_table(
audit_header, api_endpoint, mappings=None, objects=None
):
"""Generates a test table for verifying the API queries results."""
table = {}
table["endpoint"] = api_endpoint
table["report_id"] = xform_dbkey_to_report_id(audit_header)

if mappings and objects:
table["rows"] = list()

for o in objects:
test_obj = {}
test_obj["fields"] = []
test_obj["values"] = []
for m in mappings:
# What if we only test non-null values?
raw_value = getattr(o, m.in_db, None)
attribute_value = apply_conversion_function(
raw_value, m.default, m.type
)
if (attribute_value is not None) and (attribute_value != ""):
if m.in_dissem == WorkbookFieldInDissem:
test_obj["fields"].append(m.in_sheet)
test_obj["values"].append(m.type(attribute_value))
else:
test_obj["fields"].append(m.in_dissem)
test_obj["values"].append(m.type(attribute_value))

table["rows"].append(test_obj)
else:
table["singletons"] = dict()

return table
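For reference, a sketch of the structure this helper produces; the endpoint, field names, and values below are hypothetical, and real report IDs come from xform_dbkey_to_report_id:

    # Illustrative shape when mappings and objects are supplied:
    example_table = {
        "endpoint": "federal_awards",
        "report_id": "EXAMPLE-REPORT-ID",
        "rows": [
            {
                "fields": ["federal_program_name", "amount_expended"],
                "values": ["Example Program", 100000],
            },
        ],
    }
    # With no mappings/objects, the table instead carries an (initially empty)
    # "singletons" dict for scalar fields checked one by one.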


def extract_api_data(mappings, section_object):
"""Extract data for verifying the API queries results."""
table = {}

for item in mappings:
value = section_object[item.in_form]

# Apply same transformations as in `intake_to_dissemination.py`
if item.type == bool:
value = Util.bool_to_yes_no(value)
elif item.type == list:
value = Util.json_array_to_str(value)

if item.in_dissem:
if item.in_dissem == FormFieldInDissem:
table[item.in_form] = value
else:
table[item.in_dissem] = value

return table
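A minimal usage sketch for extract_api_data; the mapping tuple and field names are made up for illustration, and the real mappings come from the workbook field maps:

    from collections import namedtuple

    # Hypothetical stand-in for the real field-map entries.
    FakeMapping = namedtuple("FakeMapping", "in_form in_dissem type")

    mappings = [
        FakeMapping("is_going_concern_included", "is_going_concern_included", bool),
        FakeMapping("agencies", "agencies_with_prior_findings", list),
    ]
    section_object = {
        "is_going_concern_included": True,
        "agencies": ["10", "45"],
    }
    # extract_api_data applies the same bool/list conversions used in
    # intake_to_dissemination.py and keys each value by its dissemination name.
    # Illustrative result (assuming bool_to_yes_no maps True to "Yes" and
    # json_array_to_str joins the array into one string):
    # {"is_going_concern_included": "Yes",
    #  "agencies_with_prior_findings": "10,45"}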
@@ -1,16 +1,16 @@
from .post_upload_utils import record_dummy_pdf_object
from ..exception_utils import (
from .workbooklib.post_upload_utils import record_dummy_pdf_object
from .exception_utils import (
DataMigrationError,
DataMigrationValueError,
)
from ..workbooklib.workbook_builder_loader import (
from .workbooklib.workbook_builder_loader import (
workbook_builder_loader,
)
from ..workbooklib.workbook_section_handlers import (
from .workbooklib.workbook_section_handlers import (
sections_to_handlers,
)
from ..sac_general_lib.sac_creator import setup_sac
from ..models import (
from .sac_general_lib.sac_creator import setup_sac
from .models import (
ReportMigrationStatus,
MigrationErrorDetail,
)
@@ -51,18 +51,16 @@


def step_through_certifications(sac):
stati = [
SingleAuditChecklist.STATUS.IN_PROGRESS,
SingleAuditChecklist.STATUS.READY_FOR_CERTIFICATION,
SingleAuditChecklist.STATUS.AUDITOR_CERTIFIED,
SingleAuditChecklist.STATUS.AUDITEE_CERTIFIED,
SingleAuditChecklist.STATUS.CERTIFIED,
SingleAuditChecklist.STATUS.SUBMITTED,
SingleAuditChecklist.STATUS.DISSEMINATED,
]
for status in stati:
sac.transition_name.append(status)
sac.transition_date.append(datetime.now(timezone.utc))
sac.transition_to_ready_for_certification()
sac.transition_to_auditor_certified()
sac.transition_to_auditee_certified()

# FIXME-MSHD: We have no method transition_to_certified()
sac.transition_name.append(SingleAuditChecklist.STATUS.CERTIFIED)
sac.transition_date.append(datetime.now(timezone.utc))

sac.transition_to_submitted()
sac.transition_to_disseminated()
sac.save()
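The CERTIFIED workaround above appends the status and date by hand because no transition_to_certified() helper exists; the other steps now go through the model's own transition methods, which are assumed to do roughly the following (a sketch, not the actual SingleAuditChecklist code):

    # Rough sketch of one transition helper; the real methods live in the
    # audit app and may differ (field names here are assumptions).
    def transition_to_submitted_sketch(sac):
        sac.transition_name.append(SingleAuditChecklist.STATUS.SUBMITTED)
        sac.transition_date.append(datetime.now(timezone.utc))
        sac.submission_status = SingleAuditChecklist.STATUS.SUBMITTED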


@@ -128,20 +126,20 @@ def are_they_both_none_or_empty(a, b):
return a_val and b_val


def check_equality(in_wb, in_json):
def check_equality(in_wb, in_api):
# Type requirement is sometimes just 'N'
if in_wb in ["Y", "N"] and isinstance(in_json, bool):
return (True if in_wb == "Y" else False) == in_json
elif just_numbers(in_wb) and just_numbers(in_json):
if in_wb in ["Y", "N"] and isinstance(in_api, bool):
return (True if in_wb == "Y" else False) == in_api
elif just_numbers(in_wb) and just_numbers(in_api):
return (
True if math.isclose(float(in_wb), float(in_json), rel_tol=1e-1) else False
True if math.isclose(float(in_wb), float(in_api), rel_tol=1e-1) else False
)
elif isinstance(in_wb, str) and isinstance(in_json, str):
return _compare_multiline_strings(in_wb, in_json)
elif in_wb is None or in_json is None:
return are_they_both_none_or_empty(in_wb, in_json)
elif isinstance(in_wb, str) and isinstance(in_api, str):
return _compare_multiline_strings(in_wb, in_api)
elif in_wb is None or in_api is None:
return are_they_both_none_or_empty(in_wb, in_api)
else:
return in_wb == in_json
return str(in_wb) == str(in_api)
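To make the comparison rules concrete, a few illustrative calls based on the branches above (assuming just_numbers() accepts anything float() can parse; these are not from the test suite):

    check_equality("Y", True)      # True  ("Y"/"N" flags compare against API booleans)
    check_equality("N", True)      # False
    check_equality("100", 101.0)   # True  (numeric strings compare with rel_tol=1e-1)
    check_equality("100", 150.0)   # False (outside the 10% tolerance)
    check_equality(None, "")       # True  (None and empty are treated as equivalent)
    # Anything matching none of the branches now falls back to a string
    # comparison, str(in_wb) == str(in_api), rather than a raw == check.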


def _compare_multiline_strings(str1, str2):
@@ -192,56 +190,73 @@ def combine_counts(combined, d):
return combined


def process_singletons(endo, summary):
"""Process the singletons in the JSON test table"""
for field, value in endo.get("singletons", {}).items():
api_values = get_api_values(endo["endpoint"], endo["report_id"], field)
eq = check_equality(value, api_values[0])
if eq:
count(summary, "correct_fields")
else:
logger.info(
f"Does not match. [eq {eq}] [field {field}] [field val {value}] != [api val {api_values[0]}]"
)
count(summary, "incorrect_fields")


def process_rows(endo, combined_summary, summary):
"""Process the rows in the JSON test table"""
rows = endo.get("rows", [])
equality_results = []
for row_ndx, row in enumerate(rows):
count(summary, "total_rows")

if False in equality_results:
count(combined_summary, "incorrect_rows")
else:
count(combined_summary, "correct_rows")

equality_results = []

for field_ndx, f in enumerate(row["fields"]):
# logger.info(f"Checking /{endo["endpoint"]} {endo["report_id"]} {f}")
# logger.info(f"{get_api_values(endo["endpoint"], endo["report_id"], f)}")
api_values = get_api_values(endo["endpoint"], endo["report_id"], f)
this_api_value = api_values[row_ndx]
if field_ndx < len(row["values"]):
this_field_value = row["values"][field_ndx]
eq = check_equality(this_field_value, this_api_value)
if not eq:
logger.info(
f"Does not match. [eq {eq}] [field {f}] [field val {this_field_value}] != [api val {this_api_value}]"
)
equality_results.append(eq)
else:
# Log a message if field_ndx does not exist
logger.info(
f"Index {field_ndx} out of range for 'values' in row. Max index is {len(row['values']) - 1}"
)
logger.info(
f"Field '{f}' with value '{this_api_value}' at index '{field_ndx}' is missing from test tables 'values'."
)

if all(equality_results):
count(summary, "correct_fields")
else:
count(summary, "incorrect_fields")


def api_check(json_test_tables):
combined_summary = {"endpoints": 0, "correct_rows": 0, "incorrect_rows": 0}

for endo in json_test_tables:
count(combined_summary, "endpoints")
endpoint = endo["endpoint"]
report_id = endo["report_id"]
summary = {}
equality_results = []

logger.info(f"-------------------- {endpoint} --------------------")

for row_ndx, row in enumerate(endo["rows"]):
count(summary, "total_rows")

if False in equality_results:
count(combined_summary, "incorrect_rows")
else:
count(combined_summary, "correct_rows")

equality_results = []

for field_ndx, f in enumerate(row["fields"]):
# logger.info(f"Checking /{endpoint} {report_id} {f}")
# logger.info(f"{get_api_values(endpoint, report_id, f)}")
api_values = get_api_values(endpoint, report_id, f)
this_api_value = api_values[row_ndx]

# Check if field_ndx exists in row["values"]
if field_ndx < len(row["values"]):
this_field_value = row["values"][field_ndx]
eq = check_equality(this_field_value, this_api_value)
if not eq:
logger.info(
f"Does not match. [eq {eq}] [field {f}] [field val {this_field_value}] != [api val {this_api_value}]"
)
equality_results.append(eq)
else:
# Log a message if field_ndx does not exist
logger.info(
f"Index {field_ndx} out of range for 'values' in row. Max index is {len(row['values']) - 1}"
)
logger.info(
f"Field '{f}' with value '{this_api_value}' at index '{field_ndx}' is missing from test tables 'values'."
)

if all(equality_results):
count(summary, "correct_fields")
else:
count(summary, "incorrect_fields")
process_singletons(endo, summary)
process_rows(endo, combined_summary, summary)

logger.info(summary)
combined_summary = combine_counts(combined_summary, summary)
@@ -252,7 +267,7 @@ def api_check(json_test_tables):
def run_end_to_end(user, audit_header, result):
"""Attempts to migrate the given audit"""
try:
sac = setup_sac(user, audit_header)
sac, gen_api_data = setup_sac(user, audit_header)

if sac.general_information["audit_type"] == "alternative-compliance-engagement":
logger.info(
@@ -271,6 +286,13 @@ def run_end_to_end(user, audit_header, result):
(_, json, _) = builder_loader(fun, section)
json_test_tables.append(json)

# Append total amount expended to general table checker
gen_api_data["singletons"]["total_amount_expended"] = sac.federal_awards[
"FederalAwards"
]["total_amount_expended"]

json_test_tables.append(gen_api_data)

record_dummy_pdf_object(sac, user)

step_through_certifications(sac)
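Taken together, the API verification added here flows roughly as follows; this is a condensed sketch of the calls above, and any plumbing elided from the diff is marked as assumed:

    sac, gen_api_data = setup_sac(user, audit_header)

    json_test_tables = []
    # Each workbook section appends its own test table:
    #   (_, json, _) = builder_loader(fun, section)
    #   json_test_tables.append(json)

    # total_amount_expended is checked as a singleton on the general table.
    gen_api_data["singletons"]["total_amount_expended"] = sac.federal_awards[
        "FederalAwards"
    ]["total_amount_expended"]
    json_test_tables.append(gen_api_data)

    api_check(json_test_tables)  # assumed call; not shown in this excerpt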
6 changes: 3 additions & 3 deletions backend/census_historical_migration/historic_data_loader.py
@@ -1,6 +1,6 @@
import logging
from .models import ELECAUDITHEADER as AuditHeader
from .workbooklib.end_to_end_core import run_end_to_end
from .end_to_end_core import run_end_to_end

from django.contrib.auth import get_user_model
from django.core.paginator import Paginator
@@ -51,7 +51,7 @@ def log_results(result_log, error_count, total_count):
logger.info("********* Loader Summary ***************")

for k, v in result_log.items():
logger.info(k, v)
logger.info(f"{k}, {v}")
logger.info("-------------------")

logger.info(f"{error_count} errors out of {total_count}")
@@ -67,7 +67,7 @@ def create_or_get_user():
if users:
user = users.first()
else:
logger.info("Creating user", user_email, user_name)
logger.info("Creating user %s %s", user_email, user_name)
user = User(username=user_name, email=user_email)
user.save()

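Both logging fixes in this file address the same pitfall: logger positional arguments are %-style format args, not extra values to print. A standalone illustration (not from the repo):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    user_email, user_name = "fac-loader@example.gov", "fac-loader"

    # Broken: no % placeholders, so logging raises a TypeError internally and
    # prints "--- Logging error ---" instead of the intended message.
    # logger.info("Creating user", user_email, user_name)

    # Fixed with lazy %-formatting (as in create_or_get_user):
    logger.info("Creating user %s %s", user_email, user_name)

    # Fixed with an eager f-string (as in log_results):
    logger.info(f"{user_email}, {user_name}")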
@@ -87,16 +87,16 @@ def process_csv_files(self, folder, chunk_size):
def display_row_counts(self, models):
for mdl in models:
row_count = mdl.objects.all().count()
logger.info(f"{row_count} in ", mdl)
logger.info(f"{row_count} in {mdl}")

def delete_data(self):
for mdl in census_to_gsafac_models:
logger.info("Deleting ", mdl)
logger.info("Deleting %s", mdl)
mdl.objects.all().delete()

def sample_data(self):
for mdl in census_to_gsafac_models:
logger.info("Sampling ", mdl)
logger.info("Sampling %s", mdl)
rows = mdl.objects.all()[:1]
for row in rows:
for col in mdl._meta.fields:
@@ -116,11 +116,11 @@ def get_s3_object(self, bucket_name, key, model_obj):
return file

def get_model_name(self, name):
logger.info("Processing ", name)
logger.info("Processing %s", name)
file_name = name.split("/")[-1].split(".")[0]
for model_name in census_to_gsafac_model_names:
if file_name.lower().startswith(model_name):
logger.info("model_name = ", model_name)
logger.info("model_name = %s", model_name)
return model_name
logger.error("Could not find a matching model for ", name)
return None
@@ -139,5 +139,5 @@ def load_data(self, file, model_obj, chunk_size):
obj = model_obj(**row)
obj.save()
rows_loaded += df.shape[0]
logger.info(f"Loaded {rows_loaded} rows in ", model_obj)
logger.info(f"Loaded {rows_loaded} rows in {model_obj}")
return None
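For context on the load_data log line fixed above, the chunked CSV-to-model flow is roughly as follows; this is a sketch under assumptions, and the real method's column handling and chunking may differ:

    import logging
    import pandas as pd

    logger = logging.getLogger(__name__)

    def load_data_sketch(file, model_obj, chunk_size):
        rows_loaded = 0
        # Read the CSV in chunks so large Census exports need not fit in memory.
        for df in pd.read_csv(file, chunksize=chunk_size):
            for _, row in df.iterrows():
                model_obj(**row).save()  # one Django model instance per CSV row
            rows_loaded += df.shape[0]
            logger.info(f"Loaded {rows_loaded} rows in {model_obj}")
        return None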
@@ -11,7 +11,7 @@
create_or_get_user,
log_results,
)
from census_historical_migration.workbooklib.end_to_end_core import run_end_to_end
from census_historical_migration.end_to_end_core import run_end_to_end
from django.conf import settings

logger = logging.getLogger(__name__)
@@ -1,5 +1,7 @@
import re

from django.conf import settings
from ..api_test_helpers import extract_api_data
from ..transforms.xform_string_to_int import string_to_int
from ..transforms.xform_string_to_bool import string_to_bool
from ..exception_utils import DataMigrationError
@@ -19,7 +21,7 @@ def xform_apply_default_thresholds(value):
str_value = string_to_string(value)
if str_value == "":
# FIXME-MSHD: This is a transformation that we may want to record
return -1
return settings.GSA_MIGRATION_INT
return string_to_int(str_value)
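A quick illustration of the new default; GSA_MIGRATION_INT is assumed to be a sentinel defined in Django settings, and its concrete value is not shown here:

    from django.conf import settings

    # Blank Census values now map to the designated migration sentinel
    # instead of a hard-coded -1:
    assert xform_apply_default_thresholds("") == settings.GSA_MIGRATION_INT
    assert xform_apply_default_thresholds("750000") == 750000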


@@ -163,7 +165,8 @@ def audit_information(audit_header):
}
audit_info["agencies"] = list(agencies_prefixes)

# Validate against the schema
audit.validators.validate_audit_information_json(audit_info)

return audit_info
api_data = extract_api_data(mappings, audit_info)

return (audit_info, api_data)
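This mirrors the setup_sac change seen earlier: section builders now return the SAC payload together with API test data. A sketch of how the extra value is assumed to be wired up (the exact code in sac_creator is not part of this diff, and the endpoint name is an assumption):

    # Inside setup_sac (sketch): keep the general-information test table
    # alongside the SAC being built.
    audit_info, audit_info_api_data = audit_information(audit_header)
    gen_api_data = {
        "endpoint": "general",  # assumed endpoint name
        "report_id": xform_dbkey_to_report_id(audit_header),
        "singletons": audit_info_api_data,
    }
    # run_end_to_end later adds total_amount_expended to these singletons and
    # hands the table to api_check() with the workbook test tables.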