Merge pull request #3086 from GSA-TTS/main
jadudm authored Dec 26, 2023
2 parents a354bc0 + 84b08a1 commit c212fea
Showing 22 changed files with 282 additions and 226 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/historic-data-migrator.yml
@@ -43,4 +43,4 @@ jobs:
cf_password: ${{ secrets.CF_PASSWORD }}
cf_org: gsa-tts-oros-fac
cf_space: ${{ env.space }}
command: cf run-task gsa-fac -k 2G -m 2G --name historic_data_migrator --command "python manage.py historic_data_migrator --dbkeys ${{ inputs.dbkeys }} --years ${{ inputs.years }} --email ${{ inputs.email }}"
command: cf run-task gsa-fac -k 2G -m 2G --name historic_data_migrator --command "python manage.py historic_data_migrator --dbkeys ${{ inputs.dbkeys }} --years ${{ inputs.years }}"
62 changes: 62 additions & 0 deletions backend/census_historical_migration/api_test_helpers.py
@@ -0,0 +1,62 @@
from audit.utils import Util
from .base_field_maps import FormFieldInDissem, WorkbookFieldInDissem
from .sac_general_lib.report_id_generator import xform_dbkey_to_report_id
from .workbooklib.excel_creation_utils import apply_conversion_function


def generate_dissemination_test_table(
audit_header, api_endpoint, mappings=None, objects=None
):
"""Generates a test table for verifying the API queries results."""
table = {}
table["endpoint"] = api_endpoint
table["report_id"] = xform_dbkey_to_report_id(audit_header)

if mappings and objects:
table["rows"] = list()

for o in objects:
test_obj = {}
test_obj["fields"] = []
test_obj["values"] = []
for m in mappings:
# What if we only test non-null values?
raw_value = getattr(o, m.in_db, None)
attribute_value = apply_conversion_function(
raw_value, m.default, m.type
)
if (attribute_value is not None) and (attribute_value != ""):
if m.in_dissem == WorkbookFieldInDissem:
test_obj["fields"].append(m.in_sheet)
test_obj["values"].append(m.type(attribute_value))
else:
test_obj["fields"].append(m.in_dissem)
test_obj["values"].append(m.type(attribute_value))

table["rows"].append(test_obj)
else:
table["singletons"] = dict()

return table
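For reference, a sketch of the structure this helper produces; the endpoint, field names, and values below are hypothetical, and real report IDs come from xform_dbkey_to_report_id:

    # Illustrative shape when mappings and objects are supplied:
    example_table = {
        "endpoint": "federal_awards",
        "report_id": "EXAMPLE-REPORT-ID",
        "rows": [
            {
                "fields": ["federal_program_name", "amount_expended"],
                "values": ["Example Program", 100000],
            },
        ],
    }
    # With no mappings/objects, the table instead carries an (initially empty)
    # "singletons" dict for scalar fields checked one by one.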


def extract_api_data(mappings, section_object):
"""Extract data for verifying the API queries results."""
table = {}

for item in mappings:
value = section_object[item.in_form]

# Apply same transformations as in `intake_to_dissemination.py`
if item.type == bool:
value = Util.bool_to_yes_no(value)
elif item.type == list:
value = Util.json_array_to_str(value)

if item.in_dissem:
if item.in_dissem == FormFieldInDissem:
table[item.in_form] = value
else:
table[item.in_dissem] = value

return table
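A minimal usage sketch for extract_api_data; the mapping tuple and field names are made up for illustration, and the real mappings come from the workbook field maps:

    from collections import namedtuple

    # Hypothetical stand-in for the real field-map entries.
    FakeMapping = namedtuple("FakeMapping", "in_form in_dissem type")

    mappings = [
        FakeMapping("is_going_concern_included", "is_going_concern_included", bool),
        FakeMapping("agencies", "agencies_with_prior_findings", list),
    ]
    section_object = {
        "is_going_concern_included": True,
        "agencies": ["10", "45"],
    }
    # extract_api_data applies the same bool/list conversions used in
    # intake_to_dissemination.py and keys each value by its dissemination name.
    # Illustrative result (assuming bool_to_yes_no maps True to "Yes" and
    # json_array_to_str joins the array into one string):
    # {"is_going_concern_included": "Yes",
    #  "agencies_with_prior_findings": "10,45"}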
@@ -1,16 +1,16 @@
from .post_upload_utils import record_dummy_pdf_object
from ..exception_utils import (
from .workbooklib.post_upload_utils import record_dummy_pdf_object
from .exception_utils import (
DataMigrationError,
DataMigrationValueError,
)
from ..workbooklib.workbook_builder_loader import (
from .workbooklib.workbook_builder_loader import (
workbook_builder_loader,
)
from ..workbooklib.workbook_section_handlers import (
from .workbooklib.workbook_section_handlers import (
sections_to_handlers,
)
from ..sac_general_lib.sac_creator import setup_sac
from ..models import (
from .sac_general_lib.sac_creator import setup_sac
from .models import (
ReportMigrationStatus,
MigrationErrorDetail,
)
@@ -51,18 +51,16 @@


def step_through_certifications(sac):
stati = [
SingleAuditChecklist.STATUS.IN_PROGRESS,
SingleAuditChecklist.STATUS.READY_FOR_CERTIFICATION,
SingleAuditChecklist.STATUS.AUDITOR_CERTIFIED,
SingleAuditChecklist.STATUS.AUDITEE_CERTIFIED,
SingleAuditChecklist.STATUS.CERTIFIED,
SingleAuditChecklist.STATUS.SUBMITTED,
SingleAuditChecklist.STATUS.DISSEMINATED,
]
for status in stati:
sac.transition_name.append(status)
sac.transition_date.append(datetime.now(timezone.utc))
sac.transition_to_ready_for_certification()
sac.transition_to_auditor_certified()
sac.transition_to_auditee_certified()

# FIXME-MSHD: We have no method transition_to_certified()
sac.transition_name.append(SingleAuditChecklist.STATUS.CERTIFIED)
sac.transition_date.append(datetime.now(timezone.utc))

sac.transition_to_submitted()
sac.transition_to_disseminated()
sac.save()
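The CERTIFIED workaround above appends the status and date by hand because no transition_to_certified() helper exists; the other steps now go through the model's own transition methods, which are assumed to do roughly the following (a sketch, not the actual SingleAuditChecklist code):

    # Rough sketch of one transition helper; the real methods live in the
    # audit app and may differ (field names here are assumptions).
    def transition_to_submitted_sketch(sac):
        sac.transition_name.append(SingleAuditChecklist.STATUS.SUBMITTED)
        sac.transition_date.append(datetime.now(timezone.utc))
        sac.submission_status = SingleAuditChecklist.STATUS.SUBMITTED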


@@ -128,20 +126,20 @@ def are_they_both_none_or_empty(a, b):
return a_val and b_val


def check_equality(in_wb, in_json):
def check_equality(in_wb, in_api):
# Type requirement is sometimes just 'N'
if in_wb in ["Y", "N"] and isinstance(in_json, bool):
return (True if in_wb == "Y" else False) == in_json
elif just_numbers(in_wb) and just_numbers(in_json):
if in_wb in ["Y", "N"] and isinstance(in_api, bool):
return (True if in_wb == "Y" else False) == in_api
elif just_numbers(in_wb) and just_numbers(in_api):
return (
True if math.isclose(float(in_wb), float(in_json), rel_tol=1e-1) else False
True if math.isclose(float(in_wb), float(in_api), rel_tol=1e-1) else False
)
elif isinstance(in_wb, str) and isinstance(in_json, str):
return _compare_multiline_strings(in_wb, in_json)
elif in_wb is None or in_json is None:
return are_they_both_none_or_empty(in_wb, in_json)
elif isinstance(in_wb, str) and isinstance(in_api, str):
return _compare_multiline_strings(in_wb, in_api)
elif in_wb is None or in_api is None:
return are_they_both_none_or_empty(in_wb, in_api)
else:
return in_wb == in_json
return str(in_wb) == str(in_api)
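To make the comparison rules concrete, a few illustrative calls based on the branches above (assuming just_numbers() accepts anything float() can parse; these are not from the test suite):

    check_equality("Y", True)      # True  ("Y"/"N" flags compare against API booleans)
    check_equality("N", True)      # False
    check_equality("100", 101.0)   # True  (numeric strings compare with rel_tol=1e-1)
    check_equality("100", 150.0)   # False (outside the 10% tolerance)
    check_equality(None, "")       # True  (None and empty are treated as equivalent)
    # Anything matching none of the branches now falls back to a string
    # comparison, str(in_wb) == str(in_api), rather than a raw == check.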


def _compare_multiline_strings(str1, str2):
@@ -192,56 +190,73 @@ def combine_counts(combined, d):
return combined


def process_singletons(endo, summary):
"""Process the singletons in the JSON test table"""
for field, value in endo.get("singletons", {}).items():
api_values = get_api_values(endo["endpoint"], endo["report_id"], field)
eq = check_equality(value, api_values[0])
if eq:
count(summary, "correct_fields")
else:
logger.info(
f"Does not match. [eq {eq}] [field {field}] [field val {value}] != [api val {api_values[0]}]"
)
count(summary, "incorrect_fields")


def process_rows(endo, combined_summary, summary):
"""Process the rows in the JSON test table"""
rows = endo.get("rows", [])
equality_results = []
for row_ndx, row in enumerate(rows):
count(summary, "total_rows")

if False in equality_results:
count(combined_summary, "incorrect_rows")
else:
count(combined_summary, "correct_rows")

equality_results = []

for field_ndx, f in enumerate(row["fields"]):
# logger.info(f"Checking /{endo["endpoint"]} {endo["report_id"]} {f}")
# logger.info(f"{get_api_values(endo["endpoint"], endo["report_id"], f)}")
api_values = get_api_values(endo["endpoint"], endo["report_id"], f)
this_api_value = api_values[row_ndx]
if field_ndx < len(row["values"]):
this_field_value = row["values"][field_ndx]
eq = check_equality(this_field_value, this_api_value)
if not eq:
logger.info(
f"Does not match. [eq {eq}] [field {f}] [field val {this_field_value}] != [api val {this_api_value}]"
)
equality_results.append(eq)
else:
# Log a message if field_ndx does not exist
logger.info(
f"Index {field_ndx} out of range for 'values' in row. Max index is {len(row['values']) - 1}"
)
logger.info(
f"Field '{f}' with value '{this_api_value}' at index '{field_ndx}' is missing from test tables 'values'."
)

if all(equality_results):
count(summary, "correct_fields")
else:
count(summary, "incorrect_fields")


def api_check(json_test_tables):
combined_summary = {"endpoints": 0, "correct_rows": 0, "incorrect_rows": 0}

for endo in json_test_tables:
count(combined_summary, "endpoints")
endpoint = endo["endpoint"]
report_id = endo["report_id"]
summary = {}
equality_results = []

logger.info(f"-------------------- {endpoint} --------------------")

for row_ndx, row in enumerate(endo["rows"]):
count(summary, "total_rows")

if False in equality_results:
count(combined_summary, "incorrect_rows")
else:
count(combined_summary, "correct_rows")

equality_results = []

for field_ndx, f in enumerate(row["fields"]):
# logger.info(f"Checking /{endpoint} {report_id} {f}")
# logger.info(f"{get_api_values(endpoint, report_id, f)}")
api_values = get_api_values(endpoint, report_id, f)
this_api_value = api_values[row_ndx]

# Check if field_ndx exists in row["values"]
if field_ndx < len(row["values"]):
this_field_value = row["values"][field_ndx]
eq = check_equality(this_field_value, this_api_value)
if not eq:
logger.info(
f"Does not match. [eq {eq}] [field {f}] [field val {this_field_value}] != [api val {this_api_value}]"
)
equality_results.append(eq)
else:
# Log a message if field_ndx does not exist
logger.info(
f"Index {field_ndx} out of range for 'values' in row. Max index is {len(row['values']) - 1}"
)
logger.info(
f"Field '{f}' with value '{this_api_value}' at index '{field_ndx}' is missing from test tables 'values'."
)

if all(equality_results):
count(summary, "correct_fields")
else:
count(summary, "incorrect_fields")
process_singletons(endo, summary)
process_rows(endo, combined_summary, summary)

logger.info(summary)
combined_summary = combine_counts(combined_summary, summary)
@@ -252,7 +267,7 @@ def api_check(json_test_tables):
def run_end_to_end(user, audit_header, result):
"""Attempts to migrate the given audit"""
try:
sac = setup_sac(user, audit_header)
sac, gen_api_data = setup_sac(user, audit_header)

if sac.general_information["audit_type"] == "alternative-compliance-engagement":
logger.info(
@@ -271,6 +286,13 @@ def run_end_to_end(user, audit_header, result):
(_, json, _) = builder_loader(fun, section)
json_test_tables.append(json)

# Append total amount expended to general table checker
gen_api_data["singletons"]["total_amount_expended"] = sac.federal_awards[
"FederalAwards"
]["total_amount_expended"]

json_test_tables.append(gen_api_data)

record_dummy_pdf_object(sac, user)

step_through_certifications(sac)
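Taken together, the API verification added here flows roughly as follows; this is a condensed sketch of the calls above, and any plumbing elided from the diff is marked as assumed:

    sac, gen_api_data = setup_sac(user, audit_header)

    json_test_tables = []
    # Each workbook section appends its own test table:
    #   (_, json, _) = builder_loader(fun, section)
    #   json_test_tables.append(json)

    # total_amount_expended is checked as a singleton on the general table.
    gen_api_data["singletons"]["total_amount_expended"] = sac.federal_awards[
        "FederalAwards"
    ]["total_amount_expended"]
    json_test_tables.append(gen_api_data)

    api_check(json_test_tables)  # assumed call; not shown in this excerpt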
6 changes: 3 additions & 3 deletions backend/census_historical_migration/historic_data_loader.py
@@ -1,6 +1,6 @@
import logging
from .models import ELECAUDITHEADER as AuditHeader
from .workbooklib.end_to_end_core import run_end_to_end
from .end_to_end_core import run_end_to_end

from django.contrib.auth import get_user_model
from django.core.paginator import Paginator
@@ -51,7 +51,7 @@ def log_results(result_log, error_count, total_count):
logger.info("********* Loader Summary ***************")

for k, v in result_log.items():
logger.info(k, v)
logger.info(f"{k}, {v}")
logger.info("-------------------")

logger.info(f"{error_count} errors out of {total_count}")
@@ -67,7 +67,7 @@ def create_or_get_user():
if users:
user = users.first()
else:
logger.info("Creating user", user_email, user_name)
logger.info("Creating user %s %s", user_email, user_name)
user = User(username=user_name, email=user_email)
user.save()

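Both logging fixes in this file address the same pitfall: logger positional arguments are %-style format args, not extra values to print. A standalone illustration (not from the repo):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    user_email, user_name = "fac-loader@example.gov", "fac-loader"

    # Broken: no % placeholders, so logging raises a TypeError internally and
    # prints "--- Logging error ---" instead of the intended message.
    # logger.info("Creating user", user_email, user_name)

    # Fixed with lazy %-formatting (as in create_or_get_user):
    logger.info("Creating user %s %s", user_email, user_name)

    # Fixed with an eager f-string (as in log_results):
    logger.info(f"{user_email}, {user_name}")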
@@ -87,16 +87,16 @@ def process_csv_files(self, folder, chunk_size):
def display_row_counts(self, models):
for mdl in models:
row_count = mdl.objects.all().count()
logger.info(f"{row_count} in ", mdl)
logger.info(f"{row_count} in {mdl}")

def delete_data(self):
for mdl in census_to_gsafac_models:
logger.info("Deleting ", mdl)
logger.info("Deleting %s", mdl)
mdl.objects.all().delete()

def sample_data(self):
for mdl in census_to_gsafac_models:
logger.info("Sampling ", mdl)
logger.info("Sampling %s", mdl)
rows = mdl.objects.all()[:1]
for row in rows:
for col in mdl._meta.fields:
@@ -116,11 +116,11 @@ def get_s3_object(self, bucket_name, key, model_obj):
return file

def get_model_name(self, name):
logger.info("Processing ", name)
logger.info("Processing %s", name)
file_name = name.split("/")[-1].split(".")[0]
for model_name in census_to_gsafac_model_names:
if file_name.lower().startswith(model_name):
logger.info("model_name = ", model_name)
logger.info("model_name = %s", model_name)
return model_name
logger.error("Could not find a matching model for ", name)
return None
@@ -139,5 +139,5 @@ def load_data(self, file, model_obj, chunk_size):
obj = model_obj(**row)
obj.save()
rows_loaded += df.shape[0]
logger.info(f"Loaded {rows_loaded} rows in ", model_obj)
logger.info(f"Loaded {rows_loaded} rows in {model_obj}")
return None
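For context on the load_data log line fixed above, the chunked CSV-to-model flow is roughly as follows; this is a sketch under assumptions, and the real method's column handling and chunking may differ:

    import logging
    import pandas as pd

    logger = logging.getLogger(__name__)

    def load_data_sketch(file, model_obj, chunk_size):
        rows_loaded = 0
        # Read the CSV in chunks so large Census exports need not fit in memory.
        for df in pd.read_csv(file, chunksize=chunk_size):
            for _, row in df.iterrows():
                model_obj(**row).save()  # one Django model instance per CSV row
            rows_loaded += df.shape[0]
            logger.info(f"Loaded {rows_loaded} rows in {model_obj}")
        return None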
@@ -11,7 +11,7 @@
create_or_get_user,
log_results,
)
from census_historical_migration.workbooklib.end_to_end_core import run_end_to_end
from census_historical_migration.end_to_end_core import run_end_to_end
from django.conf import settings

logger = logging.getLogger(__name__)
@@ -1,5 +1,7 @@
import re

from django.conf import settings
from ..api_test_helpers import extract_api_data
from ..transforms.xform_string_to_int import string_to_int
from ..transforms.xform_string_to_bool import string_to_bool
from ..exception_utils import DataMigrationError
@@ -19,7 +21,7 @@ def xform_apply_default_thresholds(value):
str_value = string_to_string(value)
if str_value == "":
# FIXME-MSHD: This is a transformation that we may want to record
return -1
return settings.GSA_MIGRATION_INT
return string_to_int(str_value)
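A quick illustration of the new default; GSA_MIGRATION_INT is assumed to be a sentinel defined in Django settings, and its concrete value is not shown here:

    from django.conf import settings

    # Blank Census values now map to the designated migration sentinel
    # instead of a hard-coded -1:
    assert xform_apply_default_thresholds("") == settings.GSA_MIGRATION_INT
    assert xform_apply_default_thresholds("750000") == 750000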


@@ -163,7 +165,8 @@ def audit_information(audit_header):
}
audit_info["agencies"] = list(agencies_prefixes)

# Validate against the schema
audit.validators.validate_audit_information_json(audit_info)

return audit_info
api_data = extract_api_data(mappings, audit_info)

return (audit_info, api_data)
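This mirrors the setup_sac change seen earlier: section builders now return the SAC payload together with API test data. A sketch of how the extra value is assumed to be wired up (the exact code in sac_creator is not part of this diff, and the endpoint name is an assumption):

    # Inside setup_sac (sketch): keep the general-information test table
    # alongside the SAC being built.
    audit_info, audit_info_api_data = audit_information(audit_header)
    gen_api_data = {
        "endpoint": "general",  # assumed endpoint name
        "report_id": xform_dbkey_to_report_id(audit_header),
        "singletons": audit_info_api_data,
    }
    # run_end_to_end later adds total_amount_expended to these singletons and
    # hands the table to api_check() with the workbook test tables.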