diff --git a/backend/audit/cross_validation/check_award_ref_declaration.py b/backend/audit/cross_validation/check_award_ref_declaration.py
index d04eebba8..ec90caabc 100644
--- a/backend/audit/cross_validation/check_award_ref_declaration.py
+++ b/backend/audit/cross_validation/check_award_ref_declaration.py
@@ -22,19 +22,82 @@ def check_award_ref_declaration(sac_dict, *_args, **_kwargs):
     declared_award_refs = set()
     reported_award_refs = set()
     errors = []
-
+    declared_award_ref_max_length = 0
+    reported_award_ref_max_length = 0
     for award in federal_awards:
         award_ref = award.get("award_reference")
         if award_ref:
             declared_award_refs.add(award_ref)
+            if len(award_ref) > declared_award_ref_max_length:
+                declared_award_ref_max_length = len(award_ref)
 
     for finding in findings_uniform_guidance:
         award_ref = finding["program"]["award_reference"]
         if award_ref:
             reported_award_refs.add(award_ref)
+            if len(award_ref) > reported_award_ref_max_length:
+                reported_award_ref_max_length = len(award_ref)
+
+    updated_declared_refs, updated_reported_refs = _normalize_award_ref_lengths(
+        declared_award_ref_max_length,
+        reported_award_ref_max_length,
+        federal_awards,
+        findings_uniform_guidance,
+    )
+    if updated_declared_refs:
+        declared_award_refs = updated_declared_refs
+    if updated_reported_refs:
+        reported_award_refs = updated_reported_refs
 
     difference = reported_award_refs.difference(declared_award_refs)
     if difference:
         errors.append({"error": err_award_ref_not_declared(list(difference))})
 
     return errors
+
+
+def _normalize_award_ref_lengths(
+    declared_award_ref_max_length,
+    reported_award_ref_max_length,
+    federal_awards,
+    findings_uniform_guidance,
+):
+    """
+    Normalize the lengths of the award references in the Federal Awards and
+    Federal Awards Audit Findings workbooks before validation.
+    """
+    reported_award_refs = set()
+    declared_award_refs = set()
+    if declared_award_ref_max_length > reported_award_ref_max_length:
+        # This is unlikely to happen, but still a good check. It means
+        # that the version of the Federal Awards workbook is newer than
+        # the version of the Federal Awards Audit Findings workbook.
+        diff = declared_award_ref_max_length - reported_award_ref_max_length
+        padding = "0" * diff
+
+        for finding in findings_uniform_guidance:
+            award_ref = finding["program"]["award_reference"]
+            if award_ref:
+                award_ref = (
+                    f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
+                )
+                reported_award_refs.add(award_ref)
+    elif declared_award_ref_max_length < reported_award_ref_max_length:
+        # This is more likely to happen. It means the version of
+        # the Federal Awards Audit Findings workbook is newer than
+        # the version of the Federal Awards workbook.
+        diff = reported_award_ref_max_length - declared_award_ref_max_length
+        padding = "0" * diff
+
+        for award in federal_awards:
+            award_ref = award.get("award_reference")
+            if award_ref:
+                award_ref = (
+                    f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
+                )
+                declared_award_refs.add(award_ref)
+    else:
+        # If the lengths are the same, do nothing.
+        pass
+
+    return declared_award_refs, reported_award_refs
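Review note on the normalization above: the goal is to make 4-digit and 5-digit award references comparable by left-padding the numeric suffix with zeros. A minimal standalone sketch of that idea, using a hypothetical `pad_award_ref` helper rather than the module's own code:

```python
# Hypothetical helper illustrating the zero-padding idea (not the project's API):
# left-pad the numeric suffix so "AWARD-0123" compares equal to "AWARD-00123".
def pad_award_ref(award_ref: str, width: int) -> str:
    prefix, number = award_ref.split("-")
    return f"{prefix}-{number.zfill(width)}"


assert pad_award_ref("AWARD-0123", 5) == "AWARD-00123"
assert pad_award_ref("AWARD-00123", 5) == "AWARD-00123"  # already normalized
```

Note that `zfill` pads to a target width, whereas the patch pads by the difference between the two observed maximum lengths; for well-formed references the result is the same.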
diff --git a/backend/audit/cross_validation/check_findings_count_consistency.py b/backend/audit/cross_validation/check_findings_count_consistency.py
index 08d1fcfef..db753e00b 100644
--- a/backend/audit/cross_validation/check_findings_count_consistency.py
+++ b/backend/audit/cross_validation/check_findings_count_consistency.py
@@ -26,37 +26,137 @@ def check_findings_count_consistency(sac_dict, *_args, **_kwargs):
     expected_award_refs_count = {}
     found_award_refs_count = defaultdict(int)
     errors = []
-    if (
+    if _should_skip_validation(data_source):
+        return errors
+
+    expected_award_refs_count, declared_award_ref_max_length = _get_federal_award_refs(
+        federal_awards
+    )
+    found_award_refs_count, reported_award_ref_max_length = _get_findings_award_refs(
+        findings_uniform_guidance, expected_award_refs_count
+    )
+
+    updated_expected_refs_count, updated_found_refs_count = (
+        _normalize_award_ref_lengths(
+            declared_award_ref_max_length,
+            reported_award_ref_max_length,
+            federal_awards,
+            findings_uniform_guidance,
+        )
+    )
+
+    if updated_expected_refs_count:
+        expected_award_refs_count = updated_expected_refs_count
+
+    if updated_found_refs_count:
+        found_award_refs_count = updated_found_refs_count
+
+    errors = _validate_findings(expected_award_refs_count, found_award_refs_count)
+
+    return errors
+
+
+def _should_skip_validation(data_source):
+    # Skip this validation if it is an historical audit report with incorrect findings count
+    return (
         data_source == settings.CENSUS_DATA_SOURCE
         and "check_findings_count_consistency"
         in InvalidRecord.fields["validations_to_skip"]
-    ):
-        # Skip this validation if it is an historical audit report with incorrect findings count
-        return errors
+    )
+
+
+def _get_federal_award_refs(federal_awards):
+    declared_award_ref_max_length = 0
+    expected_award_refs_count = {}
 
     for award in federal_awards:
-        award_reference = award.get("award_reference", None)
+        award_reference = award.get("award_reference")
         if award_reference:
+            declared_award_ref_max_length = max(
+                declared_award_ref_max_length, len(award_reference)
+            )
             expected_award_refs_count[award_reference] = award["program"][
                 "number_of_audit_findings"
             ]
 
+    return expected_award_refs_count, declared_award_ref_max_length
+
+
+def _get_findings_award_refs(findings_uniform_guidance, expected_award_refs_count):
+    reported_award_ref_max_length = 0
+    found_award_refs_count = defaultdict(int)
+
     for finding in findings_uniform_guidance:
         award_ref = finding["program"]["award_reference"]
-        if award_ref in expected_award_refs_count:
-            found_award_refs_count[award_ref] += 1
+        if award_ref:
+            reported_award_ref_max_length = max(
+                reported_award_ref_max_length, len(award_ref)
+            )
+            if award_ref in expected_award_refs_count:
+                found_award_refs_count[award_ref] += 1
+
+    return found_award_refs_count, reported_award_ref_max_length
 
+
+def _validate_findings(expected_award_refs_count, found_award_refs_count):
+    errors = []
     for award_ref, expected in expected_award_refs_count.items():
         counted = found_award_refs_count[award_ref]
         if counted != expected:
             errors.append(
-                {
-                    "error": err_findings_count_inconsistent(
-                        expected,
-                        counted,
-                        award_ref,
-                    )
-                }
+                {"error": err_findings_count_inconsistent(expected, counted, award_ref)}
             )
     return errors
+
+
+def _normalize_award_ref_lengths(
+    declared_award_ref_max_length,
+    reported_award_ref_max_length,
+    federal_awards,
+    findings_uniform_guidance,
+):
+    """
+    Normalize the lengths of the award references in the Federal Awards and
+    Federal Awards Audit Findings workbooks before validation.
+    """
+    expected_award_refs_count = {}
+    found_award_refs_count = defaultdict(int)
+
+    if declared_award_ref_max_length != reported_award_ref_max_length:
+        # Determine the required padding based on the difference in lengths.
+        diff = abs(reported_award_ref_max_length - declared_award_ref_max_length)
+        padding = "0" * diff
+
+        if declared_award_ref_max_length < reported_award_ref_max_length:
+            # This means the version of the Federal Awards Audit Findings workbook
+            # is newer than the version of the Federal Awards workbook.
+            for award in federal_awards:
+                award_reference = award.get("award_reference")
+                if award_reference:
+                    award_reference = _pad_award_ref(award_reference, padding)
+                    expected_award_refs_count[award_reference] = award["program"][
+                        "number_of_audit_findings"
+                    ]
+            for finding in findings_uniform_guidance:
+                award_ref = finding["program"]["award_reference"]
+                if award_ref in expected_award_refs_count:
+                    found_award_refs_count[award_ref] += 1
+        else:
+            # This is unlikely to happen. It means the version of
+            # the Federal Awards workbook is newer than
+            # the version of the Federal Awards Audit Findings workbook.
+            for finding in findings_uniform_guidance:
+                award_ref = finding["program"]["award_reference"]
+                if award_ref:
+                    award_ref = _pad_award_ref(award_ref, padding)
+                    if award_ref in expected_award_refs_count:
+                        found_award_refs_count[award_ref] += 1
+    else:
+        # No normalization needed if the lengths match
+        pass
+
+    return expected_award_refs_count, found_award_refs_count
+
+
+def _pad_award_ref(award_ref, padding):
+    return f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
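For reviewers, a rough illustration of what the normalization buys the findings-count check (data shapes assumed, not the real SAC records): once both sides use the same width, the per-reference counts line up and no mismatch error is produced.

```python
# Rough illustration with assumed data (not real SAC records): normalize the
# reported references to the declared 5-digit width before counting findings.
from collections import defaultdict

expected = {"AWARD-00123": 2}            # declared in Federal Awards
reported = ["AWARD-0123", "AWARD-0123"]  # reported in Audit Findings

found = defaultdict(int)
for ref in reported:
    prefix, number = ref.split("-")
    found[f"{prefix}-{number.zfill(5)}"] += 1  # pad to the 5-digit form

mismatches = {
    ref: (exp, found[ref]) for ref, exp in expected.items() if found[ref] != exp
}
assert mismatches == {}  # counts agree once the widths match
```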
diff --git a/backend/audit/cross_validation/test_check_award_ref_declaration.py b/backend/audit/cross_validation/test_check_award_ref_declaration.py
index e64a76868..3ed266948 100644
--- a/backend/audit/cross_validation/test_check_award_ref_declaration.py
+++ b/backend/audit/cross_validation/test_check_award_ref_declaration.py
@@ -22,6 +22,8 @@ def setUp(self):
         self.award3 = {
             "award_reference": f"AWARD-{generate_random_integer(self.AWARD_MIN *3,self.AWARD_MAX *3)}"
         }
+        self.award_with_longer_ref = {"award_reference": "AWARD-00123"}
+        self.award_with_shorter_ref = {"award_reference": "AWARD-0123"}
 
     def _make_federal_awards(self, award_refs) -> dict:
         return {
@@ -82,3 +84,21 @@ def test_errors_for_findings_with_undeclared_award_refs(self):
         self.assertEqual(len(errors), 1)
         expected_error = err_award_ref_not_declared([self.award2["award_reference"]])
         self.assertIn({"error": expected_error}, errors)
+
+    def test_padding_when_declared_award_ref_max_length_greater(self):
+        """Test case where declared award reference length is greater than reported award reference length."""
+        sac = self._make_sac(
+            [self.award_with_longer_ref], [self.award_with_shorter_ref]
+        )
+        errors = check_award_ref_declaration(sac_validation_shape(sac))
+        # No errors expected
+        self.assertEqual(errors, [])
+
+    def test_padding_when_reported_award_ref_max_length_greater(self):
+        """Test case where reported award reference length is greater than declared award reference length."""
+        sac = self._make_sac(
+            [self.award_with_shorter_ref], [self.award_with_longer_ref]
+        )
+        errors = check_award_ref_declaration(sac_validation_shape(sac))
+        # No errors expected
+        self.assertEqual(errors, [])
diff --git a/backend/audit/cross_validation/test_check_findings_count_consistency.py b/backend/audit/cross_validation/test_check_findings_count_consistency.py
index c3c0296ea..9cc722236 100644
--- a/backend/audit/cross_validation/test_check_findings_count_consistency.py
+++ b/backend/audit/cross_validation/test_check_findings_count_consistency.py
@@ -29,10 +29,12 @@ def _make_federal_awards(self, findings_count) -> dict:
             }
         }
 
-    def _make_findings_uniform_guidance(self, awards, mismatch) -> dict:
+    def _make_findings_uniform_guidance(self, awards, mismatch, padding) -> dict:
         entries = []
         for award in awards["FederalAwards"]["federal_awards"]:
             award_reference = award["award_reference"]
+            if padding:
+                award_reference = f"{award_reference.split('-')[0]}-{padding}{award_reference.split('-')[1]}"
             count = award["program"]["number_of_audit_findings"]
             for _ in range(count + mismatch):
                 entries.append({"program": {"award_reference": award_reference}})
@@ -48,11 +50,11 @@ def _make_findings_uniform_guidance(self, awards, mismatch) -> dict:
 
         return {"FindingsUniformGuidance": findings}
 
-    def _make_sac(self, findings_count, mismatch=0) -> SingleAuditChecklist:
+    def _make_sac(self, findings_count, mismatch=0, padding="") -> SingleAuditChecklist:
         sac = baker.make(SingleAuditChecklist)
         sac.federal_awards = self._make_federal_awards(findings_count)
         sac.findings_uniform_guidance = self._make_findings_uniform_guidance(
-            sac.federal_awards, mismatch
+            sac.federal_awards, mismatch, padding
         )
         return sac
 
@@ -101,3 +103,14 @@ def test_declared_findings_exceed_reported_count(self):
         self._test_findings_count_mismatch(
             generate_random_integer(2, 4), generate_random_integer(-2, -1)
         )
+
+    def test_normalize_award_ref_lengths_with_padding(self):
+        """
+        Ensure that award reference normalization occurs when declared and reported
+        award reference lengths differ. Leading zeros are added appropriately.
+        """
+        sac = self._make_sac(
+            generate_random_integer(self.FINDINGS_MIN, self.FINDINGS_MAX), 0, "0"
+        )
+        errors = check_findings_count_consistency(sac_validation_shape(sac))
+        self.assertEqual(errors, [])
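A quick note on the new `padding` argument to the test helper: it simulates the findings workbook reporting a wider reference than the awards workbook declares. With an assumed reference value, the construction behaves like this:

```python
# What the test helper's padding does, shown with an assumed reference value:
# "0" is inserted after the dash, turning the declared 4-digit form into the
# 5-digit form the findings workbook would report.
award_reference = "AWARD-1234"
padding = "0"
padded = f"{award_reference.split('-')[0]}-{padding}{award_reference.split('-')[1]}"
assert padded == "AWARD-01234"
```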
diff --git a/backend/audit/intakelib/checks/check_finding_award_references_pattern.py b/backend/audit/intakelib/checks/check_finding_award_references_pattern.py
index 71e16c375..c799ae96f 100644
--- a/backend/audit/intakelib/checks/check_finding_award_references_pattern.py
+++ b/backend/audit/intakelib/checks/check_finding_award_references_pattern.py
@@ -13,7 +13,6 @@
 
 # A version of this regex also exists in Base.libsonnet
 AWARD_REFERENCES_REGEX = r"^AWARD-(?!0{4,5}$)[0-9]{4,5}$"
-AWARD_REFERENCES_REGEX5 = r"^AWARD-(?!0{5}$)[0-9]{5}$"
 
 AWARD_LEN_4_DIGITS = 10
 AWARD_LEN_5_DIGITS = 11
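Since the combined pattern already accepts both widths, the separate five-digit constant is redundant. A quick standalone check, using only the regex shown above:

```python
# Standalone check that the single pattern accepts both 4- and 5-digit
# references and still rejects all-zero suffixes, which is why the separate
# AWARD_REFERENCES_REGEX5 constant could be dropped.
import re

AWARD_REFERENCES_REGEX = r"^AWARD-(?!0{4,5}$)[0-9]{4,5}$"

assert re.match(AWARD_REFERENCES_REGEX, "AWARD-0123")
assert re.match(AWARD_REFERENCES_REGEX, "AWARD-00123")
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-0000")    # all zeros, 4 digits
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-00000")   # all zeros, 5 digits
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-123456")  # too long
```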