Merge pull request #2914 from GSA-TTS/main
jadudm authored Dec 2, 2023
2 parents dc82fee + 7ad3cbb commit 0e4dab6
Showing 25 changed files with 423 additions and 2,264 deletions.
14 changes: 12 additions & 2 deletions backend/census_historical_migration/README.md
@@ -41,7 +41,7 @@ python manage.py csv_to_postgres --clean True

## How to load test Census data into Postgres

1. Download test Census data from https://drive.google.com/drive/folders/1TY-7yWsMd8DsVEXvwrEe_oWW1iR2sGoy into the census_historical_migration/data folder.
NOTE: Never check the census_historical_migration/data folder into GitHub.

2. In the FAC/backend folder, run the following to load CSV files from the census_historical_migration/data folder into the fac-census-to-gsafac-s3 bucket.
@@ -55,13 +55,23 @@ docker compose run --rm web python manage.py csv_to_postgres --folder data --chu
```

### How to run the historic data migrator:
To migrate individual dbkeys:
```
docker compose run --rm web python manage.py historic_data_migrator \
--years 22 \
--dbkeys 177310
```
- `years` and `dbkeys` are optional. The script uses default values for them when they aren't provided; see the example below.
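
For instance (an illustration, not from the original README), a run that relies entirely on the defaults would presumably be:
```
docker compose run --rm web python manage.py historic_data_migrator
```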

To migrate dbkeys for a given year with pagination:
```
docker compose run --rm web python manage.py run_paginated_migration \
--year 2022 \
--page_size 1000 \
--pages 1,3,4
```
- `page_size` and `pages` are optional. The script uses default values for them when they aren't provided; the sketch below shows how page numbers map to records.
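
As an illustration (an editor's sketch, not part of the README), the command is backed by Django's `Paginator`, whose page numbers are 1-based: with `page_size=1000`, pages 1, 3, and 4 cover records 1-1000, 2001-3000, and 3001-4000 of the ordered queryset. A minimal standalone sketch, using a plain list in place of the queryset:
```
# Requires only that Django is installed; the list stands in for the
# ordered submissions queryset.
from django.core.paginator import Paginator

records = list(range(1, 4501))
paginator = Paginator(records, 1000)  # page_size=1000
for page_number in [1, 3, 4]:
    page = paginator.page(page_number)
    print(page_number, page.object_list[0], page.object_list[-1])
# Prints: 1 1 1000 / 3 2001 3000 / 4 3001 4000
```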

### How to run the historic workbook generator:
```
docker compose run --rm web python manage.py historic_workbook_generator \
@@ -74,6 +84,6 @@ docker compose run --rm web python manage.py historic_workbook_generator \

### How to trigger historic data migrator from GitHub:
- Go to GitHub Actions and select `Historic data migrator` action
- Next, click `Run workflow` at the top right
- Provide the target `environment` along with optional parameters such as `dbkeys` and `years`
- Click `Run`
4 changes: 2 additions & 2 deletions backend/census_historical_migration/admin.py
@@ -1,6 +1,6 @@
from django.contrib import admin # noqa: F401
from django.contrib import admin

from census_historical_migration.models import (
from .models import (
    ELECAUDITHEADER,
    ELECEINS,
    ELECAUDITFINDINGS,
57 changes: 35 additions & 22 deletions backend/census_historical_migration/historic_data_loader.py
@@ -2,36 +2,49 @@
from .workbooklib.end_to_end_core import run_end_to_end

from django.contrib.auth import get_user_model
from django.core.paginator import Paginator


User = get_user_model()


def load_historic_data_for_year(audit_year):
def load_historic_data_for_year(audit_year, page_size, pages):
    """Iterates over and processes submissions for the given audit year"""
    result_log = {}
    total_count = error_count = 0
    user = create_or_get_user()
    submissions_for_year = Gen.objects.filter(AUDITYEAR=audit_year)

    for submission in submissions_for_year:
        dbkey = submission.DBKEY
        result = {"success": [], "errors": []}

        try:
            # Migrate a single submission
            run_end_to_end(user, dbkey, audit_year, result)
        except Exception as exc:
            result["errors"].append(f"{exc}")

        result_log[(audit_year, dbkey)] = result
        total_count += 1

        if len(result["errors"]) > 0:
            error_count += 1
        if total_count % 5 == 0:
            print(f"Processed = {total_count}, Errors = {error_count}")
        if error_count > 5:
            break
    submissions_for_year = Gen.objects.filter(AUDITYEAR=audit_year).order_by(
        "ELECAUDITHEADERID"
    )
    paginator = Paginator(submissions_for_year, page_size)

    print(f"{submissions_for_year.count()} submissions found for {audit_year}")

    for page_number in pages:
        page = paginator.page(page_number)
        print(
            f"Processing page {page_number} with {page.object_list.count()} submissions."
        )

        for submission in page.object_list:
            dbkey = submission.DBKEY
            result = {"success": [], "errors": []}

            try:
                # Migrate a single submission
                run_end_to_end(user, dbkey, audit_year, result)
            except Exception as exc:
                result["errors"].append(f"{exc}")

            result_log[(audit_year, dbkey)] = result
            total_count += 1

            if len(result["errors"]) > 0:
                error_count += 1
            if total_count % 5 == 0:
                print(f"Processed = {total_count}, Errors = {error_count}")
            if error_count > 5:
                break

    print("********* Loader Summary ***************")

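For reference, a hypothetical invocation of the updated loader from a Django shell (`manage.py shell`); the year and page values here are illustrative, and the audit year is passed as a string to match the Census `AUDITYEAR` column:
```
# Hypothetical usage sketch: process the first two pages of 1,000
# submissions each for audit year 2022.
from census_historical_migration.historic_data_loader import (
    load_historic_data_for_year,
)

load_historic_data_for_year("2022", page_size=1000, pages=[1, 2])
```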
@@ -0,0 +1,41 @@
from ...historic_data_loader import load_historic_data_for_year

from django.core.management.base import BaseCommand

import logging
import sys


logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)


class Command(BaseCommand):
    help = """
    Migrate from Census tables to GSAFAC tables for a given year using pagination
    Usage:
    manage.py run_paginated_migration
    --year <audit year>
    --page_size <page size>
    --pages <comma separated pages>
    """

    def add_arguments(self, parser):
        parser.add_argument("--year", help="4-digit Audit Year")
        parser.add_argument("--page_size", type=int, required=False, default=5)
        parser.add_argument("--pages", type=str, required=False, default="1")

    def handle(self, *args, **options):
        year = options.get("year")
        if not year:
            print("Please specify an audit year")
            return

        try:
            pages_str = options["pages"]
            pages = list(map(lambda d: int(d), pages_str.split(",")))
        except ValueError:
            logger.error(f"Found a non-integer in pages '{pages_str}'")
            sys.exit(-1)

        load_historic_data_for_year(year, options["page_size"], pages)
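
As a usage reference, this is the command documented in the README above, invoked as:
```
docker compose run --rm web python manage.py run_paginated_migration \
--year 2022 \
--page_size 1000 \
--pages 1,3,4
```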
160 changes: 104 additions & 56 deletions backend/census_historical_migration/sac_general_lib/audit_information.py
@@ -1,10 +1,12 @@
from census_historical_migration.workbooklib.census_models.census import (
    CensusGen22 as Gen,
    CensusCfda22 as Cfda,
    CensusFindings22 as Finding,
)
from census_historical_migration.base_field_maps import FormFieldMap, FormFieldInDissem
from census_historical_migration.sac_general_lib.utils import (
import re

from ..transforms.xform_string_to_bool import string_to_bool
from ..exception_utils import DataMigrationError
from ..transforms.xform_string_to_string import string_to_string
from ..workbooklib.excel_creation_utils import get_audits

from ..base_field_maps import FormFieldMap, FormFieldInDissem
from ..sac_general_lib.utils import (
    _create_json_from_db_object,
)
import audit.validators
@@ -14,74 +16,130 @@
mappings = [
    FormFieldMap(
        "dollar_threshold",
        "dollarthreshold",
        "DOLLARTHRESHOLD",
        FormFieldInDissem,
        settings.DOLLAR_THRESHOLD,
        int,
    ),
    FormFieldMap("gaap_results", "typereport_fs", FormFieldInDissem, [], list),
    FormFieldMap(
        "is_going_concern_included", "goingconcern", FormFieldInDissem, None, bool
        "is_going_concern_included", "GOINGCONCERN", FormFieldInDissem, None, bool
    ),
    FormFieldMap(
        "is_internal_control_deficiency_disclosed",
        "materialweakness",
        "MATERIALWEAKNESS",
        FormFieldInDissem,
        None,
        bool,
    ),
    FormFieldMap(
        "is_internal_control_material_weakness_disclosed",
        "materialweakness_mp",
        "MATERIALWEAKNESS_MP",
        FormFieldInDissem,
        None,
        bool,
    ),
    FormFieldMap(
        "is_material_noncompliance_disclosed",
        "materialnoncompliance",
        "MATERIALNONCOMPLIANCE",
        FormFieldInDissem,
        None,
        bool,
    ),
    FormFieldMap(
        "is_aicpa_audit_guide_included",
        "reportablecondition",
        "REPORTABLECONDITION",
        FormFieldInDissem,
        None,
        bool,
    ),
    FormFieldMap("is_low_risk_auditee", "lowrisk", FormFieldInDissem, False, bool),
    FormFieldMap("agencies", "pyschedule", "agencies_with_prior_findings", [], list),
    FormFieldMap("is_low_risk_auditee", "LOWRISK", FormFieldInDissem, False, bool),
    FormFieldMap("agencies", "PYSCHEDULE", "agencies_with_prior_findings", [], list),
]


def _get_agency_prefixes(dbkey):
    """Returns the agency prefixes for the given dbkey."""
    agencies = set()
    cfdas = Cfda.select().where(Cfda.dbkey == dbkey)
    audits = get_audits(dbkey)

    for cfda in cfdas:
        agency_prefix = int(cfda.cfda.split(".")[0])
        agencies.add(agency_prefix)
    for audit_detail in audits:
        agencies.add(string_to_string(audit_detail.CFDA_PREFIX))

    return agencies


def _get_gaap_results(dbkey):
    findings = Finding.select().where(Finding.dbkey == dbkey)
    gaap_results = {}
    # FIXME: How do we retrieve gaap_results from the historic data? I could not find corresponding fields in Census tables.
    for finding in findings:
        if finding.modifiedopinion == "Y":
            gaap_results["unmodified_opinion"] = 1
        if finding.materialweakness == "Y":
            gaap_results["adverse_opinion"] = 1
        if finding.significantdeficiency == "Y":
            gaap_results["disclaimer_of_opinion"] = 1
    return gaap_results.keys()


def xform_framework_basis(basis):
    """Transforms the framework basis from Census format to FAC format."""
    mappings = {
        r"cash": "cash_basis",
        # FIXME-MSHD: `regulatory` could mean tax_basis or contractual_basis,
        # or a new basis we don't yet have in the FAC validation schema; I don't know the answer.
        # Defaulting to `contractual_basis` until the team decides.
        r"regulatory": "contractual_basis",
        # r"????": "tax_basis", FIXME-MSHD: Could not find any instance of this in historic data
        r"other": "other_basis",
    }
    # Check each pattern in the mappings with a case-insensitive search
    for pattern, value in mappings.items():
        if re.search(pattern, basis, re.IGNORECASE):
            # FIXME-MSHD: This is a transformation that we may want to record
            return value

    raise DataMigrationError(
        f"Could not find a match for historic framework basis: '{basis}'"
    )


def xform_census_keys_to_fac_options(census_keys, fac_options):
    """Maps the census keys to FAC options."""

    if "U" in census_keys:
        fac_options.append("unmodified_opinion")
    if "Q" in census_keys:
        fac_options.append("qualified_opinion")
    if "A" in census_keys:
        fac_options.append("adverse_opinion")
    if "D" in census_keys:
        fac_options.append("disclaimer_of_opinion")


def _get_sp_framework_gaap_results(audit_header):
    """Returns the SP Framework and GAAP results for a given audit header."""

    sp_framework_gaap_data = string_to_string(audit_header.TYPEREPORT_FS).upper()
    if not sp_framework_gaap_data:
        raise DataMigrationError(
            f"GAAP details are missing for DBKEY: {audit_header.DBKEY}"
        )

    sp_framework_gaap_results = {}
    sp_framework_gaap_results["gaap_results"] = []
    xform_census_keys_to_fac_options(
        sp_framework_gaap_data, sp_framework_gaap_results["gaap_results"]
    )
    if "S" in sp_framework_gaap_data:
        sp_framework_gaap_results["gaap_results"].append("not_gaap")
    sp_framework_gaap_results["is_sp_framework_required"] = string_to_bool(
        audit_header.SP_FRAMEWORK_REQUIRED
    )
    sp_framework_gaap_results["sp_framework_opinions"] = []
    sp_framework_opinions = string_to_string(
        audit_header.TYPEREPORT_SP_FRAMEWORK
    ).upper()
    xform_census_keys_to_fac_options(
        sp_framework_opinions, sp_framework_gaap_results["sp_framework_opinions"]
    )
    sp_framework_gaap_results["sp_framework_basis"] = []
    basis = xform_framework_basis(string_to_string(audit_header.SP_FRAMEWORK))
    sp_framework_gaap_results["sp_framework_basis"].append(basis)

    return sp_framework_gaap_results


# FIXME-MSHD: Not being used, but we may need it in the future
def _xform_agencies(audit_info):
    """Transforms the agencies from Census format to FAC format."""

    new_audit_info = audit_info.copy()
    # Apply transformation to each key
    transformed_agencies = [
@@ -92,29 +92,19 @@ def _xform_agencies(audit_info):
    return new_audit_info


def _build_initial_audit_information(dbkey):
    gaap_results = _get_gaap_results(dbkey)
    agencies_prefixes = _get_agency_prefixes(dbkey)
    gobj = Gen.select().where(Gen.dbkey == dbkey).first()
    audit_information = _create_json_from_db_object(gobj, mappings)
    audit_information["gaap_results"] = list(gaap_results)
    audit_information["agencies"] = list(agencies_prefixes)
    return audit_information


def _audit_information(dbkey):
    audit_information = _build_initial_audit_information(dbkey)

    # List of transformation functions
    transformations = [
        _xform_agencies,
    ]
def audit_information(audit_header):
    """Generates audit information JSON."""

    # Apply transformations
    for transform in transformations:
        audit_information = transform(audit_information)
    results = _get_sp_framework_gaap_results(audit_header)
    agencies_prefixes = _get_agency_prefixes(audit_header.DBKEY)
    audit_info = _create_json_from_db_object(audit_header, mappings)
    audit_info = {
        key: results.get(key, audit_info.get(key))
        for key in set(audit_info) | set(results)
    }
    audit_info["agencies"] = list(agencies_prefixes)

    # Validate against the schema
    audit.validators.validate_audit_information_json(audit_information)
    audit.validators.validate_audit_information_json(audit_info)

    return audit_information
    return audit_info
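
To illustrate the new transform helpers (an editor's sketch, not part of the commit, with hypothetical Census input values):
```
fac_options = []
xform_census_keys_to_fac_options("UQS", fac_options)
# fac_options -> ["unmodified_opinion", "qualified_opinion"]; the "S" key
# is handled by the caller, which appends "not_gaap" separately.

xform_framework_basis("Regulatory Basis")  # -> "contractual_basis"
xform_framework_basis("CASH")              # -> "cash_basis"
xform_framework_basis("Tax")               # raises DataMigrationError
```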