#2723: Copying e2e code from /dissemination to `/census_historical_…

…migration` (#2725) * #2723 Added Census historical migration module * #2723 Updated imports * #2723 Module declaration * #2723 Renaming commands * #2723 Importing the correct models * Linting * Renaming commands * Adding "End-to-end workbook testing" section to dissemination README * Adding census_historical_migration README * #2723 Linting --------- Co-authored-by: Hassan D. M. Sambo <[email protected]> Co-authored-by: Phil Dominguez <“[email protected]”>
GSA-TTS · Nov 3, 2023 · bfd444d · bfd444d
1 parent fc1cac6
commit bfd444d
Show file tree

Hide file tree

Showing 25 changed files with 4,076 additions and 0 deletions.
diff --git a/backend/census_historical_migration/README.md b/backend/census_historical_migration/README.md
@@ -0,0 +1,20 @@
+# Census Historical Migration
+
+### How to run the historic data migrator:
+```
+docker compose run web python manage.py historic_data_migrator --email [email protected] \
+  --years 22 \
+  --dbkeys 100010
+```
+- The email address must currently belong to an existing User in the system. Because this has only been run locally so far, it is usually a test account in a local sandbox environment.
+- `years` and `dbkeys` are optional comma-separated lists of equal length (each year is two digits, e.g. `22`). The script will use default values for these if they aren't provided.
+
+### How to run the historic workbook generator:
+```
+docker compose run web python manage.py historic_workbook_generator \
+  --year 22 \
+  --output <your_output_directory> \
+  --dbkey 100010
+```
+- `output` and `dbkey` are required; `year` is optional and defaults to `22`.
+- The `output` directory will be created if it doesn't already exist.
diff --git a/backend/census_historical_migration/__init__.py b/backend/census_historical_migration/__init__.py
diff --git a/backend/census_historical_migration/admin.py b/backend/census_historical_migration/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin  # noqa: F401
+
+# Register your models here.
diff --git a/backend/census_historical_migration/apps.py b/backend/census_historical_migration/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class CensusHistoricalMigrationConfig(AppConfig):
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "census_historical_migration"
diff --git a/backend/census_historical_migration/management/commands/historic_data_migrator.py b/backend/census_historical_migration/management/commands/historic_data_migrator.py
@@ -0,0 +1,61 @@
+import os
+import logging
+import sys
+
+from config.settings import ENVIRONMENT
+from django.core.management.base import BaseCommand
+from census_historical_migration.workbooklib.end_to_end_core import run_end_to_end
+
+# E-mail address read from the CYPRESS_LOGIN_TEST_EMAIL_AUDITEE environment
+# variable; os.getenv yields None when the variable is not set.
+CYPRESS_TEST_EMAIL_ADDR = os.getenv("CYPRESS_LOGIN_TEST_EMAIL_AUDITEE")
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument("--email", type=str, required=False)
+        parser.add_argument("--dbkeys", type=str, required=False, default="")
+        parser.add_argument("--years", type=str, required=False, default="")
+
+    def handle(self, *args, **options):
+        dbkeys_str = options["dbkeys"]
+        years_str = options["years"]
+        dbkeys = dbkeys_str.split(",")
+        years = years_str.split(",")
+
+        if len(dbkeys) != len(years):
+            logger.error(
+                "Received {} dbkeys and {} years. Must be equal. Exiting.".format(
+                    len(dbkeys), len(years)
+                )
+            )
+            sys.exit(-1)
+
+        lengths = [len(s) == 2 for s in years]
+        if dbkeys_str and years_str and (not all(lengths)):
+            logger.error("Years must be two digits. Exiting.")
+            sys.exit(-2)
+
+        email = options.get("email", CYPRESS_TEST_EMAIL_ADDR)
+
+        defaults = [
+            (182926, 22),
+            (181744, 22),
+            (191734, 22),
+        ]
+
+        if ENVIRONMENT in ["LOCAL", "DEVELOPMENT", "PREVIEW", "STAGING"]:
+            if dbkeys_str and years_str:
+                logger.info(
+                    f"Generating test reports for DBKEYS: {dbkeys_str} and YEARS: {years_str}"
+                )
+                for dbkey, year in zip(dbkeys, years):
+                    run_end_to_end(email, dbkey, year)
+            else:
+                for pair in defaults:
+                    logger.info("Running {}-{} end-to-end".format(pair[0], pair[1]))
+                    run_end_to_end(email, str(pair[0]), str(pair[1]))
+        else:
+            logger.error(
+                "Cannot run end-to-end workbook generation in production. Exiting."
+            )
+            sys.exit(-3)
diff --git a/backend/census_historical_migration/management/commands/historic_workbook_generator.py b/backend/census_historical_migration/management/commands/historic_workbook_generator.py
@@ -0,0 +1,217 @@
+from collections import namedtuple as NT
+from playhouse.shortcuts import model_to_dict
+import os
+import sys
+import json
+
+from django.core.management.base import BaseCommand
+
+import argparse
+import pprint
+
+from census_historical_migration.workbooklib.workbook_creation import (
+    sections,
+    workbook_loader,
+    setup_sac,
+)
+
+import datetime
+
+from census_historical_migration.workbooklib.census_models.census import (
+    CensusGen22 as Gen,
+)
+
+import logging
+
+pp = pprint.PrettyPrinter(indent=2)
+
+parser = argparse.ArgumentParser()
+
+logger = logging.getLogger(__name__)
+logging.basicConfig()
+logging.getLogger().setLevel(logging.INFO)
+
+# This provides a way to map the sheet in the workbook to the
+# column in the DB. It also has a default value and
+# the type of value, so that things can be set correctly
+# before filling in the XLSX workbooks.
+FieldMap = NT("FieldMap", "in_sheet in_db default type")
+
+templates = {
+    "AdditionalUEIs": "additional-ueis-workbook.xlsx",
+    "AdditionalEINs": "additional-eins-workbook.xlsx",
+    "AuditFindingsText": "audit-findings-text-workbook.xlsx",
+    "CAP": "corrective-action-plan-workbook.xlsx",
+    "AuditFindings": "federal-awards-audit-findings-workbook.xlsx",
+    "FederalAwards": "federal-awards-workbook.xlsx",
+    "SEFA": "notes-to-sefa-workbook.xlsx",
+    "SecondaryAuditors": "secondary-auditors-workbook.xlsx",
+}
+
+
+def set_single_cell_range(wb, range_name, value):
+    the_range = wb.defined_names[range_name]
+    # The above returns a generator. Turn it to a list, and grab
+    # the first element of the list. Now, this *tuple* contains a
+    # sheet name and a cell reference... which you need to get rid
+    # of the '$' to use.
+    # https://itecnote.com/tecnote/python-using-excel-named-ranges-in-python-with-openpyxl/
+    tup = list(the_range.destinations)[0]
+    sheet_title = tup[0]
+    cell_ref = tup[1].replace("$", "")
+    ws = wb[sheet_title]
+    ws[cell_ref] = value
+
+
+# A tiny helper to index into workbooks.
+# Assumes a capital letter.
+def col_to_ndx(col):
+    return ord(col) - 65 + 1
+
+
+# Helper to set a range of values.
+# Takes a named range, and then walks down the range,
+# filling in values from the list past in (values).
+def set_range(wb, range_name, values, default=None, type=str):
+    the_range = wb.defined_names[range_name]
+    dest = list(the_range.destinations)[0]
+    sheet_title = dest[0]
+    ws = wb[sheet_title]
+
+    start_cell = dest[1].replace("$", "").split(":")[0]
+    col = col_to_ndx(start_cell[0])
+    start_row = int(start_cell[1])
+
+    for ndx, v in enumerate(values):
+        row = ndx + start_row
+        if v:
+            # This is a very noisy statement, showing everything
+            # written into the workbook.
+            # print(f'{range_name} c[{row}][{col}] <- {v} len({len(v)}) {default}')
+            if v is not None:
+                ws.cell(row=row, column=col, value=type(v))
+            if len(v) == 0 and default is not None:
+                # This is less noisy. Shows up for things like
+                # empty findings counts. 2023 submissions
+                # require that field to be 0, not empty,
+                # if there are no findings.
+                # print('Applying default')
+                ws.cell(row=row, column=col, value=type(default))
+        if not v:
+            if default is not None:
+                ws.cell(row=row, column=col, value=type(default))
+            else:
+                ws.cell(row=row, column=col, value="")
+        else:
+            # Leave it blank if we have no default passed in
+            pass
+
+
+def set_uei(wb, dbkey):
+    g = Gen.select().where(Gen.dbkey == dbkey).get()
+    set_single_cell_range(wb, "auditee_uei", g.uei)
+    return g
+
+
+def map_simple_columns(wb, mappings, values):
+    # Map all the simple ones
+    for m in mappings:
+        set_range(
+            wb,
+            m.in_sheet,
+            map(lambda v: model_to_dict(v)[m.in_db], values),
+            m.default,
+            m.type,
+        )
+
+
+# FIXME: Get the padding/shape right on the report_id
+def dbkey_to_test_report_id(dbkey):
+    g = Gen.select(Gen.audityear, Gen.fyenddate).where(Gen.dbkey == dbkey).get()
+    # month = g.fyenddate.split('-')[1]
+    # 2022JUN0001000003
+    # We start new audits at 1 million.
+    # So, we want 10 digits, and zero-pad for
+    # historic DBKEY report_ids
+    return f"{g.audityear}-TEST-{dbkey.zfill(7)}"
+
+
+def generate_dissemination_test_table(api_endpoint, dbkey, mappings, objects):
+    table = {"rows": list(), "singletons": dict()}
+    table["endpoint"] = api_endpoint
+    table["report_id"] = dbkey_to_test_report_id(dbkey)
+    for o in objects:
+        as_dict = model_to_dict(o)
+        test_obj = {}
+        test_obj["fields"] = []
+        test_obj["values"] = []
+        for m in mappings:
+            # What if we only test non-null values?
+            if ((m.in_db in as_dict) and as_dict[m.in_db] is not None) and (
+                as_dict[m.in_db] != ""
+            ):
+                test_obj["fields"].append(m.in_sheet)
+                test_obj["values"].append(as_dict[m.in_db])
+        table["rows"].append(test_obj)
+    return table
+
+
+def make_file(dir, dbkey, slug):
+    return open(os.path.join(dir, f"{slug}-{dbkey}.xlsx"))
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument("--output", type=str, required=True)
+        parser.add_argument("--dbkey", type=str, required=True)
+        parser.add_argument("--year", type=str, default="22")
+
+    def handle(self, *args, **options):  # noqa: C901
+        out_basedir = None
+        if options["output"]:
+            out_basedir = options["output"]
+        else:
+            out_basedir = "output"
+
+        if not os.path.exists(out_basedir):
+            try:
+                os.mkdir(out_basedir)
+                logger.info(f"Made directory {out_basedir}")
+            except Exception as e:
+                logger.info(e)
+                logger.info(f"Could not create directory {out_basedir}")
+                sys.exit()
+
+        outdir = os.path.join(out_basedir, f'{options["dbkey"]}-{options["year"]}')
+
+        if not os.path.exists(outdir):
+            try:
+                os.mkdir(outdir)
+                logger.info(f"Made directory {outdir}")
+            except Exception as e:
+                logger.info(e)
+                logger.info("could not create output directory. exiting.")
+                sys.exit()
+
+        entity_id = "DBKEY {dbkey} {date:%Y_%m_%d_%H_%M_%S}".format(
+            dbkey=options["dbkey"], date=datetime.datetime.now()
+        )
+
+        sac = setup_sac(None, entity_id, options["dbkey"])
+        loader = workbook_loader(
+            None, sac, options["dbkey"], options["year"], entity_id
+        )
+        json_test_tables = []
+        for section, fun in sections.items():
+            (wb, api_json, filename) = loader(fun, section)
+            if wb:
+                wb_path = os.path.join(outdir, filename)
+                wb.save(wb_path)
+            if api_json:
+                json_test_tables.append(api_json)
+
+        json_path = os.path.join(outdir, f'test-array-{options["dbkey"]}.json')
+        logger.info(f"Writing JSON to {json_path}")
+        with open(json_path, "w") as test_file:
+            jstr = json.dumps(json_test_tables, indent=2, sort_keys=True)
+            test_file.write(jstr)
diff --git a/backend/census_historical_migration/migrations/__init__.py b/backend/census_historical_migration/migrations/__init__.py
diff --git a/backend/census_historical_migration/models.py b/backend/census_historical_migration/models.py
@@ -0,0 +1,3 @@
+from django.db import models  # noqa: F401
+
+# Create your models here.
diff --git a/backend/census_historical_migration/tests.py b/backend/census_historical_migration/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase  # noqa: F401
+
+# Create your tests here.
diff --git a/backend/census_historical_migration/views.py b/backend/census_historical_migration/views.py
@@ -0,0 +1,3 @@
+from django.shortcuts import render  # noqa: F401
+
+# Create your views here.
diff --git a/backend/census_historical_migration/workbooklib/additional_eins.py b/backend/census_historical_migration/workbooklib/additional_eins.py
@@ -0,0 +1,44 @@
+from census_historical_migration.workbooklib.excel_creation import (
+    FieldMap,
+    WorkbookFieldInDissem,
+    templates,
+    set_uei,
+    map_simple_columns,
+    generate_dissemination_test_table,
+)
+
+
+from census_historical_migration.workbooklib.excel_creation import (
+    insert_version_and_sheet_name,
+)
+from census_historical_migration.workbooklib.census_models.census import dynamic_import
+
+import openpyxl as pyxl
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Field mappings for the additional EINs workbook: a single entry that
+# pairs "additional_ein" with "ein".
+# NOTE(review): the meaning of each FieldMap position is defined in
+# excel_creation, which is not visible here; confirm before relying on it.
+mappings = [
+    FieldMap("additional_ein", "ein", WorkbookFieldInDissem, None, str),
+]
+
+
+def generate_additional_eins(dbkey, year, outfile):
+    logger.info(f"--- generate additional eins {dbkey} {year} ---")
+    Gen = dynamic_import("Gen", year)
+    Eins = dynamic_import("Eins", year)
+    wb = pyxl.load_workbook(templates["AdditionalEINs"])
+
+    g = set_uei(Gen, wb, dbkey)
+    insert_version_and_sheet_name(wb, "additional-eins-workbook")
+
+    addl_eins = Eins.select().where(Eins.dbkey == g.dbkey)
+    map_simple_columns(wb, mappings, addl_eins)
+    wb.save(outfile)
+
+    table = generate_dissemination_test_table(
+        Gen, "additional_eins", dbkey, mappings, addl_eins
+    )
+    table["singletons"]["auditee_uei"] = g.uei
+    return (wb, table)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from django.contrib import admin # noqa: F401

		# Register your models here.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from django.db import models # noqa: F401

		# Create your models here.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from django.test import TestCase # noqa: F401

		# Create your tests here.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from django.shortcuts import render # noqa: F401

		# Create your views here.