Skip to content

Commit

Permalink
Updating CFDA Assistance Listings and script (#4269)
Browse files Browse the repository at this point in the history
* Updating CFDA lookup and script

* Lint

* Generating templates

* Adding readme

* Updating readme

* Bumping to 1.1.3 and using Program Title column

* Updating readme

* Updating readme
  • Loading branch information
phildominguez-gsa authored Sep 12, 2024
1 parent 5b73f02 commit eaad320
Show file tree
Hide file tree
Showing 31 changed files with 38,868 additions and 10,322 deletions.
1 change: 1 addition & 0 deletions backend/audit/intakelib/checks/check_version_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"1.1.0",
"1.1.1",
"1.1.2",
"1.1.3",
}


Expand Down
6 changes: 3 additions & 3 deletions backend/schemas/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ xlsx = $(wildcard output/excel/xlsx/*-workbook*.xlsx)
json = $(wildcard output/excel/json/*.json)

source_data:
python scripts/generate_lookup_schemas.py source/data/cfda-lookup-20230626.csv source/base/FederalProgramNames.json
python scripts/generate_lookup_schemas.py source/data/cluster-names-20230626.csv source/base/ClusterNames.json
python scripts/generate_lookup_schemas.py cfda-lookup source/base/FederalProgramNames.json
python scripts/generate_lookup_schemas.py cluster-names source/base/ClusterNames.json

clean:
for f in $(xlsx); do \
Expand All @@ -25,7 +25,7 @@ clean:
rm $$f; \
done

build_audit_json:
build_audit_json:
for jsonnet_file in $(audit_specs); do \
base_name=$$(basename "$$jsonnet_file" .jsonnet); \
jsonnet -o output/audit/"$$base_name.json" "$$jsonnet_file"; \
Expand Down
15 changes: 15 additions & 0 deletions backend/schemas/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Bumping workbook template version

Follow these steps to version bump the workbook templates:
- `backend/schemas/source/excel/libs/Sheets.libsonnet`: Update the `WORKBOOKS_VERSION` variable
- `backend/audit/intakelib/checks/check_version_number.py`: Update the `AUTHORIZED_VERSIONS` variable
- Run `make all` to generate new schemas and templates
- Once your PR is merged, don't forget to copy the new templates, found in `backend/schemas/output/excel/xlsx/`, into `assets/workbooks/` of the [static site repo](https://github.com/GSA-TTS/FAC-transition-site).

# Updating the CFDA listings

The current CFDA assistance listings are in the CSV found [here](https://sam.gov/data-services/Assistance%20Listings/datagov?privacy=Public). When downloading the file, save it in the format `cfda-lookup-YYYYMMDD.csv` in the `/schemas/source/data` directory. Running `make all` should be sufficient to regenerate the lookup schemas and the Excel templates.

More specifically, `make all` executes `make source_data`, which calls `generate_lookup_schemas.py`. This script can generate cluster names, CFDA listings, or agencies, depending on the args given (see the docstring in the script). The script will automatically use the latest-dated CSV file for processing. This way, the Makefile doesn't have to be repeatedly changed and we can retain the historic files. The format of the CSVs can change (and has), so changes to `generate_lookup_schemas.py` may be necessary in the future, and non-current files may no longer be processable.

If you get a `UnicodeDecodeError`, you may have to manually save it with UTF-8 encoding (in VSCode, click UTF-8 in the bottom right and select "Save with encoding".)
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
9,094 changes: 4,113 additions & 4,981 deletions backend/schemas/output/excel/json/federal-awards-workbook.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Binary file modified backend/schemas/output/excel/xlsx/additional-eins-workbook.xlsx
Binary file not shown.
Binary file modified backend/schemas/output/excel/xlsx/additional-ueis-workbook.xlsx
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified backend/schemas/output/excel/xlsx/federal-awards-workbook.xlsx
Binary file not shown.
Binary file modified backend/schemas/output/excel/xlsx/notes-to-sefa-workbook.xlsx
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/AdditionalEINs.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/AdditionalUEIs.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/FederalAwards.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8670,7 +8670,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/NotesToSefa.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
69 changes: 49 additions & 20 deletions backend/schemas/scripts/generate_lookup_schemas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
import pandas as pd
import glob
import json
import pandas as pd
import sys

"""
This script processes CFDA/ALN and cluster name CSV files to generate schema
JSON, and it can be run using `make source_data`. Input files are found in
`schemas/source/data`, and the latest-dated file will be used. To run manually:
`python scripts/generate_lookup_schemas.py <item to process> <output JSON filepath>`
where "item to process" is either "cfda-lookup" or "cluster-names".
"""


def cleanup_string(s):
s = str(s).strip()
Expand All @@ -14,20 +25,23 @@ def lmap(fun, ls):
return list(map(fun, ls))


def process_cfda_lookup(arg):
df = pd.read_csv(arg[1], converters={"CFDAEXT": str})
def process_cfda_lookup(file_path):
df = pd.read_csv(file_path, encoding="utf-8", converters={"Program Number": str})

# Build a couple of Python objects to render as
# JSON, and then as Jsonnet
program_names = list(df["FEDERALPROGRAMNAME"])
program_names = list(df["Program Title"])
program_numbers = list(df["Program Number"])

unique_prefixes_dict = {}
for prefix in df["CFDAPREFIX"]:
unique_prefixes_dict[prefix] = prefix
unique_prefix_list = list(unique_prefixes_dict.keys())

unique_cfda_dict = {}
for index, row in df.iterrows():
unique_cfda_dict[f"{row['CFDAPREFIX']}.{row['CFDAEXT']}"] = 1

for program_number in program_numbers:
prefix, _ = program_number.split(".")
unique_prefixes_dict[prefix] = None
unique_cfda_dict[program_number] = None

unique_prefix_list = list(unique_prefixes_dict.keys())
unique_cfda_list = list(unique_cfda_dict.keys())

# Clean everything up
Expand All @@ -45,8 +59,8 @@ def process_cfda_lookup(arg):
}


def process_cluster_names(arg):
df = pd.read_csv(arg[1])
def process_cluster_names(filename):
df = pd.read_csv(filename)
cluster_names = list(df["NAME"])
# Clean everything up
cluster_names = lmap(cleanup_string, cluster_names)
Expand All @@ -60,15 +74,30 @@ def process_cluster_names(arg):

if __name__ == "__main__":
if len(sys.argv) >= 2:
filename = sys.argv[1]
obj = None
if "cfda-lookup" in filename.lower():
obj = process_cfda_lookup(sys.argv)
elif "cluster-names" in filename.lower():
obj = process_cluster_names(sys.argv)
else:
print("Unknown filename, exiting")
item_to_process = sys.argv[1]
glob_str = f"./source/data/{item_to_process}-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9].csv"

print(f"Globbing for {glob_str}")

list_of_files = glob.glob(glob_str)
print(f"Found {len(list_of_files)} files")

if not len(list_of_files):
print(f"No {item_to_process} CSV files found in schemas/source/data/")
sys.exit(1)

latest_file = sorted(list_of_files)[-1]
print(f"Processing latest file {latest_file}")

obj = None
match item_to_process:
case "cfda-lookup":
obj = process_cfda_lookup(latest_file)
case "cluster-names":
obj = process_cluster_names(latest_file)
case _:
print("Unknown filename, exiting")
sys.exit(1)

with open(sys.argv[2], "w", newline="\n") as write_file:
json.dump(obj, write_file, indent=2, sort_keys=True)
Loading

0 comments on commit eaad320

Please sign in to comment.