Skip to content

Commit

Permalink
Updating CFDA Assistance Listings and script (#4269)
Browse files Browse the repository at this point in the history
* Updating CFDA lookup and script

* Lint

* Generating templates

* Adding readme

* Updating readme

* Bumping to 1.1.3 and using Program Title column

* Updating readme

* Updating readme
  • Loading branch information
phildominguez-gsa authored Sep 12, 2024
1 parent 5b73f02 commit eaad320
Show file tree
Hide file tree
Showing 31 changed files with 38,868 additions and 10,322 deletions.
1 change: 1 addition & 0 deletions backend/audit/intakelib/checks/check_version_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"1.1.0",
"1.1.1",
"1.1.2",
"1.1.3",
}


Expand Down
6 changes: 3 additions & 3 deletions backend/schemas/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ xlsx = $(wildcard output/excel/xlsx/*-workbook*.xlsx)
json = $(wildcard output/excel/json/*.json)

source_data:
python scripts/generate_lookup_schemas.py source/data/cfda-lookup-20230626.csv source/base/FederalProgramNames.json
python scripts/generate_lookup_schemas.py source/data/cluster-names-20230626.csv source/base/ClusterNames.json
python scripts/generate_lookup_schemas.py cfda-lookup source/base/FederalProgramNames.json
python scripts/generate_lookup_schemas.py cluster-names source/base/ClusterNames.json

clean:
for f in $(xlsx); do \
Expand All @@ -25,7 +25,7 @@ clean:
rm $$f; \
done

build_audit_json:
build_audit_json:
for jsonnet_file in $(audit_specs); do \
base_name=$$(basename "$$jsonnet_file" .jsonnet); \
jsonnet -o output/audit/"$$base_name.json" "$$jsonnet_file"; \
Expand Down
15 changes: 15 additions & 0 deletions backend/schemas/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Bumping workbook template version

Follow these steps to version bump the workbook templates:
- `backend/schemas/source/excel/libs/Sheets.libsonnet`: Update the `WORKBOOKS_VERSION` variable
- `backend/audit/intakelib/checks/check_version_number.py`: Update the `AUTHORIZED_VERSIONS` variable
- Run `make all` to generate new schemas and templates
- Once your PR is merged, don't forget to copy the new templates, found in `backend/schemas/output/excel/xlsx/`, into `assets/workbooks/` of the [static site repo](https://github.com/GSA-TTS/FAC-transition-site).

# Updating the CFDA listings

The current CFDA assistance listings are in the CSV found [here](https://sam.gov/data-services/Assistance%20Listings/datagov?privacy=Public). When downloading the file, save it in the format `cfda-lookup-YYYYMMDD.csv` in the `/schemas/source/data` directory. Running `make all` should be sufficient to regenerate the lookup schemas and the Excel templates.

More specifically, `make all` executes `make source_data`, which calls `generate_lookup_schemas.py`. This script can generate cluster names, CFDA listings, or agencies, depending on the args given (see the docstring in the script). The script will automatically use the latest-dated CSV file for processing. This way, the Makefile doesn't have to be repeatedly changed and we can retain the historic files. The format of the CSVs can change (and has), so changes to `generate_lookup_schemas.py` may be necessary in the future, and non-current files may no longer be processable.

If you get a `UnicodeDecodeError`, you may have to manually save it with UTF-8 encoding (in VSCode, click UTF-8 in the bottom right and select "Save with encoding".)
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
9,094 changes: 4,113 additions & 4,981 deletions backend/schemas/output/excel/json/federal-awards-workbook.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"validation": {
"type": "NOVALIDATION"
},
"value": "1.1.2",
"value": "1.1.3",
"width": 48
},
{
Expand Down
Binary file modified backend/schemas/output/excel/xlsx/additional-eins-workbook.xlsx
Binary file not shown.
Binary file modified backend/schemas/output/excel/xlsx/additional-ueis-workbook.xlsx
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified backend/schemas/output/excel/xlsx/federal-awards-workbook.xlsx
Binary file not shown.
Binary file modified backend/schemas/output/excel/xlsx/notes-to-sefa-workbook.xlsx
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/AdditionalEINs.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/AdditionalUEIs.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/FederalAwards.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8670,7 +8670,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
2 changes: 1 addition & 1 deletion backend/schemas/output/sections/NotesToSefa.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"type": "string"
},
"version": {
"const": "1.1.2",
"const": "1.1.3",
"type": "string"
}
},
Expand Down
69 changes: 49 additions & 20 deletions backend/schemas/scripts/generate_lookup_schemas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
import pandas as pd
import glob
import json
import pandas as pd
import sys

"""
This script processes CFDA/ALN and cluster name CSV files to generate schema
JSON, and it can be run using `make source_data`. Input files are found in
`schemas/source/data`, and the latest-dated file will be used. To run manually:
`python scripts/generate_lookup_schemas.py <item to process> <output JSON filepath>`
where "item to process" is either "cfda-lookup" or "cluster-names".
"""


def cleanup_string(s):
s = str(s).strip()
Expand All @@ -14,20 +25,23 @@ def lmap(fun, ls):
return list(map(fun, ls))


def process_cfda_lookup(arg):
df = pd.read_csv(arg[1], converters={"CFDAEXT": str})
def process_cfda_lookup(file_path):
df = pd.read_csv(file_path, encoding="utf-8", converters={"Program Number": str})

# Build a couple of Python objects to render as
# JSON, and then as Jsonnet
program_names = list(df["FEDERALPROGRAMNAME"])
program_names = list(df["Program Title"])
program_numbers = list(df["Program Number"])

unique_prefixes_dict = {}
for prefix in df["CFDAPREFIX"]:
unique_prefixes_dict[prefix] = prefix
unique_prefix_list = list(unique_prefixes_dict.keys())

unique_cfda_dict = {}
for index, row in df.iterrows():
unique_cfda_dict[f"{row['CFDAPREFIX']}.{row['CFDAEXT']}"] = 1

for program_number in program_numbers:
prefix, _ = program_number.split(".")
unique_prefixes_dict[prefix] = None
unique_cfda_dict[program_number] = None

unique_prefix_list = list(unique_prefixes_dict.keys())
unique_cfda_list = list(unique_cfda_dict.keys())

# Clean everything up
Expand All @@ -45,8 +59,8 @@ def process_cfda_lookup(arg):
}


def process_cluster_names(arg):
df = pd.read_csv(arg[1])
def process_cluster_names(filename):
df = pd.read_csv(filename)
cluster_names = list(df["NAME"])
# Clean everything up
cluster_names = lmap(cleanup_string, cluster_names)
Expand All @@ -60,15 +74,30 @@ def process_cluster_names(arg):

if __name__ == "__main__":
if len(sys.argv) >= 2:
filename = sys.argv[1]
obj = None
if "cfda-lookup" in filename.lower():
obj = process_cfda_lookup(sys.argv)
elif "cluster-names" in filename.lower():
obj = process_cluster_names(sys.argv)
else:
print("Unknown filename, exiting")
item_to_process = sys.argv[1]
glob_str = f"./source/data/{item_to_process}-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9].csv"

print(f"Globbing for {glob_str}")

list_of_files = glob.glob(glob_str)
print(f"Found {len(list_of_files)} files")

if not len(list_of_files):
print(f"No {item_to_process} CSV files found in schemas/source/data/")
sys.exit(1)

latest_file = sorted(list_of_files)[-1]
print(f"Processing latest file {latest_file}")

obj = None
match item_to_process:
case "cfda-lookup":
obj = process_cfda_lookup(latest_file)
case "cluster-names":
obj = process_cluster_names(latest_file)
case _:
print("Unknown filename, exiting")
sys.exit(1)

with open(sys.argv[2], "w", newline="\n") as write_file:
json.dump(obj, write_file, indent=2, sort_keys=True)
Loading

0 comments on commit eaad320

Please sign in to comment.