From 1daa2edf97869dea06144e7cc74aabe243641a40 Mon Sep 17 00:00:00 2001 From: Phil Dominguez <142051477+phildominguez-gsa@users.noreply.github.com> Date: Fri, 27 Oct 2023 11:55:17 -0400 Subject: [PATCH] Removing old workbook_generator tool (#2635) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Phil Dominguez <“philip.dominguez@gsa.gov”> --- tools/workbook-generator/.gitignore | 5 - tools/workbook-generator/README.md | 73 - tools/workbook-generator/cog_oversight_dbkeys | 1703 ----------------- tools/workbook-generator/data/.gitignore | 1 - tools/workbook-generator/data/ay22/models.py | 299 --- tools/workbook-generator/historic_workbooks | 15 - tools/workbook-generator/main.py | 496 ----- tools/workbook-generator/requirements.txt | 3 - .../templates/PLACE-TEMPLATE-FILES-HERE | 0 9 files changed, 2595 deletions(-) delete mode 100644 tools/workbook-generator/.gitignore delete mode 100644 tools/workbook-generator/README.md delete mode 100644 tools/workbook-generator/cog_oversight_dbkeys delete mode 100644 tools/workbook-generator/data/.gitignore delete mode 100644 tools/workbook-generator/data/ay22/models.py delete mode 100755 tools/workbook-generator/historic_workbooks delete mode 100644 tools/workbook-generator/main.py delete mode 100644 tools/workbook-generator/requirements.txt delete mode 100644 tools/workbook-generator/templates/PLACE-TEMPLATE-FILES-HERE diff --git a/tools/workbook-generator/.gitignore b/tools/workbook-generator/.gitignore deleted file mode 100644 index 181d88bc03..0000000000 --- a/tools/workbook-generator/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -__pycache__/** -data/ay22/*.sqlite3 -output/** -templates/*.xlsx -.~lock* diff --git a/tools/workbook-generator/README.md b/tools/workbook-generator/README.md deleted file mode 100644 index 91bbe04b95..0000000000 --- a/tools/workbook-generator/README.md +++ /dev/null @@ -1,73 +0,0 @@ -# workbook-generator - -## The spirit/concept of the thing - 
-The spirit of this spike was the following: - -1. We have an ETL process that (arguably) needs (valid, populated) workbooks as input. -2. As output, the ETL processes puts data in dissemination tables. -3. Instead of checking if the data is in the dissemination tables, a regression/E2E test for the data would be to make API calls that extract the data. -4. If you have the data in your hands when you generate the workbook, you could *also* generate test tables to drive regression tests. - -Meaning: - -1. If we spit out populated workbooks, and -2. A set of testing tables that say "*this* endpoint should produce *this* data" - -All driven off of data we believe to be good, then we have a way of generating multiple, populated test cases. - -### Possible gravy - -1. To test cross-validation, we can use sets of workbooks output by the tool. In theory, they should all validate. -2. We can take sets of workbooks, and manually introduce errors that we know will produce cross-validation errors. These can then become part of a regression suite for cross-validation. - -In other words, the goal was to quickly explore a test-data-creator. - - -## Running the tool - -``` -python main.py --dbkey -``` - -E.g. - -``` -python main.py --dbkey 100010 -``` - -will generate two files (at the moment) in `output`. - -For now, only one-and-a-bit workbooks are generated. I started with federal awards, being the largest workbook. Note that data sometimes has to be pulled from multiple database tables to assemble a single workbook... and, some of our identifiers are new/different, so there's some reverse mapping/generation that has to be done in order to create new-style workbooks from old-style data. - -But, I think it's still better than trying to create test workbooks by hand. - -## Requirements - -1. In `data/ay22`, place the file `allfac22.sqlite3`. This is currently hard-coded into `models.py`. For now/for the spike, 2022 data should be plenty. -2. 
In `templates`, place recent workbook template files. Their names are also hard-coded into the script. If we change the workbook names (which, in theory, we're going to, to add version numbers...) this will break. If we keep this tool, we can be smarter about this. -3. The directory `output` will be created by the script. - -The sqlite file can be found in our GDrive. Or, you can use `generate-sqlite-files` to create one from a Census `.zip` file of the AY22 data. - -## Notes - -All of the data in the SQLite file is guaranteed to be public. The SQLite file was generated from the pipe-delimited CSVs that Census distributes on their website. So, it is data that Census published, and we converted from CSV to SQLite. - -The `models.py` file was generated with the `pwiz` tool. It is part of the `peewee` suite of tools that are associated with that (small?) Python ORM. - -Something like - -``` -pwiz allfac22.sqlite3 > models.py -``` - -generated the file. - -The `models.py` can be made to take a dynamic path to a database, which would allow us to put the `sqlite3` file somewhere else. I have not attempted this change, because this is a spike, and it may not be necessary. - -If we come up with a use-case to improve this tool, we'll do so. - -## And... - -This is a spike. It can be thrown away. Or, it might have value. YMMV. 
diff --git a/tools/workbook-generator/cog_oversight_dbkeys b/tools/workbook-generator/cog_oversight_dbkeys deleted file mode 100644 index 632f5cd740..0000000000 --- a/tools/workbook-generator/cog_oversight_dbkeys +++ /dev/null @@ -1,1703 +0,0 @@ -101078 -101101 -101106 -101129 -101136 -101139 -101209 -101232 -101236 -101267 -101351 -101742 -102253 -102318 -102323 -102442 -102701 -10394 -104198 -104248 -104277 -104609 -105010 -105204 -1062 -106315 -106316 -10633 -10677 -10689 -10698 -10780 -10860 -109213 -109221 -109269 -109321 -109754 -109945 -111084 -111571 -111596 -111758 -11261 -113416 -11343 -113665 -113680 -114207 -114425 -114429 -114648 -11478 -115770 -115787 -115791 -115793 -115796 -115810 -115819 -115836 -115971 -115999 -11634 -116380 -116391 -117072 -11754 -118294 -118321 -118364 -118369 -118431 -11852 -11916 -11980 -119812 -122362 -123150 -123265 -123466 -123473 -123517 -123525 -123574 -123693 -123694 -123802 -123893 -123976 -124020 -124066 -124143 -124278 -124464 -124481 -124616 -124640 -124691 -124798 -124865 -125231 -125252 -125751 -125770 -125871 -126425 -127155 -127 -127348 -127595 -127959 -128842 -128910 -128921 -128977 -128979 -128980 -128986 -129006 -129020 -129234 -129643 -129688 -129709 -129727 -129738 -129740 -129769 -129783 -129795 -129822 -129864 -130053 -130209 -130213 -130250 -130252 -130278 -130410 -130416 -130418 -130434 -130442 -130444 -130457 -130461 -130463 -130498 -130501 -130503 -130579 -130584 -130749 -13106 -13163 -131710 -131767 -131804 -131807 -131810 -131816 -131822 -131824 -131825 -131826 -131838 -131845 -131846 -131848 -131853 -131858 -131860 -131867 -131871 -131872 -131873 -131877 -131878 -131879 -131882 -131884 -131885 -131888 -131891 -131898 -131904 -131912 -131915 -131925 -131929 -131972 -132044 -132045 -132056 -132057 -132062 -132155 -132161 -132169 -132185 -132248 -132266 -132271 -132318 -132350 -132402 -132411 -132417 -132422 -132478 -132630 -132750 -133079 -133082 -133100 -133121 -133128 -133185 -133274 -133348 -133559 
-133572 -133615 -133619 -133649 -133733 -133742 -133756 -133770 -133787 -133823 -133826 -133840 -133844 -133849 -133858 -133863 -133864 -133865 -133871 -133873 -133876 -133880 -133881 -133907 -133914 -133915 -133916 -133922 -134025 -134028 -134034 -134044 -134046 -134053 -134055 -134076 -134080 -134083 -134084 -134087 -134088 -134092 -134096 -134100 -134103 -134105 -134110 -134119 -134120 -134122 -134136 -134137 -134138 -134142 -134144 -134150 -134153 -134155 -134184 -134193 -134200 -134208 -134213 -134215 -134233 -134236 -134323 -134346 -134458 -134460 -134561 -134596 -13468 -134720 -134732 -134734 -134751 -134765 -134855 -135326 -135331 -135336 -135361 -135363 -135400 -1354 -135667 -135673 -135683 -135844 -135912 -135992 -135998 -136060 -136063 -136077 -136090 -136110 -136118 -136122 -136123 -136124 -136126 -136129 -136133 -136138 -136225 -136246 -136274 -136350 -136521 -1366 -136718 -136721 -136728 -136729 -136732 -136736 -136738 -136752 -136760 -136762 -136764 -136770 -136772 -136778 -136781 -136783 -136784 -136786 -136788 -136795 -136797 -136798 -136799 -136805 -1368 -136829 -136904 -136943 -137490 -137671 -137677 -137690 -137707 -137721 -137724 -137732 -137769 -137785 -137874 -137875 -137883 -137884 -137893 -137894 -137895 -137896 -137901 -137921 -137965 -138010 -138350 -138467 -138509 -138538 -138560 -138590 -138595 -141344 -141358 -141433 -141569 -141865 -141961 -142117 -142179 -142228 -142234 -142235 -142292 -142300 -142676 -143130 -143278 -143452 -143559 -143683 -144054 -144594 -144663 -144671 -144797 -145235 -145652 -145798 -145808 -145849 -145869 -145871 -145872 -145873 -146075 -146140 -146487 -146521 -146599 -146632 -146695 -146970 -147031 -147032 -147033 -147040 -147051 -147055 -147064 -147077 -147110 -147111 -147114 -147719 -147725 -147738 -147739 -147741 -147742 -147743 -147913 -147916 -147919 -147937 -147942 -147949 -147952 -147954 -147965 -148509 -148653 -148655 -148656 -148657 -148661 -148662 -148663 -148665 -148666 -148671 -148713 -148931 
-149215 -150206 -150266 -150340 -150341 -150450 -150606 -150611 -150662 -150667 -150676 -150739 -150746 -151616 -151681 -15175 -15177 -151808 -151978 -152109 -152111 -152117 -152599 -152734 -152737 -152738 -152741 -152745 -152746 -152747 -15322 -153631 -153845 -153896 -153897 -153902 -154114 -154131 -15445 -154763 -154769 -154770 -154800 -154881 -154885 -15491 -155160 -15535 -155652 -155756 -155930 -155943 -155947 -155991 -156000 -156014 -156039 -156041 -156500 -156501 -156504 -156506 -156507 -156508 -156543 -156801 -156810 -157585 -158015 -158018 -158022 -158023 -158024 -158041 -158048 -158049 -158054 -158056 -158089 -158218 -158219 -158238 -158366 -158385 -158397 -158537 -158643 -158720 -158723 -158767 -158993 -160020 -160651 -160912 -160937 -160966 -160969 -160975 -161007 -161008 -161023 -161024 -161027 -161029 -161034 -161044 -161045 -161052 -161071 -161079 -161100 -161102 -161105 -161115 -161117 -161121 -161141 -161151 -161185 -161190 -161191 -161204 -161378 -161412 -161476 -161632 -161739 -161932 -162367 -162380 -162392 -162393 -162395 -162405 -162408 -162411 -162412 -162413 -162414 -162415 -162418 -162420 -162757 -163900 -163922 -164186 -164252 -164437 -164498 -164514 -164655 -164836 -165114 -165198 -165774 -165941 -165971 -165981 -166006 -166023 -166040 -166046 -166054 -166150 -166198 -166236 -166437 -166673 -166683 -166693 -166744 -166791 -166793 -166794 -166795 -166796 -169402 -169627 -169669 -169839 -169847 -169929 -169951 -170068 -170255 -170323 -170349 -170355 -170356 -170374 -170375 -170419 -170721 -170734 -170735 -170755 -170761 -170768 -170800 -170831 -171279 -171359 -171391 -171405 -171415 -171421 -171432 -171436 -17157 -171671 -171869 -171871 -171944 -171961 -171977 -172003 -172017 -172025 -172047 -172167 -172246 -17225 -172278 -172393 -17262 -17274 -172790 -172814 -172891 -172924 -173037 -173040 -17331 -173806 -173810 -173929 -173943 -173944 -173946 -173947 -174016 -174017 -174022 -174070 -174074 -174078 -174118 -174121 -174124 -174126 -174131 
-174145 -174175 -174192 -174195 -174196 -174197 -174198 -174228 -174230 -174242 -174317 -174318 -174321 -174324 -174325 -174326 -174329 -174330 -174332 -174334 -174335 -174337 -174338 -174339 -174341 -174369 -174374 -174376 -174379 -174561 -174605 -174635 -174674 -174726 -174800 -174807 -174817 -174824 -174829 -174833 -174839 -174859 -174864 -174912 -174916 -174918 -174953 -175005 -175013 -175159 -175221 -175253 -175266 -175267 -175279 -175283 -175300 -175301 -175651 -175887 -176036 -176050 -176058 -176072 -176104 -176116 -176254 -176258 -176268 -176274 -176280 -176285 -176299 -176313 -176362 -176397 -176796 -176851 -176997 -177114 -177119 -177142 -177181 -177189 -177320 -177387 -177452 -177820 -177867 -177869 -178024 -179284 -179346 -179495 -179719 -179738 -179767 -179770 -180123 -180143 -180174 -180177 -18063 -18064 -180800 -180812 -18082 -180840 -180845 -180847 -180849 -180875 -180880 -180895 -180900 -180901 -180945 -180960 -180985 -180995 -181003 -181010 -181037 -181048 -181071 -181086 -181090 -181178 -181225 -181251 -181292 -181298 -181306 -181330 -181375 -181377 -181395 -181427 -1815 -181547 -181549 -181552 -181600 -181601 -181645 -181646 -181657 -181658 -181661 -181667 -181734 -181737 -181744 -181758 -181790 -181811 -181877 -181885 -181925 -181946 -181984 -182042 -182086 -182089 -182143 -182191 -182201 -182237 -182281 -182312 -18232 -182353 -182396 -182415 -182474 -182557 -182598 -182608 -182630 -182640 -182654 -182669 -182813 -182827 -182916 -182926 -182941 -182960 -183000 -183029 -183038 -183178 -183200 -183299 -183342 -183356 -183392 -183414 -183597 -183705 -183782 -183850 -183954 -183956 -184216 -184233 -184257 -184259 -184274 -184292 -184315 -184321 -184331 -184465 -184509 -184530 -184565 -184609 -184655 -184677 -184744 -184805 -184959 -185191 -185273 -18543 -185545 -18565 -185703 -185713 -185746 -185848 -185899 -186051 -186063 -186066 -186354 -186518 -186541 -186616 -186634 -186694 -186728 -186782 -186787 -186914 -186979 -186988 -187010 -187036 -187125 
-187203 -187214 -187283 -187375 -187547 -187645 -187661 -187669 -187844 -18798 -187997 -187998 -188008 -188134 -188206 -188209 -188341 -188372 -188391 -188641 -188774 -189247 -189454 -189616 -189671 -189733 -189818 -189854 -189897 -189904 -189911 -189972 -190105 -190125 -190159 -190213 -190261 -190346 -190356 -190376 -190643 -190764 -191678 -191679 -191815 -191887 -191889 -192079 -192142 -192208 -192442 -192446 -192477 -192501 -192502 -192511 -192516 -192534 -192542 -192565 -192579 -192601 -192639 -192649 -192666 -192718 -192789 -192799 -192840 -192992 -193043 -193062 -193128 -193137 -193178 -193278 -193279 -193318 -193337 -193346 -193349 -193354 -193401 -193434 -193596 -193634 -193670 -193671 -193694 -193698 -193965 -193991 -194174 -194206 -1942 -194286 -194355 -194459 -194481 -194704 -194874 -195021 -195427 -195540 -195543 -195595 -195945 -195946 -196000 -196078 -196157 -196303 -196487 -196758 -196886 -196892 -196974 -196991 -197012 -197070 -197107 -197262 -197411 -197499 -197537 -197541 -197622 -197644 -197652 -197664 -197690 -197851 -197868 -198009 -198070 -198353 -198446 -198458 -198496 -198499 -198517 -198614 -198615 -198800 -198810 -199044 -199136 -199507 -199644 -199844 -200095 -200364 -20056 -200576 -20066 -200795 -200809 -20084 -201044 -201368 -201583 -201693 -201767 -201778 -201893 -202042 -202092 -202168 -202 -202471 -202639 -203014 -203062 -203160 -203375 -203568 -203744 -204306 -204659 -204880 -205004 -205354 -20545 -205501 -205726 -205901 -20627 -206285 -206676 -206851 -207008 -207677 -207994 -208421 -208656 -208943 -209135 -209224 -209451 -209589 -209715 -210104 -210445 -210452 -210565 -210661 -211212 -211446 -211653 -211669 -211671 -211692 -211695 -211899 -212257 -212445 -212629 -212675 -212942 -2130 -2131 -213388 -2135 -214017 -214366 -215447 -2155 -215840 -215856 -215905 -216160 -2164 -216666 -217330 -217342 -217393 -217428 -217653 -217851 -218144 -218150 -218267 -2187 -218734 -218774 -218994 -219000 -2190 -219041 -219042 -219107 -219340 -219459 
-219566 -219643 -219918 -220190 -220827 -220829 -2209 -220957 -220999 -221072 -221087 -221191 -221266 -221283 -221323 -221552 -222011 -222035 -222290 -222978 -223420 -223716 -224072 -224269 -224703 -225629 -225665 -226112 -226636 -226802 -227622 -227687 -227824 -227852 -228058 -228066 -228272 -229203 -229253 -229340 -229449 -229592 -229596 -229737 -229893 -230070 -230189 -230310 -230355 -230423 -230427 -231133 -231143 -231358 -231704 -231775 -231858 -231934 -232272 -232275 -232693 -232747 -233179 -233569 -233817 -233916 -234054 -23463 -23522 -23549 -235613 -235872 -236094 -236686 -237045 -237461 -237887 -238045 -238098 -238280 -238304 -238343 -238347 -2385 -238746 -238799 -238808 -238890 -239636 -239979 -24011 -240238 -240303 -240418 -240442 -240587 -240754 -240925 -241068 -241118 -241168 -242068 -242487 -242667 -242736 -242795 -242882 -242988 -243303 -243901 -244060 -2443 -244411 -244560 -244592 -244593 -244616 -244668 -244758 -244798 -244946 -245063 -245385 -245423 -245476 -245542 -245568 -245647 -245842 -246131 -246145 -246340 -246499 -246537 -246611 -246678 -246684 -246806 -246812 -246909 -247269 -247412 -247649 -247806 -247889 -247948 -248006 -248133 -248139 -248303 -248565 -248578 -248730 -249248 -249257 -249261 -249308 -249310 -249438 -249484 -249502 -249510 -250331 -250736 -250744 -250753 -251133 -251143 -251188 -251285 -251563 -251628 -251672 -251723 -251776 -251777 -252569 -252657 -252793 -253329 -253438 -253614 -253672 -25374 -25395 -253952 -253956 -25413 -25418 -254327 -254570 -254836 -255093 -255110 -25515 -255162 -255171 -25526 -25529 -25540 -25545 -25547 -25567 -255712 -255982 -256201 -25638 -256678 -25705 -257559 -25774 -258028 -258050 -258054 -258066 -258112 -258151 -258431 -258457 -258472 -258477 -258496 -26234 -2667 -27118 -2723 -27372 -27760 -28540 -28643 -2932 -2940 -29479 -29588 -2961 -29737 -3013 -3019 -31045 -3122 -3141 -31596 -31937 -32124 -3235 -3278 -3308 -33196 -33339 -3379 -3416 -34557 -3471 -35195 -35206 -35212 -35469 -35683 -35732 
-35765 -35809 -35942 -36006 -37223 -37834 -37861 -37918 -38105 -3886 -41084 -41447 -41588 -42354 -4261 -42780 -44389 -45463 -45486 -45506 -45510 -45589 -45608 -47025 -47703 -47882 -48063 -48272 -48297 -48313 -48335 -48349 -48352 -48426 -48480 -48485 -48517 -49171 -49782 -50485 -50593 -50642 -50976 -511 -52434 -53254 -53625 -54157 -54829 -54901 -55659 -55715 -56674 -57098 -58055 -58639 -58643 -5865 -58834 -59643 -5982 -6016 -61396 -61412 -64966 -65539 -65687 -65809 -658 -65841 -65853 -65863 -65867 -65887 -65989 -6652 -6735 -67983 -68000 -68053 -68080 -68101 -69071 -69267 -69462 -6983 -7022 -70282 -7055 -71243 -71253 -72105 -72628 -72985 -73286 -73390 -738 -73971 -74286 -74643 -74976 -75070 -75117 -75188 -75191 -75207 -75269 -75348 -75514 -75735 -76672 -77241 -77809 -77823 -77824 -77831 -77849 -77883 -78019 -78028 -78063 -78383 -78442 -78670 -79002 -79142 -79792 -80286 -80920 -81316 -81341 -81346 -81362 -81366 -81375 -81635 -81941 -82593 -82670 -83091 -83112 -83404 -83408 -84632 -84650 -84663 -8469 -8486 -8488 -8489 -8490 -8518 -85 -85388 -85608 -85763 -8650 -86512 -8656 -87167 -87296 -87864 -87886 -87899 -87900 -88048 -88084 -8813 -88388 -8851 -88532 -88612 -8862 -88715 -88747 -88898 -89485 -895 -89865 -90240 -90356 -90878 -91183 -91200 -91651 -91678 -91751 -91926 -91992 -92121 -92927 -92977 -92979 -92997 -93013 -93016 -93019 -93230 -93339 -93650 -94539 -94582 -94765 -95627 -95684 -95734 -96882 -96890 -97664 -98107 -98449 -98491 -98545 -98571 -98680 -9899 -99942 diff --git a/tools/workbook-generator/data/.gitignore b/tools/workbook-generator/data/.gitignore deleted file mode 100644 index b1fd37d048..0000000000 --- a/tools/workbook-generator/data/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*/**journal diff --git a/tools/workbook-generator/data/ay22/models.py b/tools/workbook-generator/data/ay22/models.py deleted file mode 100644 index 5165bf0ad6..0000000000 --- a/tools/workbook-generator/data/ay22/models.py +++ /dev/null @@ -1,299 +0,0 @@ -from peewee import * - 
-database = SqliteDatabase('data/ay22/allfac22.sqlite3') - -class UnknownField(object): - def __init__(self, *_, **__): pass - -class BaseModel(Model): - class Meta: - database = database - -class Agency(BaseModel): - agency = TextField(column_name='AGENCY', null=True) - audityear = TextField(column_name='AUDITYEAR', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - ein = TextField(column_name='EIN', null=True) - - class Meta: - table_name = 'agency' - primary_key = False - -class Captext(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - chartstables = TextField(column_name='CHARTSTABLES', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - findingrefnums = TextField(column_name='FINDINGREFNUMS', null=True) - seq_number = TextField(column_name='SEQ_NUMBER', null=True) - text = TextField(column_name='TEXT', null=True) - - class Meta: - table_name = 'captext' - primary_key = False - -class CaptextFormatted(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - chartstables = TextField(column_name='CHARTSTABLES', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - findingrefnums = TextField(column_name='FINDINGREFNUMS', null=True) - seq_number = TextField(column_name='SEQ_NUMBER', null=True) - text = TextField(column_name='TEXT', null=True) - - class Meta: - table_name = 'captext_formatted' - primary_key = False - -class Cfda(BaseModel): - amount = TextField(column_name='AMOUNT', null=True) - arra = TextField(column_name='ARRA', null=True) - audityear = TextField(column_name='AUDITYEAR', null=True) - awardidentification = TextField(column_name='AWARDIDENTIFICATION', null=True) - cfda = TextField(column_name='CFDA', null=True) - cfdaprogramname = TextField(column_name='CFDAPROGRAMNAME', null=True) - clustername = TextField(column_name='CLUSTERNAME', null=True) - clustertotal = TextField(column_name='CLUSTERTOTAL', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - direct = 
TextField(column_name='DIRECT', null=True) - ein = TextField(column_name='EIN', null=True) - elecauditsid = TextField(column_name='ELECAUDITSID', null=True) - federalprogramname = TextField(column_name='FEDERALPROGRAMNAME', null=True) - findingrefnums = TextField(column_name='FINDINGREFNUMS', null=True) - findings = TextField(column_name='FINDINGS', null=True) - findingscount = TextField(column_name='FINDINGSCOUNT', null=True) - loanbalance = TextField(column_name='LOANBALANCE', null=True) - loans = TextField(column_name='LOANS', null=True) - majorprogram = TextField(column_name='MAJORPROGRAM', null=True) - otherclustername = TextField(column_name='OTHERCLUSTERNAME', null=True) - passthroughamount = TextField(column_name='PASSTHROUGHAMOUNT', null=True) - passthroughaward = TextField(column_name='PASSTHROUGHAWARD', null=True) - programtotal = TextField(column_name='PROGRAMTOTAL', null=True) - qcosts2 = TextField(column_name='QCOSTS2', null=True) - rd = TextField(column_name='RD', null=True) - stateclustername = TextField(column_name='STATECLUSTERNAME', null=True) - typereport_mp = TextField(column_name='TYPEREPORT_MP', null=True) - typerequirement = TextField(column_name='TYPEREQUIREMENT', null=True) - - class Meta: - table_name = 'cfda' - primary_key = False - -class Cpas(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - cpacity = TextField(column_name='CPACITY', null=True) - cpacontact = TextField(column_name='CPACONTACT', null=True) - cpaein = TextField(column_name='CPAEIN', null=True) - cpaemail = TextField(column_name='CPAEMAIL', null=True) - cpafax = TextField(column_name='CPAFAX', null=True) - cpafirmname = TextField(column_name='CPAFIRMNAME', null=True) - cpaphone = TextField(column_name='CPAPHONE', null=True) - cpastate = TextField(column_name='CPASTATE', null=True) - cpastreet1 = TextField(column_name='CPASTREET1', null=True) - cpatitle = TextField(column_name='CPATITLE', null=True) - cpazipcode = TextField(column_name='CPAZIPCODE', 
null=True) - dbkey = TextField(column_name='DBKEY', null=True) - - class Meta: - table_name = 'cpas' - primary_key = False - -class Duns(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - duns = TextField(column_name='DUNS', null=True) - dunseqnum = TextField(column_name='DUNSEQNUM', null=True) - - class Meta: - table_name = 'duns' - primary_key = False - -class Eins(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - ein = TextField(column_name='EIN', null=True) - einseqnum = TextField(column_name='EINSEQNUM', null=True) - - class Meta: - table_name = 'eins' - primary_key = False - -class Findings(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - elecauditfindingsid = TextField(column_name='ELECAUDITFINDINGSID', null=True) - elecauditsid = TextField(column_name='ELECAUDITSID', null=True) - findingsrefnums = TextField(column_name='FINDINGSREFNUMS', null=True) - materialweakness = TextField(column_name='MATERIALWEAKNESS', null=True) - modifiedopinion = TextField(column_name='MODIFIEDOPINION', null=True) - otherfindings = TextField(column_name='OTHERFINDINGS', null=True) - othernoncompliance = TextField(column_name='OTHERNONCOMPLIANCE', null=True) - priorfindingrefnums = TextField(column_name='PRIORFINDINGREFNUMS', null=True) - qcosts = TextField(column_name='QCOSTS', null=True) - repeatfinding = TextField(column_name='REPEATFINDING', null=True) - significantdeficiency = TextField(column_name='SIGNIFICANTDEFICIENCY', null=True) - typerequirement = TextField(column_name='TYPEREQUIREMENT', null=True) - - class Meta: - table_name = 'findings' - primary_key = False - -class Findingstext(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - chartstables = TextField(column_name='CHARTSTABLES', null=True) - dbkey = 
TextField(column_name='DBKEY', null=True) - findingrefnums = TextField(column_name='FINDINGREFNUMS', null=True) - seq_number = TextField(column_name='SEQ_NUMBER', null=True) - text = TextField(column_name='TEXT', null=True) - - class Meta: - table_name = 'findingstext' - primary_key = False - -class FindingstextFormatted(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - chartstables = TextField(column_name='CHARTSTABLES', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - findingrefnums = TextField(column_name='FINDINGREFNUMS', null=True) - seq_number = TextField(column_name='SEQ_NUMBER', null=True) - text = TextField(column_name='TEXT', null=True) - - class Meta: - table_name = 'findingstext_formatted' - primary_key = False - -class Gen(BaseModel): - auditeecontact = TextField(column_name='AUDITEECONTACT', null=True) - auditeedatesigned = TextField(column_name='AUDITEEDATESIGNED', null=True) - auditeeemail = TextField(column_name='AUDITEEEMAIL', null=True) - auditeefax = TextField(column_name='AUDITEEFAX', null=True) - auditeename = TextField(column_name='AUDITEENAME', null=True) - auditeenametitle = TextField(column_name='AUDITEENAMETITLE', null=True) - auditeephone = TextField(column_name='AUDITEEPHONE', null=True) - auditeetitle = TextField(column_name='AUDITEETITLE', null=True) - auditor_ein = TextField(column_name='AUDITOR_EIN', null=True) - audittype = TextField(column_name='AUDITTYPE', null=True) - audityear = TextField(column_name='AUDITYEAR', null=True) - city = TextField(column_name='CITY', null=True) - cogagency = TextField(column_name='COGAGENCY', null=True) - cog_over = TextField(column_name='COG_OVER', null=True) - cpacity = TextField(column_name='CPACITY', null=True) - cpacontact = TextField(column_name='CPACONTACT', null=True) - cpacountry = TextField(column_name='CPACOUNTRY', null=True) - cpadatesigned = TextField(column_name='CPADATESIGNED', null=True) - cpaemail = TextField(column_name='CPAEMAIL', null=True) 
- cpafax = TextField(column_name='CPAFAX', null=True) - cpafirmname = TextField(column_name='CPAFIRMNAME', null=True) - cpaforeign = TextField(column_name='CPAFOREIGN', null=True) - cpaphone = TextField(column_name='CPAPHONE', null=True) - cpastate = TextField(column_name='CPASTATE', null=True) - cpastreet1 = TextField(column_name='CPASTREET1', null=True) - cpastreet2 = TextField(column_name='CPASTREET2', null=True) - cpatitle = TextField(column_name='CPATITLE', null=True) - cpazipcode = TextField(column_name='CPAZIPCODE', null=True) - cyfindings = TextField(column_name='CYFINDINGS', null=True) - datefirewall = TextField(column_name='DATEFIREWALL', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - dollarthreshold = TextField(column_name='DOLLARTHRESHOLD', null=True) - duns = TextField(column_name='DUNS', null=True) - dup_reports = TextField(column_name='DUP_REPORTS', null=True) - ein = TextField(column_name='EIN', null=True) - einsubcode = TextField(column_name='EINSUBCODE', null=True) - entity_type = TextField(column_name='ENTITY_TYPE', null=True) - facaccepteddate = TextField(column_name='FACACCEPTEDDATE', null=True) - fyenddate = TextField(column_name='FYENDDATE', null=True) - goingconcern = TextField(column_name='GOINGCONCERN', null=True) - lowrisk = TextField(column_name='LOWRISK', null=True) - materialnoncompliance = TextField(column_name='MATERIALNONCOMPLIANCE', null=True) - materialweakness = TextField(column_name='MATERIALWEAKNESS', null=True) - materialweakness_mp = TextField(column_name='MATERIALWEAKNESS_MP', null=True) - multipleduns = TextField(column_name='MULTIPLEDUNS', null=True) - multipleeins = TextField(column_name='MULTIPLEEINS', null=True) - multipleueis = TextField(column_name='MULTIPLEUEIS', null=True) - multiple_cpas = TextField(column_name='MULTIPLE_CPAS', null=True) - numbermonths = TextField(column_name='NUMBERMONTHS', null=True) - oversightagency = TextField(column_name='OVERSIGHTAGENCY', null=True) - periodcovered = 
TextField(column_name='PERIODCOVERED', null=True) - previousdatefirewall = TextField(column_name='PREVIOUSDATEFIREWALL', null=True) - pyschedule = TextField(column_name='PYSCHEDULE', null=True) - qcosts = TextField(column_name='QCOSTS', null=True) - reportablecondition = TextField(column_name='REPORTABLECONDITION', null=True) - reportablecondition_mp = TextField(column_name='REPORTABLECONDITION_MP', null=True) - reportrequired = TextField(column_name='REPORTREQUIRED', null=True) - sp_framework = TextField(column_name='SP_FRAMEWORK', null=True) - sp_framework_required = TextField(column_name='SP_FRAMEWORK_REQUIRED', null=True) - state = TextField(column_name='STATE', null=True) - street1 = TextField(column_name='STREET1', null=True) - street2 = TextField(column_name='STREET2', null=True) - totfedexpend = TextField(column_name='TOTFEDEXPEND', null=True) - typeofentity = TextField(column_name='TYPEOFENTITY', null=True) - typereport_fs = TextField(column_name='TYPEREPORT_FS', null=True) - typereport_mp = TextField(column_name='TYPEREPORT_MP', null=True) - typereport_sp_framework = TextField(column_name='TYPEREPORT_SP_FRAMEWORK', null=True) - uei = TextField(column_name='UEI', null=True) - zipcode = TextField(column_name='ZIPCODE', null=True) - - class Meta: - table_name = 'gen' - primary_key = False - -class Notes(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - content = TextField(column_name='CONTENT', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - id = TextField(column_name='ID', null=True) - note_index = TextField(column_name='NOTE_INDEX', null=True) - reportid = TextField(column_name='REPORTID', null=True) - seq_number = TextField(column_name='SEQ_NUMBER', null=True) - title = TextField(column_name='TITLE', null=True) - type_id = TextField(column_name='TYPE_ID', null=True) - version = TextField(column_name='VERSION', null=True) - - class Meta: - table_name = 'notes' - primary_key = False - -class Passthrough(BaseModel): - 
audityear = TextField(column_name='AUDITYEAR', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - elecauditsid = TextField(column_name='ELECAUDITSID', null=True) - passthroughid = TextField(column_name='PASSTHROUGHID', null=True) - passthroughname = TextField(column_name='PASSTHROUGHNAME', null=True) - - class Meta: - table_name = 'passthrough' - primary_key = False - -class Revisions(BaseModel): - auditinfo = TextField(column_name='AUDITINFO', null=True) - auditinfo_explain = TextField(column_name='AUDITINFO_EXPLAIN', null=True) - audityear = TextField(column_name='AUDITYEAR', null=True) - cap = TextField(column_name='CAP', null=True) - cap_explain = TextField(column_name='CAP_EXPLAIN', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - elecrptrevisionid = TextField(column_name='ELECRPTREVISIONID', null=True) - federalawards = TextField(column_name='FEDERALAWARDS', null=True) - federalawards_explain = TextField(column_name='FEDERALAWARDS_EXPLAIN', null=True) - findings = TextField(column_name='FINDINGS', null=True) - findingstext = TextField(column_name='FINDINGSTEXT', null=True) - findingstext_explain = TextField(column_name='FINDINGSTEXT_EXPLAIN', null=True) - findings_explain = TextField(column_name='FINDINGS_EXPLAIN', null=True) - geninfo = TextField(column_name='GENINFO', null=True) - geninfo_explain = TextField(column_name='GENINFO_EXPLAIN', null=True) - notestosefa = TextField(column_name='NOTESTOSEFA', null=True) - notestosefa_explain = TextField(column_name='NOTESTOSEFA_EXPLAIN', null=True) - other = TextField(column_name='OTHER', null=True) - other_explain = TextField(column_name='OTHER_EXPLAIN', null=True) - - class Meta: - table_name = 'revisions' - primary_key = False - -class Ueis(BaseModel): - audityear = TextField(column_name='AUDITYEAR', null=True) - dbkey = TextField(column_name='DBKEY', null=True) - uei = TextField(column_name='UEI', null=True) - ueiseqnum = TextField(column_name='UEISEQNUM', null=True) - - class 
Meta: - table_name = 'ueis' - primary_key = False - diff --git a/tools/workbook-generator/historic_workbooks b/tools/workbook-generator/historic_workbooks deleted file mode 100755 index 5df7ddf708..0000000000 --- a/tools/workbook-generator/historic_workbooks +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -declare -a keys=( - "101078" - "101101" - "101106" - "99989" - "175887" - ) - -for key in "${keys[@]}" -do - echo "$key" - python main.py --output ../../backend/data_fixtures/historic/ --dbkey $key -done diff --git a/tools/workbook-generator/main.py b/tools/workbook-generator/main.py deleted file mode 100644 index 17387255bd..0000000000 --- a/tools/workbook-generator/main.py +++ /dev/null @@ -1,496 +0,0 @@ -from collections import defaultdict -from collections import namedtuple as NT -from pathlib import Path -from playhouse.shortcuts import model_to_dict, dict_to_model -import os -import re -import sys - -import argparse -import json -import pprint -import openpyxl as pyxl - -pp = pprint.PrettyPrinter(indent=2) - -# FIXME -# We'll want to figure out how to dynamically import the -# models for a given year based on a command-line input, and -# then use that to match queries against the SQLite DBs. -# However, if we only use AY22 data, it won't matter. -from data.ay22.models import ( - Cfda, - Gen, - Passthrough, - Findings, - Findingstext, - Ueis, - Notes, - Cpas, - Captext -) - -parser = argparse.ArgumentParser() - -# This provides a way to map the sheet in the workbook to the -# column in the DB. It also has a default value and -# the type of value, so that things can be set correctly -# before filling in the XLSX workbooks. 
FieldMap = NT('FieldMap', 'in_sheet in_db default type')

# tools/workbook-generator/templates/additional-ueis-workbook.xlsx
# tools/workbook-generator/templates/audit-findings-text-workbook.xlsx
# tools/workbook-generator/templates/corrective-action-plan-workbook.xlsx
# tools/workbook-generator/templates/federal-awards-audit-findings-workbook.xlsx
# tools/workbook-generator/templates/federal-awards-workbook.xlsx
# tools/workbook-generator/templates/notes-to-sefa-workbook.xlsx
# tools/workbook-generator/templates/PLACE-TEMPLATE-FILES-HERE
# tools/workbook-generator/templates/secondary-auditors-workbook.xlsx

# Short workbook name -> template file name inside the templates/ directory.
templates = {
    'AdditionalUEIs': 'additional-ueis-workbook.xlsx',
    'AuditFindingsText': 'audit-findings-text-workbook.xlsx',
    'CAP': 'corrective-action-plan-workbook.xlsx',
    'AuditFindings': 'federal-awards-audit-findings-workbook.xlsx',
    'FederalAwards': 'federal-awards-workbook.xlsx',
    'SEFA': 'notes-to-sefa-workbook.xlsx',
    'SecondaryAuditors': 'secondary-auditors-workbook.xlsx',
}


def set_single_cell_range(wb, range_name, value):
    """Write ``value`` into the cell that the named range ``range_name`` targets.

    Args:
        wb: Workbook-like object exposing ``defined_names`` and sheet lookup
            by title (``wb[title]``).
        range_name: Name of a defined range in ``wb``.
        value: Value to store in the referenced cell.
    """
    the_range = wb.defined_names[range_name]
    # `destinations` is an iterable of (sheet title, cell reference) pairs;
    # only the first destination is used. The reference is absolute
    # ('$A$2'), so the '$' markers are stripped before indexing the sheet.
    # https://itecnote.com/tecnote/python-using-excel-named-ranges-in-python-with-openpyxl/
    sheet_title, cell_ref = list(the_range.destinations)[0]
    ws = wb[sheet_title]
    ws[cell_ref.replace('$', '')] = value


def col_to_ndx(col):
    """Convert a spreadsheet column label to its 1-based column index.

    Handles multi-letter labels and is case-insensitive:
    ``'A'`` -> 1, ``'Z'`` -> 26, ``'AA'`` -> 27, ``'b'`` -> 2.

    Raises:
        ValueError: if ``col`` is empty or contains anything but A-Z letters.
    """
    label = col.upper()
    if not label or any(not ('A' <= ch <= 'Z') for ch in label):
        raise ValueError(f'not a column label: {col!r}')
    ndx = 0
    # Column labels are a bijective base-26 number with digits A..Z = 1..26.
    for ch in label:
        ndx = ndx * 26 + (ord(ch) - ord('A') + 1)
    return ndx


# Helper to set a range of values.
# Takes a named range, and then walks down the range,
# filling in values from the list passed in (values).
def _split_cell_ref(cell_ref):
    """Split a cell reference such as ``'B12'`` into ``(column_index, row)``.

    The column index is 1-based ('A' -> 1, 'AA' -> 27).

    Raises:
        ValueError: if ``cell_ref`` is not letters followed by digits.
    """
    match = re.fullmatch(r'([A-Za-z]+)(\d+)', cell_ref)
    if match is None:
        raise ValueError(f'unsupported cell reference: {cell_ref!r}')
    letters, digits = match.groups()
    col = 0
    for ch in letters.upper():
        col = col * 26 + (ord(ch) - ord('A') + 1)
    return col, int(digits)


def set_range(wb, range_name, values, default=None, type=str):
    """Fill a named range, top to bottom, with ``values``.

    Args:
        wb: Workbook-like object exposing ``defined_names`` and ``wb[title]``.
        range_name: Name of a defined range; writing starts at its first cell.
        values: Iterable of raw values (strings or ``None``), one per row.
        default: Written (after coercion) when a value is empty or ``None``.
            When ``default`` is ``None`` an empty string is written instead.
        type: Callable used to coerce each written value (e.g. ``int``).
    """
    the_range = wb.defined_names[range_name]
    dest = list(the_range.destinations)[0]
    ws = wb[dest[0]]

    # The destination looks like '$B$12:$B$3000'; only the first cell matters.
    # Parse the whole label/number so multi-letter columns and multi-digit
    # start rows are handled (the row used to be read from a single digit).
    start_cell = dest[1].replace('$', '').split(':')[0]
    col, start_row = _split_cell_ref(start_cell)

    for ndx, v in enumerate(values):
        row = ndx + start_row
        if v:
            ws.cell(row=row, column=col, value=type(v))
        elif default is not None:
            # E.g. empty findings counts: 2023 submissions require
            # that field to be 0, not empty, if there are no findings.
            ws.cell(row=row, column=col, value=type(default))
        else:
            ws.cell(row=row, column=col, value='')


def set_uei(wb, dbkey):
    """Write the auditee UEI for ``dbkey`` into ``wb``; return the Gen row."""
    g = Gen.select().where(Gen.dbkey == dbkey).get()
    set_single_cell_range(wb, 'auditee_uei', g.uei)
    return g


def map_simple_columns(wb, mappings, values):
    """Fill every one-to-one mapped column of ``wb`` from the model rows.

    Args:
        wb: Workbook to fill.
        mappings: Iterable of ``FieldMap`` entries (sheet range <- DB column).
        values: Iterable of model instances, one per workbook row.
    """
    # Convert each row once instead of once per mapping.
    rows = [model_to_dict(v) for v in values]
    for m in mappings:
        set_range(wb,
                  m.in_sheet,
                  [row[m.in_db] for row in rows],
                  m.default,
                  m.type)


# FIXME: Get the padding/shape right on the report_id
def dbkey_to_report_id(dbkey):
    """Build a report id (audit year + month + zero-padded dbkey) for ``dbkey``."""
    g = Gen.select(Gen.audityear, Gen.fyenddate).where(Gen.dbkey == dbkey).get()
    month = g.fyenddate.split('-')[1]
    # 2022JUN0001000003
    # We start new audits at 1 million.
    # So, we want 10 digits, and zero-pad for
    # historic DBKEY report_ids
    return f'{g.audityear}{month}{dbkey.zfill(10)}'


def generate_dissemination_test_table(api_endpoint, dbkey, mappings, objects):
    """Build the expected-data table for one API endpoint.

    Returns a dict with ``endpoint``, ``report_id``, ``singletons`` (empty,
    filled in by callers) and ``rows``; each row lists the sheet field names
    and the matching non-empty DB values for one model instance.
    """
    table = {
        'rows': [],
        'singletons': {},
        'endpoint': api_endpoint,
        'report_id': dbkey_to_report_id(dbkey),
    }
    for o in objects:
        as_dict = model_to_dict(o)
        test_obj = {'fields': [], 'values': []}
        for m in mappings:
            # Only non-null, non-empty values are tested.
            value = as_dict.get(m.in_db)
            if value is not None and value != "":
                test_obj['fields'].append(m.in_sheet)
                test_obj['values'].append(value)
        table['rows'].append(test_obj)
    return table


##########################################
#
# generate_findings
#
##########################################
def generate_findings(dbkey, outdir):
    """Write the audit findings workbook for ``dbkey``; return its test table."""
    print("--- generate findings ---")
    wb = pyxl.load_workbook(f'templates/{templates["AuditFindings"]}')
    mappings = [
        FieldMap('compliance_requirement', 'typerequirement', None, str),
        FieldMap('reference_number', 'findingsrefnums', None, str),
        FieldMap('modified_opinion', 'modifiedopinion', None, str),
        FieldMap('other_matters', 'othernoncompliance', None, str),
        FieldMap('material_weakness', 'materialweakness', None, str),
        FieldMap('significant_deficiency', 'significantdeficiency', None, str),
        FieldMap('other_findings', 'otherfindings', None, str),
        FieldMap('questioned_costs', 'qcosts', None, str),
        FieldMap('repeat_prior_reference', 'repeatfinding', None, str),
        FieldMap('prior_references', 'priorfindingrefnums', None, str),
        # is_valid is computed in the workbook
    ]
    g = set_uei(wb, dbkey)
    cfdas = Cfda.select(Cfda.elecauditsid).where(Cfda.dbkey == g.dbkey)
    findings = Findings.select().where(Findings.dbkey == g.dbkey)

    map_simple_columns(wb, mappings, findings)

    # Old-style findings point at awards by ELECAUDITSID; new-style workbooks
    # use AWARD-nnnn references, numbered in award order.
    e2a = {cfda.elecauditsid: f'AWARD-{ndx + 1:04d}'
           for ndx, cfda in enumerate(cfdas)}

    award_references = []
    if e2a:
        award_references = [e2a[find.elecauditsid] for find in findings]

    if award_references:
        set_range(wb, 'award_reference', award_references)

    wb.save(os.path.join(outdir, f'findings-{dbkey}.xlsx'))

    table = generate_dissemination_test_table('findings', dbkey, mappings, findings)
    # Add the award references to the objects.
    for obj, ar in zip(table['rows'], award_references):
        obj['fields'].append('award_reference')
        obj['values'].append(ar)

    return table


##########################################
#
# generate_federal_awards
#
##########################################
def generate_federal_awards(dbkey, outdir):
    """Write the federal awards workbook for ``dbkey``; return its test table."""
    print("--- generate federal awards ---")
    wb = pyxl.load_workbook(f'templates/{templates["FederalAwards"]}')
    # In sheet : in DB
    mappings = [
        FieldMap('program_name', 'federalprogramname', None, str),
        FieldMap('additional_award_identification', 'awardidentification', None, str),
        FieldMap('cluster_name', 'clustername', "N/A", str),
        FieldMap('state_cluster_name', 'stateclustername', None, str),
        FieldMap('other_cluster_name', 'otherclustername', None, str),
        FieldMap('federal_program_total', 'programtotal', 0, int),
        FieldMap('cluster_total', 'clustertotal', 0, float),
        FieldMap('is_guaranteed', 'loans', None, str),
        FieldMap('loan_balance_at_audit_period_end', 'loanbalance', None, float),
        FieldMap('is_direct', 'direct', None, str),
        FieldMap('is_passed', 'passthroughaward', None, str),
        FieldMap('subrecipient_amount', 'passthroughamount', None, float),
        FieldMap('is_major', 'majorprogram', None, str),
        FieldMap('audit_report_type', 'typereport_mp', None, str),
        FieldMap('number_of_audit_findings', 'findings', 0, int),
        FieldMap('amount_expended', 'amount', 0, int),
    ]
    g = set_uei(wb, dbkey)
    cfdas = Cfda.select().where(Cfda.dbkey == g.dbkey)
    map_simple_columns(wb, mappings, cfdas)

    # Map things with transformations: 'PP.EEE' -> prefix 'PP', extension 'EEE'.
    # These must be real lists: they are consumed twice (workbook, then the
    # test table), and a one-shot iterator would be empty the second time.
    prefixes = []
    extensions = []
    for cfda in cfdas:
        prefix, _, extension = cfda.cfda.partition('.')
        prefixes.append(prefix)
        extensions.append(extension)
    set_range(wb, 'federal_agency_prefix', prefixes)
    set_range(wb, 'three_digit_extension', extensions)

    # We have to hop through several tables to build a list
    # of passthrough names. Note that anything without a passthrough
    # needs to be represented in the list as an empty string.
    passthrough_names = []
    passthrough_ids = []
    for cfda in cfdas:
        try:
            pnq = (Passthrough
                   .select()
                   .where((Passthrough.dbkey == cfda.dbkey) &
                          (Passthrough.elecauditsid == cfda.elecauditsid))).get()
        except Passthrough.DoesNotExist:
            passthrough_names.append('')
            passthrough_ids.append('')
        else:
            passthrough_names.append(pnq.passthroughname)
            passthrough_ids.append(pnq.passthroughid)
    set_range(wb, 'passthrough_name', passthrough_names)
    set_range(wb, 'passthrough_identifying_number', passthrough_ids)

    # Total amount expended must be calculated and inserted.
    # An empty amount counts as 0, matching the 'amount_expended' default.
    total = sum(int(cfda.amount or 0) for cfda in cfdas)
    set_single_cell_range(wb, 'total_amount_expended', total)

    wb.save(os.path.join(outdir, f'federal-awards-{dbkey}.xlsx'))

    table = generate_dissemination_test_table('federal_awards', dbkey, mappings, cfdas)
    # prefix
    for obj, pfix, ext in zip(table['rows'], prefixes, extensions):
        obj['fields'].append('federal_agency_prefix')
        obj['values'].append(pfix)
        obj['fields'].append('three_digit_extension')
        obj['values'].append(ext)
    # names, ids
    for obj, name, pt_id in zip(table['rows'], passthrough_names, passthrough_ids):
        obj['fields'].append('passthrough_name')
        obj['values'].append(name)
        obj['fields'].append('passthrough_identifying_number')
        obj['values'].append(pt_id)
    table['singletons']['auditee_uei'] = g.uei
    table['singletons']['total_amount_expended'] = total

    return table


##########################################
#
# generate_findings_text
#
##########################################
def generate_findings_text(dbkey, outdir):
    """Write the findings text workbook for ``dbkey``; return its test table."""
    print("--- generate findings text ---")
    wb = pyxl.load_workbook(f'templates/{templates["AuditFindingsText"]}')
    mappings = [
        FieldMap('reference_number', 'findingrefnums', None, str),
        FieldMap('text_of_finding', 'text', None, str),
        FieldMap('contains_chart_or_table', 'chartstables', None, str),
    ]
    g = set_uei(wb, dbkey)
    ftexts = Findingstext.select().where(Findingstext.dbkey == g.dbkey)
    map_simple_columns(wb, mappings, ftexts)
    wb.save(os.path.join(outdir, f'findings-text-{dbkey}.xlsx'))
    table = generate_dissemination_test_table('findings_text', dbkey, mappings, ftexts)
    table['singletons']['auditee_uei'] = g.uei

    return table


##########################################
#
# generate_additional_ueis
#
##########################################
def generate_additional_ueis(dbkey, outdir):
    """Write the additional UEIs workbook for ``dbkey``; return its test table."""
    print("--- generate additional ueis ---")
    wb = pyxl.load_workbook(f'templates/{templates["AdditionalUEIs"]}')
    mappings = [
        FieldMap('additional_uei', 'uei', None, str),
        # FieldMap('ueiseqnum', 'ueiseqnum', 0, int)
    ]

    g = set_uei(wb, dbkey)
    addl_ueis = Ueis.select().where(Ueis.dbkey == g.dbkey)
    map_simple_columns(wb, mappings, addl_ueis)
    wb.save(os.path.join(outdir, f'additional-ueis-{dbkey}.xlsx'))
    table = generate_dissemination_test_table('additional_ueis', dbkey, mappings, addl_ueis)
    table['singletons']['auditee_uei'] = g.uei

    return table


##########################################
#
# generate_notes_to_sefa
#
##########################################
def generate_notes_to_sefa(dbkey, outdir):
    """Write the notes-to-SEFA workbook for ``dbkey``; return its test table."""
    print("--- generate notes to sefa ---")
    wb = pyxl.load_workbook(f'templates/{templates["SEFA"]}')
    mappings = [
        FieldMap('note_title', 'title', None, str),
        FieldMap('note_content', 'content', None, str)
    ]
    g = set_uei(wb, dbkey)
    # The mapping is weird.
    # https://facdissem.census.gov/Documents/DataDownloadKey.xlsx
    # The TYPEID column determines which field in the form a given row corresponds to.
    # TYPEID=1 is the description of significant accounting policies.
    # TYPEID=2 is the De Minimis cost rate.
    # TYPEID=3 is for notes, which have sequence numbers... that must align somewhere.
    policies = Notes.select().where((Notes.dbkey == g.dbkey) & (Notes.type_id == 1)).get()
    rate = Notes.select().where((Notes.dbkey == g.dbkey) & (Notes.type_id == 2)).get()
    notes = Notes.select().where((Notes.dbkey == g.dbkey) & (Notes.type_id == 3)).order_by(Notes.seq_number)

    # This looks like the right way to set the three required fields
    set_single_cell_range(wb, 'accounting_policies', policies.content)
    # WARNING
    # This is being faked. We're asking a Y/N question in the collection.
    # Census just let them type some stuff. So, this is a rough
    # attempt to generate a Y/N value from the content.
    # This means the data is *not* true to what was intended, but
    # it *is* good enough for us to use for testing.
    rate_text = rate.content or ''  # content is nullable; search '' instead
    if re.search(r'did not use|not to use|not use|not elected', rate_text):
        is_used = "N"
    elif re.search(r'used', rate_text):
        is_used = "Y"
    else:
        is_used = "Y&N"

    set_single_cell_range(wb, 'is_minimis_rate_used', is_used)
    set_single_cell_range(wb, 'rate_explained', rate.content)

    # Map the rest as notes.
    map_simple_columns(wb, mappings, notes)
    wb.save(os.path.join(outdir, f'notes-{dbkey}.xlsx'))

    table = generate_dissemination_test_table('notes_to_sefa', dbkey, mappings, notes)
    # Singleton keys mirror the named ranges written above.
    table['singletons']['accounting_policies'] = policies.content
    table['singletons']['is_minimis_rate_used'] = is_used
    table['singletons']['rate_explained'] = rate.content
    table['singletons']['auditee_uei'] = g.uei
    return table


##########################################
#
# generate_secondary_auditors
#
##########################################
def generate_secondary_auditors(dbkey, outdir):
    """Write the secondary auditors workbook for ``dbkey``; return its test table."""
    print("--- generate secondary auditors ---")
    wb = pyxl.load_workbook(f'templates/{templates["SecondaryAuditors"]}')
    mappings = [
        FieldMap('secondary_auditor_address_city', 'cpacity', None, str),
        FieldMap('secondary_auditor_contact_name', 'cpacontact', None, str),
        FieldMap('secondary_auditor_ein', 'cpaein', 0, int),
        FieldMap('secondary_auditor_contact_email', 'cpaemail', None, str),
        FieldMap('secondary_auditor_name', 'cpafirmname', None, str),
        FieldMap('secondary_auditor_contact_phone', 'cpaphone', None, str),
        FieldMap('secondary_auditor_address_state', 'cpastate', None, str),
        FieldMap('secondary_auditor_address_street', 'cpastreet1', None, str),
        FieldMap('secondary_auditor_contact_title', 'cpatitle', None, str),
        FieldMap('secondary_auditor_address_zipcode', 'cpazipcode', None, str)
    ]

    g = set_uei(wb, dbkey)
    sec_cpas = Cpas.select().where(Cpas.dbkey == g.dbkey)

    map_simple_columns(wb, mappings, sec_cpas)
    wb.save(os.path.join(outdir, f'cpas-{dbkey}.xlsx'))

    table = generate_dissemination_test_table('secondary_auditors', dbkey, mappings, sec_cpas)
    table['singletons']['auditee_uei'] = g.uei

    return table


##########################################
#
# generate_captext
#
##########################################
def generate_captext(dbkey, outdir):
    """Write the corrective action plan workbook for ``dbkey``; return its test table."""
    print("--- generate corrective action plan ---")
    wb = pyxl.load_workbook(f'templates/{templates["CAP"]}')
    mappings = [
        FieldMap('reference_number', 'findingrefnums', None, str),
        FieldMap('planned_action', 'text', None, str),
        FieldMap('contains_chart_or_table', 'chartstables', None, str)
    ]

    g = set_uei(wb, dbkey)
    captexts = Captext.select().where(Captext.dbkey == g.dbkey)

    map_simple_columns(wb, mappings, captexts)
    wb.save(os.path.join(outdir, f'captext-{dbkey}.xlsx'))

    table = generate_dissemination_test_table('cap_text', dbkey, mappings, captexts)
    table['singletons']['auditee_uei'] = g.uei

    return table


##########################################
def main():
    """Generate every workbook plus the JSON test array for ``args.dbkey``.

    Reads the module-level ``args`` (``--dbkey``, optional ``--output``) and
    writes into ``<output or 'output'>/<dbkey>/``. Exits with status 1 if
    that directory cannot be created.
    """
    out_basedir = args.output if args.output else 'output'
    outdir = os.path.join(out_basedir, args.dbkey)
    try:
        os.makedirs(outdir, exist_ok=True)
    except OSError as e:
        print(f'could not create output directory. exiting. ({e})', file=sys.stderr)
        sys.exit(1)

    fat = generate_federal_awards(args.dbkey, outdir)
    ft = generate_findings(args.dbkey, outdir)
    ftt = generate_findings_text(args.dbkey, outdir)
    aut = generate_additional_ueis(args.dbkey, outdir)
    ntst = generate_notes_to_sefa(args.dbkey, outdir)
    sat = generate_secondary_auditors(args.dbkey, outdir)
    ctt = generate_captext(args.dbkey, outdir)
    tables = [sat, ctt, ntst, aut, ftt, ft, fat]
    with open(os.path.join(outdir, f'test-array-{args.dbkey}.json'), "w") as test_file:
        jstr = json.dumps(tables, indent=2, sort_keys=True)
        test_file.write(jstr)


if __name__ == '__main__':
    parser.add_argument('--dbkey', type=str, required=True)
    parser.add_argument('--output', type=str, required=False)
    args = parser.parse_args()
    main()

# Remaining patch metadata (non-Python), kept verbatim:
# diff --git a/tools/workbook-generator/requirements.txt b/tools/workbook-generator/requirements.txt
# deleted file mode 100644
# index 6b3fbc5bfd..0000000000
# --- a/tools/workbook-generator/requirements.txt
# +++ /dev/null
# @@ -1,3 +0,0 @@
# -openpyxl
# -pandas
# -peewee
# \ No newline at end of file
# diff --git a/tools/workbook-generator/templates/PLACE-TEMPLATE-FILES-HERE b/tools/workbook-generator/templates/PLACE-TEMPLATE-FILES-HERE
# deleted file mode 100644
# index e69de29bb2..0000000000