From b13e15e5a33c1b5035f52f5c3c549d464b3af497 Mon Sep 17 00:00:00 2001 From: kazshak Date: Sun, 24 Jan 2021 19:59:43 -0600 Subject: [PATCH 1/3] Add data cleanning scripts --- .gitignore | 2 + README.md | 11 +++ package-lock.json | 146 +++++++++++++++++++++++++++++++++++++ package.json | 15 ++++ scripts/clean-data.js | 127 ++++++++++++++++++++++++++++++++ scripts/helperFunctions.js | 20 +++++ 6 files changed, 321 insertions(+) create mode 100644 .gitignore create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 scripts/clean-data.js create mode 100644 scripts/helperFunctions.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..82f7da1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +local +node_modules diff --git a/README.md b/README.md index d817de8..cdcb007 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,13 @@ # open-data-portal Publishing data of civic interest that may not be available elsewhere + +Scripts contained in the scripts folder provide some code to clean Government Employees Demographic data that is downloaded from the data.Nashville.gov. Because the scripts are written in javascript, and intended to be run in a nodejs environment, if you want to run the scripts, be sure to install the needed packages using the following command: + +npm install + +To execute the clean_data script, change to the scripts directors, and use the following command: + +cd scripts +node clean-data.js --templateFile=2020-08.csv --downloadedFile=General_Government_Employees_Demographics.csv --outputFile=2021-01.csv + +Where --templateFile= is set equal to an already cleaned data file from the past that the script can use as a template; --downloadedFile= is set equal to the name of the file downloaded from data.nashville.gov (note: the script expects this file to be saved in the ../local directory); and the --outputFile= is set equal to the name of the file that should be created to be stored in this open data portal. 
diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..df6e1b9 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,146 @@ +{ + "name": "open-data-portal", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "@data-forge/serialization": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@data-forge/serialization/-/serialization-1.0.1.tgz", + "integrity": "sha512-EP7IWimh5JcDOISVoXvNIjUAqcPN1FkNWvuvjY3uzcswErxB8j93ldlUBvgvGEszqFRwMM3fpMF0HrISg5iBSQ==" + }, + "assertion-error": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", + "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==" + }, + "chai": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz", + "integrity": "sha512-XQU3bhBukrOsQCuwZndwGcCVQHyZi53fQ6Ys1Fym7E4olpIqqZZhhoFJoaKVvV17lWQoXYwgWN2nF5crA8J2jw==", + "requires": { + "assertion-error": "^1.1.0", + "check-error": "^1.0.2", + "deep-eql": "^3.0.1", + "get-func-name": "^2.0.0", + "pathval": "^1.1.0", + "type-detect": "^4.0.5" + } + }, + "check-error": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.2.tgz", + "integrity": "sha1-V00xLt2Iu13YkS6Sht1sCu1KrII=" + }, + "clone": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/clone/-/clone-1.0.4.tgz", + "integrity": "sha1-2jCcwmPfFZlMaIypAheco8fNfH4=", + "optional": true + }, + "data-forge": { + "version": "1.8.15", + "resolved": "https://registry.npmjs.org/data-forge/-/data-forge-1.8.15.tgz", + "integrity": "sha512-bePEeOgOPcRFO4B/WrAIah+frf0NoIUek4Dv7Z+GHnOlLwO0wPubIzHsConi0ZTF5sHpBm9aciYtCwaD9iRtBQ==", + "requires": { + "@data-forge/serialization": "^1.0.0", + "dayjs": "^1.8.12", + "easy-table": "1.1.0", + "json5": "^2.1.0", + "numeral": "^2.0.6", + "papaparse": "5.2.0", + "typy": "^3.0.1" + } + }, + "data-forge-fs": { 
+ "version": "0.0.9", + "resolved": "https://registry.npmjs.org/data-forge-fs/-/data-forge-fs-0.0.9.tgz", + "integrity": "sha512-7VzK9DbvYqJk/AlQckIh4OMX0iNcvY4Za8vWXh6LT+TGekAs791XUjB2OsEDlRcmRr3DdQ7aecVBRZ+wevjhnA==", + "requires": { + "chai": "^4.1.2" + } + }, + "dayjs": { + "version": "1.10.4", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.10.4.tgz", + "integrity": "sha512-RI/Hh4kqRc1UKLOAf/T5zdMMX5DQIlDxwUe3wSyMMnEbGunnpENCdbUgM+dW7kXidZqCttBrmw7BhN4TMddkCw==" + }, + "deep-eql": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-3.0.1.tgz", + "integrity": "sha512-+QeIQyN5ZuO+3Uk5DYh6/1eKO0m0YmJFGNmFHGACpf1ClL1nmlV/p4gNgbl2pJGxgXb4faqo6UE+M5ACEMyVcw==", + "requires": { + "type-detect": "^4.0.0" + } + }, + "defaults": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.3.tgz", + "integrity": "sha1-xlYFHpgX2f8I7YgUd/P+QBnz730=", + "optional": true, + "requires": { + "clone": "^1.0.2" + } + }, + "easy-table": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/easy-table/-/easy-table-1.1.0.tgz", + "integrity": "sha1-hvmrTBAvA3G3KXuSplHVgkvIy3M=", + "requires": { + "wcwidth": ">=1.0.1" + } + }, + "get-func-name": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.0.tgz", + "integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=" + }, + "json5": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.1.3.tgz", + "integrity": "sha512-KXPvOm8K9IJKFM0bmdn8QXh7udDh1g/giieX0NLCaMnb4hEiVFqnop2ImTXCc5e0/oHz3LTqmHGtExn5hfMkOA==", + "requires": { + "minimist": "^1.2.5" + } + }, + "minimist": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" + }, + "numeral": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/numeral/-/numeral-2.0.6.tgz", + 
"integrity": "sha1-StCAk21EPCVhrtnyGX7//iX05QY=" + }, + "papaparse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.2.0.tgz", + "integrity": "sha512-ylq1wgUSnagU+MKQtNeVqrPhZuMYBvOSL00DHycFTCxownF95gpLAk1HiHdUW77N8yxRq1qHXLdlIPyBSG9NSA==" + }, + "pathval": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.0.tgz", + "integrity": "sha1-uULm1L3mUwBe9rcTYd74cn0GReA=" + }, + "type-detect": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", + "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==" + }, + "typy": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/typy/-/typy-3.3.0.tgz", + "integrity": "sha512-Du53deMF9X9pSM3gVXDjLBq14BUfZWSGKfmmR1kTlg953RaIZehfc8fQuoAiW+SRO6bJsP+59mv1tsH8vwKghg==" + }, + "wcwidth": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz", + "integrity": "sha1-8LDc+RW8X/FSivrbLA4XtTLaL+g=", + "optional": true, + "requires": { + "defaults": "^1.0.3" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..27e4525 --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "name": "open-data-portal", + "version": "1.0.0", + "description": "Publishing data of civic interest that may not be available elsewhere", + "main": "./scripts/main.js", + "scripts": { + "test": "test" + }, + "author": "", + "license": "ISC", + "dependencies": { + "data-forge": "^1.8.15", + "data-forge-fs": "0.0.9" + } +} diff --git a/scripts/clean-data.js b/scripts/clean-data.js new file mode 100644 index 0000000..7fa374d --- /dev/null +++ b/scripts/clean-data.js @@ -0,0 +1,127 @@ +const df = require('data-forge'); +require('data-forge-fs'); +const hf = require('./helperFunctions.js'); + +let args = require('minimist')(process.argv.slice(2)); + +const destPath = 
'../nashville/metro-general-government-employees-demographic-data/renamed-csv/'; +const templateFile = args['templateFile']; + +const originPath = '../local/'; +const newFile = args['downloadedFile']; +const outFileName = args['outputFile']; + +// load some existing data to use as a template +// +let currentData = hf.getFile(destPath + templateFile); +console.log('Previous data file loaded, to use as template ...'); + +// extract the job categories with their codes into a new data frame +// these are the job categories that exist in the current data +// and add a category of 'Unknown' +// +let jobCategories = hf.getCategory(currentData, 'EEO Job Category', 'EEO Job Category Description') + .concat(new df.DataFrame([{ + 'EEO Job Category': '99', + 'EEO Job Category Description': 'Unknown' + }])); +console.log('\nJob categories: \n', jobCategories.toCSV()); + +// extract the ethnicities along with their ethnic codes into a new data frame +// these are the ethnic codes that exist in the current data +// +let ethnicCategories = hf.getCategory(currentData, 'Ethnic Code', 'Ethnic Code Description'); +console.log('\nEthnic categories: \n', ethnicCategories.toCSV()); + +// load the new data +// and replace any missing ethnic descriptions with 'Two or More Races' +// and replace any missing job descriptions with 'Unknown' +// +let newData = hf.getFile(originPath + newFile) + .select(row => { + if (row['Ethnic Code Description'] == null || row['Ethnic Code Description'].trim() === '') { + row['Ethnic Code Description'] = 'Two or More Races'; + }; + + if (row['EEO Job Category Description'] == null || row['EEO Job Category Description'].trim() === '') { + row['EEO Job Category Description'] = 'Unknown'; + }; + + return row; + }); +// join jobCategories and ethnicCategories to the newly loaded data +// to make sure the job codes, and ethnic codes are consistent with +// previous data. 
+// +let combined = newData + .join( + jobCategories, + left => left["EEO Job Category Description"], + right => right["EEO Job Category Description"], + (left, right) => { + return { + 'Pay Grade / Step': left['Pay Grade / Step'], + 'Annual Salary': left['Annual Salary'], + 'Class': left['Class'], + 'Title': left['Title'], + 'Current Department': left['Current Department'], + 'Employment Status': left['Employment Status'], + 'EEO Job Category': right['EEO Job Category'], + 'EEO Job Category Description': left['EEO Job Category Description'], + 'Gender': left['Gender'], + 'Ethnic Code Description': left['Ethnic Code Description'], + 'Year of Birth': left['Year of Birth'], + 'Date Started': left['Date Started'], + 'FLSA Exempt?': left['FLSA Exempt?'], + 'County': left['County'] + } + } + ) + .join( + ethnicCategories, + left => left["Ethnic Code Description"], + right => right["Ethnic Code Description"], + (left, right) => { + return { + 'Pay Grade / Step': left['Pay Grade / Step'], + 'Annual Salary': left['Annual Salary'], + 'Class': left['Class'], + 'Title': left['Title'], + 'Current Department': left['Current Department'], + 'Employment Status': left['Employment Status'], + 'EEO Job Category': left['EEO Job Category'], + 'EEO Job Category Description': left['EEO Job Category Description'], + 'Gender': left['Gender'], + 'Ethnic Code': right['Ethnic Code'], + 'Ethnic Code Description': left['Ethnic Code Description'], + 'Year of Birth': left['Year of Birth'], + 'Date Started': left['Date Started'], + 'FLSA Exempt?': left['FLSA Exempt?'], + 'County': left['County'] + } + } + ); + +// check if the columns in the template data and the newly loaded and +// adjusted data, are the same and in the same order. +// if they are, then write the new file to renamed-csv folder +// if they are not, then display some information about the columns, and +// exit the application. 
+// +if (currentData.getColumnNames().length === combined.getColumnNames().length && currentData.getColumnNames().every((v, i) => v === combined.getColumnNames()[i])) { + console.log('\nCols in new data match all cols in current data.'); + console.log('new, num rows: ', newData.count()); + console.log('combined, new rows: ', combined.count()); + + combined.asCSV().writeFileSync(destPath + outFileName); + console.log('\nNew file ', outFileName, ' written to ', destPath, ' directory\n'); +} else { + console.log("Cols in current data: ",currentData.getColumnNames()); + console.log("Cols in new combined data: ", combined.getColumnNames()); + + console.log('current, num cols: ', currentData.getColumnNames().length); + console.log('combined, num cols: ', combined.getColumnNames().length); + console.log(combined.head(10).toStrings("Date Started", "MM/DD/YYYY").toCSV()); + + process.exitCode = 1; +} diff --git a/scripts/helperFunctions.js b/scripts/helperFunctions.js new file mode 100644 index 0000000..7aea49c --- /dev/null +++ b/scripts/helperFunctions.js @@ -0,0 +1,20 @@ +const df = require('data-forge'); +require('data-forge-fs'); + +exports.getFile = function getFile(filename) { + return df.readFileSync(filename) + .parseCSV(); +}; + +exports.getCategory = function getCategory(data, categoryIdColumn, categoryDescColumn) { + return data + .groupBy(row => row[categoryIdColumn]) + .select(group => { + return { + [categoryIdColumn]: group.first()[categoryIdColumn], + [categoryDescColumn]: group.deflate(row => row[categoryDescColumn]).first() + }; + }) + .orderBy(row => row[categoryIdColumn]) + .inflate(); +}; From d2eaa051b39680077afd19ebf30ca9cc895a5c89 Mon Sep 17 00:00:00 2001 From: kazshak Date: Sat, 10 Apr 2021 15:41:54 -0500 Subject: [PATCH 2/3] created get-data script --- README.md | 16 +- package-lock.json | 331 ++++++++++++++++++++++++++++++++++++++++++ package.json | 3 +- scripts/clean-data.js | 4 + scripts/get-data.js | 59 ++++++++ 5 files changed, 411 insertions(+), 2 deletions(-) create mode 100644 scripts/get-data.js diff 
--git a/README.md b/README.md index cdcb007..8a457da 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,25 @@ # open-data-portal Publishing data of civic interest that may not be available elsewhere +## Scripts To Get And Clean Data Scripts contained in the scripts folder provide some code to clean Government Employees Demographic data that is downloaded from the data.Nashville.gov. Because the scripts are written in javascript, and intended to be run in a nodejs environment, if you want to run the scripts, be sure to install the needed packages using the following command: npm install -To execute the clean_data script, change to the scripts directors, and use the following command: +### get-data.js +To execute the get-data script, change to the scripts directory, and use the following +command: + +cd scripts +node clean-data.js --outputFile=test.csv + +Where --outputFile is set equal to a file name that will be created in the +local directory for use by the clean-data script. There is an optional parameter +--url= that can be used to specify the url of the api, if not specified +the code will use the default value of https://data.nashville.gov/resource/4ibi-mxs4.csv + +### clean-data.js +To execute the clean-data script, change to the scripts directors, and use the following command: cd scripts node clean-data.js --templateFile=2020-08.csv --downloadedFile=General_Government_Employees_Demographics.csv --outputFile=2021-01.csv diff --git a/package-lock.json b/package-lock.json index df6e1b9..402a094 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,11 +9,63 @@ "resolved": "https://registry.npmjs.org/@data-forge/serialization/-/serialization-1.0.1.tgz", "integrity": "sha512-EP7IWimh5JcDOISVoXvNIjUAqcPN1FkNWvuvjY3uzcswErxB8j93ldlUBvgvGEszqFRwMM3fpMF0HrISg5iBSQ==" }, + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": 
"sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "asn1": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "requires": { + "safer-buffer": "~2.1.0" + } + }, + "assert-plus": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", + "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" + }, "assertion-error": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==" }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" + }, + "aws-sign2": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", + "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=" + }, + "aws4": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.11.0.tgz", + "integrity": "sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==" + }, + "bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", + "requires": { + "tweetnacl": "^0.14.3" + } + }, + "caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=" + }, "chai": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz", @@ -38,6 
+90,27 @@ "integrity": "sha1-2jCcwmPfFZlMaIypAheco8fNfH4=", "optional": true }, + "combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "requires": { + "delayed-stream": "~1.0.0" + } + }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" + }, + "dashdash": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", + "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=", + "requires": { + "assert-plus": "^1.0.0" + } + }, "data-forge": { "version": "1.8.15", "resolved": "https://registry.npmjs.org/data-forge/-/data-forge-1.8.15.tgz", @@ -82,6 +155,11 @@ "clone": "^1.0.2" } }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" + }, "easy-table": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/easy-table/-/easy-table-1.1.0.tgz", @@ -90,11 +168,117 @@ "wcwidth": ">=1.0.1" } }, + "ecc-jsbn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", + "requires": { + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" + } + }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": 
"https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, + "fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" + }, + "forever-agent": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", + "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" + }, + "form-data": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.6", + "mime-types": "^2.1.12" + } + }, "get-func-name": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.0.tgz", "integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=" }, + "getpass": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", + "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=", + "requires": { + "assert-plus": "^1.0.0" + } + }, + "har-schema": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", + "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=" + }, + "har-validator": { + "version": "5.1.5", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.5.tgz", + "integrity": "sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==", + "requires": { + "ajv": "^6.12.3", + "har-schema": "^2.0.0" + } + }, + "http-signature": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz", + 
"integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=", + "requires": { + "assert-plus": "^1.0.0", + "jsprim": "^1.2.2", + "sshpk": "^1.7.0" + } + }, + "is-typedarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", + "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=" + }, + "isstream": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", + "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" + }, + "jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=" + }, + "json-schema": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", + "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, + "json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" + }, "json5": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/json5/-/json5-2.1.3.tgz", @@ -103,6 +287,30 @@ "minimist": "^1.2.5" } }, + "jsprim": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", + "integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", + "requires": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.2.3", + "verror": "1.10.0" + } + }, + "mime-db": { + "version": "1.47.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.47.0.tgz", + "integrity": "sha512-QBmA/G2y+IfeS4oktet3qRZ+P5kPhCKRXxXnQEudYqUaEioAU1/Lq2us3D/t1Jfo4hE9REQPrbB7K5sOczJVIw==" + }, + "mime-types": { + "version": "2.1.30", + 
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.30.tgz", + "integrity": "sha512-crmjA4bLtR8m9qLpHvgxSChT+XoSlZi8J4n/aIdn3z92e/U47Z0V/yl+Wh9W046GgFVAmoNR/fmdbZYcSSIUeg==", + "requires": { + "mime-db": "1.47.0" + } + }, "minimist": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", @@ -113,6 +321,11 @@ "resolved": "https://registry.npmjs.org/numeral/-/numeral-2.0.6.tgz", "integrity": "sha1-StCAk21EPCVhrtnyGX7//iX05QY=" }, + "oauth-sign": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" + }, "papaparse": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.2.0.tgz", @@ -123,6 +336,101 @@ "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.0.tgz", "integrity": "sha1-uULm1L3mUwBe9rcTYd74cn0GReA=" }, + "performance-now": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=" + }, + "psl": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.8.0.tgz", + "integrity": "sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==" + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==" + }, + "qs": { + "version": "6.5.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", + "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" + }, + "request": { + "version": "2.88.2", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz", + "integrity": 
"sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==", + "requires": { + "aws-sign2": "~0.7.0", + "aws4": "^1.8.0", + "caseless": "~0.12.0", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", + "forever-agent": "~0.6.1", + "form-data": "~2.3.2", + "har-validator": "~5.1.3", + "http-signature": "~1.2.0", + "is-typedarray": "~1.0.0", + "isstream": "~0.1.2", + "json-stringify-safe": "~5.0.1", + "mime-types": "~2.1.19", + "oauth-sign": "~0.9.0", + "performance-now": "^2.1.0", + "qs": "~6.5.2", + "safe-buffer": "^5.1.2", + "tough-cookie": "~2.5.0", + "tunnel-agent": "^0.6.0", + "uuid": "^3.3.2" + } + }, + "safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "sshpk": { + "version": "1.16.1", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", + "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", + "requires": { + "asn1": "~0.2.3", + "assert-plus": "^1.0.0", + "bcrypt-pbkdf": "^1.0.0", + "dashdash": "^1.12.0", + "ecc-jsbn": "~0.1.1", + "getpass": "^0.1.1", + "jsbn": "~0.1.0", + "safer-buffer": "^2.0.2", + "tweetnacl": "~0.14.0" + } + }, + "tough-cookie": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz", + "integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==", + "requires": { + "psl": "^1.1.28", + "punycode": "^2.1.1" + } + }, + "tunnel-agent": { + "version": "0.6.0", + "resolved": 
"https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=", + "requires": { + "safe-buffer": "^5.0.1" + } + }, + "tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=" + }, "type-detect": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", @@ -133,6 +441,29 @@ "resolved": "https://registry.npmjs.org/typy/-/typy-3.3.0.tgz", "integrity": "sha512-Du53deMF9X9pSM3gVXDjLBq14BUfZWSGKfmmR1kTlg953RaIZehfc8fQuoAiW+SRO6bJsP+59mv1tsH8vwKghg==" }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "requires": { + "punycode": "^2.1.0" + } + }, + "uuid": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz", + "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==" + }, + "verror": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz", + "integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=", + "requires": { + "assert-plus": "^1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "^1.2.0" + } + }, "wcwidth": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz", diff --git a/package.json b/package.json index 27e4525..083ce8f 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "license": "ISC", "dependencies": { "data-forge": "^1.8.15", - "data-forge-fs": "0.0.9" + "data-forge-fs": "0.0.9", + "request": "^2.88.2" } } diff --git a/scripts/clean-data.js b/scripts/clean-data.js index 7fa374d..02ef836 100644 --- a/scripts/clean-data.js +++ b/scripts/clean-data.js @@ -1,3 +1,7 @@ +// +// example useage from the command prompt: +// node 
clean-data.js --templateFile=2020-08.csv --downloadedFile=General_Government_Employees_Demographics.csv --outputFile=2021-01.csv +// const df = require('data-forge'); require('data-forge-fs'); const hf = require('./helperFunctions.js'); diff --git a/scripts/get-data.js b/scripts/get-data.js new file mode 100644 index 0000000..1244f96 --- /dev/null +++ b/scripts/get-data.js @@ -0,0 +1,61 @@ +// +// This script loads the government employees data from the data.nashville.gov +// open data portal, and saves the results as a local csv file. +// +// example usage from the command prompt: +// node get-data.js --outputFile=test.csv +// or +// node get-data.js --outputFile=test.csv --url=https://data.nashville.gov/resource/4ibi-mxs4.csv +// url parameter is optional, the default value is https://data.nashville.gov/resource/4ibi-mxs4.csv +// +var request = require('request'); +var fs = require('fs'); + +let args = require('minimist')(process.argv.slice(2)); + +const apiUrl = args['url'] || 'https://data.nashville.gov/resource/4ibi-mxs4.csv'; +const destPath = '../local/'; +const outputFile = args['outputFile']; + +const options = { + 'method': 'GET', + 'url': apiUrl, + 'headers': { + } +}; +request(options, function (error, response) { + if (error) throw error; + // a non-200 reply carries an error page rather than csv data, so do not save it + if (response.statusCode !== 200) throw new Error(`Request to ${apiUrl} failed with HTTP status ${response.statusCode}`); + let lines = response.body.split("\n"); + // previous files were downloaded as excel workbooks, and those + // had slightly different column titles. + // the below object is used to translate the titles provided by the api + // into the same titles as were previously used for the excel versions + // of the files. 
+ // + const titleTranslate = { + "pay_grade_step": "Pay Grade / Step", + "annual_salary": "Annual Salary", + "class": "Class", + "title": "Title", + "current_dept_description": "Current Department", + "employment_status": "Employment Status", + "eeo_job_cat_desc": "EEO Job Category Description", + "gender": "Gender", + "ethnic_code_description": "Ethnic Code Description", + "year_of_birth": "Year of Birth", + "date_started": "Date Started", + "flsa_exempt_y_n": "FLSA Exempt?", + "county": "County" + }; + + for (const apiTitle in titleTranslate) { + lines[0] = lines[0].replace(apiTitle, titleTranslate[apiTitle]); + }; + + const result = lines.join('\n'); + + fs.writeFileSync(destPath + outputFile, result); + console.log(`File ${destPath + outputFile} written.`); +}); From da215022055a93c6c1c56f34992978126975c739 Mon Sep 17 00:00:00 2001 From: kazshak Date: Sat, 10 Apr 2021 15:46:40 -0500 Subject: [PATCH 3/3] Readme Update --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8a457da..5204362 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ To execute the get-data script, change to the scripts directory, and use the fol command: cd scripts + -node clean-data.js --outputFile=test.csv +node get-data.js --outputFile=test.csv Where --outputFile is set equal to a file name that will be created in the @@ -22,6 +23,8 @@ the code will use the default value of https://data.nashville.gov/resource/4ibi- -To execute the clean-data script, change to the scripts directors, and use the following command: +To execute the clean-data script, change to the scripts directory, and use the following command: cd scripts -node clean-data.js --templateFile=2020-08.csv --downloadedFile=General_Government_Employees_Demographics.csv --outputFile=2021-01.csv + +node clean-data.js --templateFile=2020-08.csv +--downloadedFile=General_Government_Employees_Demographics.csv --outputFile=2021-01.csv Where --templateFile= is set equal to an already cleaned data file from the past that the script can use as a template; --downloadedFile= is set equal to the name of the file 
downloaded from data.nashville.gov (note: the script expects this file to be saved in the ../local directory); and the --outputFile= is set equal to the name of the file that should be created to be stored in this open data portal.