data-govt-nz · Feb 25, 2018 · Feb 25, 2018 · Aug 20, 2019 · Nov 20, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Data.json conversion tool
 
+## 1.2.0
+ - Add CKAN CSV exporter tool
+
+## 1.1.0
+ - Add Heroku support
+
+## 1.0.0
+ - Conversion tool turned into a web app
+
 ## 0.1.0
  - Inital public release of the node conversion tool.
  - Documentation provided in README.
diff --git a/README.md b/README.md
@@ -133,7 +133,7 @@ The data.json file should have ASCII or UTF-8 character encoding (as per the JSO
 
 ## Comparison with USA data.json schema
 
-The NZ data.json format is based on both the [Data.gov.uk data.json](http://guidance.data.gov.uk/dcat_fields.html) and the [Project Open Data data.json schema](https://project-open-data.cio.gov/v1.1/schema/) used by the U.S. Federal Government. Both are derivatives of the [DCAT (data catalog vocabulary)](https://www.w3.org/TR/vocab-dcat/) schema standard.
+The NZ data.json format is based on both the [Data.gov.uk data.json](https://guidance.data.gov.uk/publish_and_manage_data/harvest_or_add_data/harvest_data/dcat/#accepted-dcat-and-data-json-fields) and the [Project Open Data data.json schema](https://project-open-data.cio.gov/v1.1/schema/) used by the U.S. Federal Government. Both are derivatives of the [DCAT (data catalog vocabulary)](https://www.w3.org/TR/vocab-dcat/) schema standard.
 
 There are a few conscious differences that are listed here for reference:
 
@@ -151,5 +151,4 @@ See [LICENSE.md](LICENSE.md)
 See [CONTRIBUTING.md](CONTRIBUTING.md)
 
 ## Maintainer
- - Cam Findlay <cam.findlay@dia.govt.nz>
  - Data.govt.nz team <info@data.govt.nz>
diff --git a/jsonToCsv/README.md b/jsonToCsv/README.md
@@ -0,0 +1,41 @@
+# Convert a data.json to CSV
+
+This module is intended to help new organizations onboard with automation by using their existing [data.govt](https://data.govt.nz) catalogue to populate a CSV.
+
+Once you have the CSV, you can update your data.json file by using the CLI/web interface of the CSV to DCAT converter.
+
+
+## How to use this tool
+
+**requirements**
+node `8.9.4` and npm `5.6.0`
+
+**Usage**
+
+1. Clone the repo and `cd` into it.
+2. From this directory (`jsonToCsv/`), install the modules with `npm install`.
+3. Run the script, e.g. to export the Ministry of Health organization: `node jsonToCsv.js -a ministry-of-health`.
+
+**Options**
+
+```
+  Usage: jsonToCsv [options]
+
+  Tool for downloading an agency's existing data from data.govt in a format they can use to re-submit it via the schema tool.
+
+
+  Options:
+
+    -V, --version          output the version number
+    -a, --agency [agency]  CKAN agency identifier, eg: ministry-of-health
+    -f, --file [fileName]  CSV filename to write to, eg: output.csv, defaults to: <agency-identifer>.csv
+    -r, --row [count]      Number of records to be download, defaults to: 1000
+    -u, --url [url]        ckan url, eg: https://catalogue.data.govt.nz, defaults to: https://catalogue.data.govt.nz
+    -h, --help             output usage information
+```
+
+**Tests**
+Run the test suite with Jest:
+```
+./node_modules/.bin/jest
+```
diff --git a/jsonToCsv/jsonToCsv.js b/jsonToCsv/jsonToCsv.js
@@ -0,0 +1,59 @@
+'use strict'
+const jsonpath = require('jsonpath')
+const fetch = require('node-fetch')
+const json2csv = require('json2csv')
+const flatten = require('lodash.flatten')
+const uniq = require('lodash.uniq')
+const fs = require('fs')
+const program = require('commander')
+const jsonResultToCsvRow = require('./lib/jsonResultToCsvRow')
+
+// Declare the command-line interface; after parse() the option values are read as `program.<name>`.
+program.version('0.1.0')
+program.description('Tool for downloading an agency\'s existing data from data.govt in a format they can use to re-submit it via the schema tool.')
+program.option('-a, --agency [agency]', 'CKAN agency identifier, eg: ministry-of-health')
+program.option('-f, --file [fileName]', 'CSV filename to write to, eg: output.csv, defaults to: <agency-identifer>.csv')
+program.option('-r, --row [count]', 'Number of records to be download, defaults to: 1000')
+program.option('-u, --url [url]', 'ckan url, eg: https://catalogue.data.govt.nz, defaults to: https://catalogue.data.govt.nz')
+program.parse(process.argv)
+
+const agencyIdentifier = program.agency
+
+if (!agencyIdentifier) {
+  console.error('--agency CKAN agency identifier, eg: ministry-of-health, is a required parameter. try `node jsonToCsv.js --agency ministry-of-health`')
+  process.exit()
+}
+const host = program.url || 'https://catalogue.data.govt.nz'
+const recordCount = program.row || 1000
+const url = `${host}/api/action/package_search?fq=organization:${agencyIdentifier}&rows=${recordCount}`
+const fileName = program.file || agencyIdentifier + '.csv'
+
+convertOnlineReport()
+
+async function convertOnlineReport () {
+  const response = await fetch(url)
+  const body = await response.json()
+
+  const data = mapObjects(body)
+  const fields = getHeaderFieldNames(data)
+  const csv = json2csv({data, fields})
+  fs.writeFileSync(`./${fileName}`, csv)
+  console.log('Finished, written to file.')
+}
+
+function mapObjects (jsonInput) {
+  const results = jsonpath.query(jsonInput, '$.result.results.*')
+  const mappedResults = results.map(jsonResultToCsvRow.map)
+  return mappedResults
+}
+
+function getHeaderFieldNames (data) {
+  const nestedFields = data.map(obj => Object.keys(obj))
+  const flattenedFields = flatten(nestedFields)
+  const uniqueFields = uniq(flattenedFields)
+  return uniqueFields
+}
+
+module.exports = {
+  convert: mapObjects
+}
diff --git a/jsonToCsv/lib/jsonResultToCsvRow.js b/jsonToCsv/lib/jsonResultToCsvRow.js
@@ -0,0 +1,57 @@
+'use strict'
+const jsonpath = require('jsonpath')
+const mergeObjects = Object.assign
+
+module.exports = {
+  map: mapObject
+}
+
+function mapObject (jsonInput) {
+  let mappedObject = {
+    title: jsonInput.title,
+    description: jsonInput.notes,
+    identifier: jsonInput.url,
+    licence: jsonInput.license_url,
+    keywords: getKeywords(jsonInput),
+    issued: jsonInput.issued,
+    modified: jsonInput.modified,
+    'publisher.name': jsonInput.author,
+    'publisher.mbox': jsonInput.author_email,
+    'contactPoint.fn': jsonInput.maintainer,
+    'contactPoint.hasPhone': jsonInput.maintainer_phone,
+    'contactPoint.hasEmail': jsonInput.maintainer_email,
+    landingPage: jsonInput.url,
+    updateFrequency: jsonInput.frequency_of_update,
+    theme: jsonInput.theme,
+    temporal: jsonInput.temporal,
+    spatial: jsonInput.spatial
+  }
+  const distributions = getDistributions(jsonInput)
+  mergeObjects(mappedObject, distributions)
+  return mappedObject
+}
+
+function getKeywords (jsonInput) {
+  const tags = jsonpath.query(jsonInput, '$.tags.*.display_name')
+  const keywords = tags.join(', ')
+  return keywords
+}
+
+function getDistributions (jsonInput) {
+  const distributions = jsonpath.query(jsonInput, '$.resources.*')
+  const mappedDistributions = distributions.map((val, i) => mapDistribution(val, i))
+  const result = {}
+  mappedDistributions.forEach(distribution => {
+    mergeObjects(result, distribution)
+  })
+  return result
+}
+
+function mapDistribution (distribution, i) {
+  return {
+    [`distribution.${i}.downloadURL`]: distribution.url,
+    [`distribution.${i}.format`]: distribution.format,
+    [`distribution.${i}.size`]: distribution.size,
+    [`distribution.${i}.title`]: distribution.name
+  }
+}