Skip to content

Commit

Permalink
Add CKAN API json to CSV exporter
Browse files Browse the repository at this point in the history
This feature allows an agency to export their current set of dataset metadata matching the data.govt.nz dcat schema.
  • Loading branch information
anotheredward authored and camfindlay committed Feb 25, 2018
1 parent 724dc5d commit 015dc9b
Show file tree
Hide file tree
Showing 10 changed files with 8,650 additions and 19 deletions.
41 changes: 41 additions & 0 deletions jsonToCsv/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Convert a data.json to CSV

This module is intended to help new organizations onboard with automation by using their existing [data.govt.nz](https://data.govt.nz) catalogue to populate a CSV.

Once you have the CSV, you can update your data.json file by using the CLI/Web interface of the CSV to DCAT converter.


## How to use this tool

**requirements**
node `8.9.4` and npm `5.6.0`

**Usage**

1. clone the repo, cd into the repo.
2. install the modules in this directory `npm install`.
3. run the script (exporting the ministry of health organization) `node jsonToCsv.js -a ministry-of-health`.

**Options**

```
Usage: jsonToCsv [options]
Tool for downloading an agency's existing data from data.govt in a format they can use to re-submit it via the schema tool.
Options:
-V, --version output the version number
-a, --agency [agency] CKAN agency identifier, eg: ministry-of-health
-f, --file [fileName] CSV filename to write to, eg: output.csv, defaults to: <agency-identifer>.csv
-r, --row [count] Number of records to be download, defaults to: 1000
-u, --url [url] ckan url, eg: https://catalogue.data.govt.nz, defaults to: https://catalogue.data.govt.nz
-h, --help output usage information
```

**Tests**
We have tests.
```
./node_modules/.bin/jest
```
59 changes: 59 additions & 0 deletions jsonToCsv/jsonToCsv.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
'use strict'
const jsonpath = require('jsonpath')
const fetch = require('node-fetch')
const json2csv = require('json2csv')
const flatten = require('lodash.flatten')
const uniq = require('lodash.uniq')
const fs = require('fs')
const program = require('commander')
const jsonResultToCsvRow = require('./lib/jsonResultToCsvRow')

// Command-line interface definition: tool metadata first, then one call per
// supported option, and finally parsing of the actual process arguments.
program.version('0.1.0')
program.description('Tool for downloading an agency\'s existing data from data.govt in a format they can use to re-submit it via the schema tool.')

// Options read later in this script as program.agency, program.file,
// program.row and program.url.
program.option('-a, --agency [agency]', 'CKAN agency identifier, eg: ministry-of-health')
program.option('-f, --file [fileName]', 'CSV filename to write to, eg: output.csv, defaults to: <agency-identifer>.csv')
program.option('-r, --row [count]', 'Number of records to be download, defaults to: 1000')
program.option('-u, --url [url]', 'ckan url, eg: https://catalogue.data.govt.nz, defaults to: https://catalogue.data.govt.nz')

program.parse(process.argv)

// The agency (CKAN organization) identifier is the only mandatory argument.
const agencyIdentifier = program.agency

// Reject a missing identifier, and also a bare `-a` flag with no value, which
// would otherwise be interpolated into the request as a non-string.
if (typeof agencyIdentifier !== 'string' || agencyIdentifier === '') {
  console.error('--agency CKAN agency identifier, eg: ministry-of-health, is a required parameter. try `node jsonToCsv.js --agency ministry-of-health`')
  // Exit with a failure status so shells and scripts can detect the bad invocation.
  process.exit(1)
}
const host = program.url || 'https://catalogue.data.govt.nz'
const recordCount = program.row || 1000
// Encode the user-supplied values so characters such as `&` or `#` cannot
// change the meaning of the query string.
const url = `${host}/api/action/package_search?fq=organization:${encodeURIComponent(agencyIdentifier)}&rows=${encodeURIComponent(recordCount)}`
const fileName = program.file || agencyIdentifier + '.csv'

// Report download/conversion failures explicitly instead of leaving the
// returned promise without a rejection handler.
convertOnlineReport().catch(error => {
  console.error(`Export failed: ${error.message}`)
  process.exitCode = 1
})

/**
 * Downloads the search results from `url`, converts them to CSV and writes the
 * CSV to `./<fileName>`.
 *
 * Reads the module-level `url` and `fileName` values.
 *
 * @returns {Promise<void>} resolves once the file has been written
 * @throws {Error} (as a rejection) when the HTTP status is not successful or
 *   the response body reports `success: false`
 */
async function convertOnlineReport () {
  const response = await fetch(url)
  // fetch resolves for HTTP error statuses too, so check the status explicitly
  // before trying to parse the body.
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with HTTP ${response.status} ${response.statusText}`)
  }
  const body = await response.json()
  // Only an explicit `success: false` is treated as a failure, so a body
  // without that field is still converted as before.
  if (body && body.success === false) {
    throw new Error(`CKAN reported an error for ${url}: ${JSON.stringify(body.error)}`)
  }

  const data = mapObjects(body)
  const fields = getHeaderFieldNames(data)
  const csv = json2csv({data, fields})
  fs.writeFileSync(`./${fileName}`, csv)
  console.log('Finished, written to file.')
}

/**
 * Turns a parsed search response into one CSV row object per result.
 *
 * @param {Object} jsonInput parsed response; entries are selected with the
 *   JSONPath expression `$.result.results.*`
 * @returns {Object[]} one object per selected entry, as produced by
 *   `jsonResultToCsvRow.map`
 */
function mapObjects (jsonInput) {
  return jsonpath
    .query(jsonInput, '$.result.results.*')
    .map(dataset => jsonResultToCsvRow.map(dataset))
}

/**
 * Collects the CSV header: every property name that appears on any row,
 * de-duplicated with `uniq`.
 *
 * @param {Object[]} data row objects
 * @returns {string[]} the distinct property names found across all rows
 */
function getHeaderFieldNames (data) {
  const keysPerRow = data.map(row => Object.keys(row))
  return uniq(flatten(keysPerRow))
}

// Public API of this module: `convert` is mapObjects, which turns a parsed
// search response into an array of CSV row objects.
module.exports = {
convert: mapObjects
}
57 changes: 57 additions & 0 deletions jsonToCsv/lib/jsonResultToCsvRow.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
'use strict'
const jsonpath = require('jsonpath')
const mergeObjects = Object.assign

// Public API of this module: `map` converts a single result object into a
// flat CSV row object. mapObject is a function declaration further down the
// file, so it is hoisted and already available here.
module.exports = {
map: mapObject
}

/**
 * Flattens one result object into a single CSV row object.
 *
 * The dataset-level columns come first, followed by the per-resource
 * `distribution.<n>.*` columns returned by getDistributions.
 *
 * @param {Object} jsonInput one result object from the search response
 * @returns {Object} flat object whose keys are the CSV column names
 */
function mapObject (jsonInput) {
  const {
    title,
    notes,
    url,
    license_url: licenceUrl,
    issued,
    modified,
    author,
    author_email: authorEmail,
    maintainer,
    maintainer_phone: maintainerPhone,
    maintainer_email: maintainerEmail,
    frequency_of_update: updateFrequency,
    theme,
    temporal,
    spatial
  } = jsonInput

  // Key order matters: it becomes the column order of the generated CSV.
  const datasetColumns = {
    title,
    description: notes,
    // The same source URL feeds both `identifier` and `landingPage`.
    identifier: url,
    licence: licenceUrl,
    keywords: getKeywords(jsonInput),
    issued,
    modified,
    'publisher.name': author,
    'publisher.mbox': authorEmail,
    'contactPoint.fn': maintainer,
    'contactPoint.hasPhone': maintainerPhone,
    'contactPoint.hasEmail': maintainerEmail,
    landingPage: url,
    updateFrequency,
    theme,
    temporal,
    spatial
  }

  // mergeObjects copies the distribution columns onto datasetColumns and
  // returns that same object.
  return mergeObjects(datasetColumns, getDistributions(jsonInput))
}

/**
 * Builds the `keywords` column from the `display_name` of every tag.
 *
 * @param {Object} jsonInput result object; tags are selected with the
 *   JSONPath expression `$.tags.*.display_name`
 * @returns {string} the selected names joined with ", " (empty string when
 *   nothing is selected)
 */
function getKeywords (jsonInput) {
  return jsonpath.query(jsonInput, '$.tags.*.display_name').join(', ')
}

/**
 * Builds the `distribution.<n>.*` columns for every resource of a result.
 *
 * @param {Object} jsonInput result object; resources are selected with the
 *   JSONPath expression `$.resources.*`
 * @returns {Object} a single flat object containing the columns of all
 *   resources, numbered by their position (empty object when there are none)
 */
function getDistributions (jsonInput) {
  return jsonpath
    .query(jsonInput, '$.resources.*')
    .reduce(
      // mergeObjects returns its first argument, so the accumulator is the
      // same object throughout the fold.
      (columns, resource, position) => mergeObjects(columns, mapDistribution(resource, position)),
      {}
    )
}

/**
 * Maps one resource to its four CSV columns, namespaced by position.
 *
 * @param {Object} distribution resource object; `url`, `format`, `size` and
 *   `name` are read from it
 * @param {number} i position of the resource, used in the column names
 * @returns {Object} object with the keys `distribution.<i>.downloadURL`,
 *   `.format`, `.size` and `.title`, in that order
 */
function mapDistribution (distribution, i) {
  const columns = {}
  columns[`distribution.${i}.downloadURL`] = distribution.url
  columns[`distribution.${i}.format`] = distribution.format
  columns[`distribution.${i}.size`] = distribution.size
  columns[`distribution.${i}.title`] = distribution.name
  return columns
}
Loading

0 comments on commit 015dc9b

Please sign in to comment.