-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This feature allows an agency to export their current set of dataset metadata matching the data.govt.nz dcat schema.
- Loading branch information
1 parent
724dc5d
commit 015dc9b
Showing
10 changed files
with
8,650 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Convert a data.json to CSV | ||
|
||
This module is intended to help new organizations onboard with automation by using their existing [data.govt](https://data.govt.nz) catalogue to populate a CSV. | ||
|
||
Once you have the CSV, you can update your data.json file using the CLI or web interface of the CSV to DCAT converter. | ||
|
||
|
||
## How to use this tool | ||
|
||
**requirements** | ||
node `8.9.4` and npm `5.6.0` | ||
|
||
**Usage** | ||
|
||
1. clone the repo, cd into the repo. | ||
2. install the modules in this directory `npm install`. | ||
3. run the script (exporting the ministry of health organization) `node jsonToCsv.js -a ministry-of-health`. | ||
|
||
**Options** | ||
|
||
``` | ||
Usage: jsonToCsv [options] | ||
Tool for downloading an agency's existing data from data.govt in a format they can use to re-submit it via the schema tool. | ||
Options: | ||
-V, --version output the version number | ||
-a, --agency [agency] CKAN agency identifier, eg: ministry-of-health | ||
-f, --file [fileName] CSV filename to write to, eg: output.csv, defaults to: <agency-identifer>.csv | ||
-r, --row [count] Number of records to be download, defaults to: 1000 | ||
-u, --url [url] ckan url, eg: https://catalogue.data.govt.nz, defaults to: https://catalogue.data.govt.nz | ||
-h, --help output usage information | ||
``` | ||
|
||
**Tests** | ||
We have tests. | ||
``` | ||
./node_modules/.bin/jest | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
'use strict' | ||
const jsonpath = require('jsonpath') | ||
const fetch = require('node-fetch') | ||
const json2csv = require('json2csv') | ||
const flatten = require('lodash.flatten') | ||
const uniq = require('lodash.uniq') | ||
const fs = require('fs') | ||
const program = require('commander') | ||
const jsonResultToCsvRow = require('./lib/jsonResultToCsvRow') | ||
|
||
// Command-line definition. Every option takes an optional value, so a flag
// given without a value is reported by commander as boolean `true`.
program
  .version('0.1.0')
  .description('Tool for downloading an agency\'s existing data from data.govt in a format they can use to re-submit it via the schema tool.')
  .option('-a, --agency [agency]', 'CKAN agency identifier, eg: ministry-of-health')
  .option('-f, --file [fileName]', 'CSV filename to write to, eg: output.csv, defaults to: <agency-identifer>.csv')
  .option('-r, --row [count]', 'Number of records to be download, defaults to: 1000')
  .option('-u, --url [url]', 'ckan url, eg: https://catalogue.data.govt.nz, defaults to: https://catalogue.data.govt.nz')
  .parse(process.argv)

const agencyIdentifier = program.agency

// Reject both a missing flag and a bare `-a` (which arrives as `true`),
// and exit non-zero so shells and CI can detect the failure.
if (!agencyIdentifier || typeof agencyIdentifier !== 'string') {
  console.error('--agency CKAN agency identifier, eg: ministry-of-health, is a required parameter. try `node jsonToCsv.js --agency ministry-of-health`')
  process.exit(1)
}
const host = program.url || 'https://catalogue.data.govt.nz'
const recordCount = program.row || 1000
// User-supplied values are percent-encoded so characters such as `&` or `#`
// cannot alter the structure of the query string.
const url = `${host}/api/action/package_search?fq=organization:${encodeURIComponent(agencyIdentifier)}&rows=${encodeURIComponent(recordCount)}`
const fileName = program.file || agencyIdentifier + '.csv'

// Report download/conversion failures instead of leaving an unhandled
// promise rejection, and signal the failure through the exit status.
convertOnlineReport().catch(err => {
  console.error('Export failed:', err && err.message ? err.message : err)
  process.exit(1)
})
|
||
/**
 * Fetches the package search result from `url`, converts every dataset into
 * a flat CSV row and writes the resulting CSV to `./<fileName>`.
 *
 * @returns {Promise<void>} resolves once the file has been written
 * @throws {Error} when the server answers with a non-2xx HTTP status
 */
async function convertOnlineReport () {
  const response = await fetch(url)
  // Without this check an error response would be parsed like a successful
  // search result and could produce an empty or misleading CSV.
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with HTTP status ${response.status} ${response.statusText}`)
  }
  const body = await response.json()

  const data = mapObjects(body)
  // The header is the union of all keys, because rows can have a different
  // number of distribution columns.
  const fields = getHeaderFieldNames(data)
  const csv = json2csv({data, fields})
  fs.writeFileSync(`./${fileName}`, csv)
  console.log('Finished, written to file.')
}
|
||
/**
 * Converts a package search response into an array of flat CSV row objects.
 *
 * @param {Object} jsonInput parsed response body; datasets are read from
 *   `result.results`
 * @returns {Object[]} one mapped row per dataset
 */
function mapObjects (jsonInput) {
  return jsonpath
    .query(jsonInput, '$.result.results.*')
    .map(jsonResultToCsvRow.map)
}
|
||
/**
 * Collects the CSV header: every distinct key used by any row, in order of
 * first appearance.
 *
 * @param {Object[]} data mapped row objects
 * @returns {string[]} unique field names
 */
function getHeaderFieldNames (data) {
  return uniq(flatten(data.map(row => Object.keys(row))))
}
|
||
module.exports = { | ||
convert: mapObjects | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
'use strict' | ||
const jsonpath = require('jsonpath') | ||
const mergeObjects = Object.assign | ||
|
||
module.exports = { | ||
map: mapObject | ||
} | ||
|
||
/**
 * Flattens one dataset record into a CSV row object. Fixed metadata columns
 * come first, followed by the `distribution.<n>.*` columns produced for the
 * dataset's resources.
 *
 * @param {Object} jsonInput a single dataset record
 * @returns {Object} flat row keyed by CSV column name
 */
function mapObject (jsonInput) {
  // Key order matters: it determines the column order of the generated CSV.
  const row = {
    title: jsonInput.title,
    description: jsonInput.notes,
    identifier: jsonInput.url,
    licence: jsonInput.license_url,
    keywords: getKeywords(jsonInput),
    issued: jsonInput.issued,
    modified: jsonInput.modified,
    'publisher.name': jsonInput.author,
    'publisher.mbox': jsonInput.author_email,
    'contactPoint.fn': jsonInput.maintainer,
    'contactPoint.hasPhone': jsonInput.maintainer_phone,
    'contactPoint.hasEmail': jsonInput.maintainer_email,
    landingPage: jsonInput.url,
    updateFrequency: jsonInput.frequency_of_update,
    theme: jsonInput.theme,
    temporal: jsonInput.temporal,
    spatial: jsonInput.spatial
  }
  // mergeObjects copies the distribution columns onto `row` and returns it.
  return mergeObjects(row, getDistributions(jsonInput))
}
|
||
/**
 * Builds the keywords column from the display names of the dataset's tags.
 *
 * @param {Object} jsonInput a single dataset record
 * @returns {string} tag display names joined with ', ' (empty string when
 *   no tag matches)
 */
function getKeywords (jsonInput) {
  return jsonpath.query(jsonInput, '$.tags.*.display_name').join(', ')
}
|
||
/**
 * Flattens every resource of a dataset into `distribution.<index>.*` columns
 * and merges them into a single object.
 *
 * @param {Object} jsonInput a single dataset record; resources are read
 *   from `resources`
 * @returns {Object} all distribution columns (empty object when there are
 *   no resources)
 */
function getDistributions (jsonInput) {
  return jsonpath
    .query(jsonInput, '$.resources.*')
    .reduce(
      (merged, resource, index) => mergeObjects(merged, mapDistribution(resource, index)),
      {}
    )
}
|
||
/**
 * Maps one resource onto its indexed CSV columns.
 *
 * @param {Object} distribution a resource entry of a dataset
 * @param {number} i position of the resource, used in the column names
 * @returns {Object} the `distribution.<i>.downloadURL`, `.format`, `.size`
 *   and `.title` columns
 */
function mapDistribution (distribution, i) {
  const prefix = `distribution.${i}`
  const columns = {}
  columns[`${prefix}.downloadURL`] = distribution.url
  columns[`${prefix}.format`] = distribution.format
  columns[`${prefix}.size`] = distribution.size
  columns[`${prefix}.title`] = distribution.name
  return columns
}
Oops, something went wrong.