From ad0aaba297a90a4e6f662f4191129f19f3625dee Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Mon, 30 Jan 2023 16:54:58 -0500 Subject: [PATCH 1/4] new: cast title to string always A title of '707' is okay to have, so even if json doesn't recognize it as a string, convert it after loading --- ckanext/datajson/datajson.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckanext/datajson/datajson.py b/ckanext/datajson/datajson.py index aa1d913e..570f25ba 100644 --- a/ckanext/datajson/datajson.py +++ b/ckanext/datajson/datajson.py @@ -460,6 +460,9 @@ def import_stage(self, harvest_object): return True dataset = json.loads(harvest_object.content) + # Ensure title is a string for munging/manipulation + # https://github.com/GSA/data.gov/issues/4172 + dataset['title'] = str(dataset['title']) schema_version = '1.0' # default to '1.0' is_collection = False parent_pkg_id = '' From d07a082f90790e552456f4135911a394eaa5a403 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Mon, 30 Jan 2023 17:00:46 -0500 Subject: [PATCH 2/4] new: add test of numerical title --- .../datajson-samples/numerical-title.json | 43 +++++++++++++++++++ .../tests/test_datajson_ckan_all_harvester.py | 7 +++ 2 files changed, 50 insertions(+) create mode 100644 ckanext/datajson/tests/datajson-samples/numerical-title.json diff --git a/ckanext/datajson/tests/datajson-samples/numerical-title.json b/ckanext/datajson/tests/datajson-samples/numerical-title.json new file mode 100644 index 00000000..7ec8b9be --- /dev/null +++ b/ckanext/datajson/tests/datajson-samples/numerical-title.json @@ -0,0 +1,43 @@ +{ + "@type": "dcat:Catalog", + "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json", + "conformsTo": "https://project-open-data.cio.gov/v1.1/schema", + "@context": "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld", + "dataset": [ + { + "identifier": "null-spatial", + "accessLevel": "public", + "contactPoint": { + "hasEmail": "mailto:Alexis.Graves@ocio.usda.gov", + "@type": "vcard:Contact", + "fn": "Nicole Numbi" + }, + "programCode": [ + "005:059" + ], + "description": "Sample dataset. Spatial can be null", + "title": 707, + "distribution": [ + { + "@type": "dcat:Distribution", + "downloadURL": "http://www.dm.usda.gov/foia/docs/Copy%20of%20ECM%20Congressional%20Logs%20FY14.xls", + "mediaType": "application/vnd.ms-excel", + "title": "Congressional Logs for Fiscal Year 2014" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "bureauCode": [ + "005:12" + ], + "modified": "2014-10-03", + "publisher": { + "@type": "org:Organization", + "name": "Department of Agriculture" + }, + "spatial": null, + "keyword": [ + "Congressional Logs" + ] + } + ] +} diff --git a/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py b/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py index cc230c28..5861b8f0 100644 --- a/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py +++ b/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py @@ -637,3 +637,10 @@ def test_datajson_null_spatial(self): dataset = datasets[0] expected_title = "Sample Title NUll Spatial" assert dataset.title == expected_title + + def test_datajson_numerical_title(self): + url = 'http://127.0.0.1:%s/numerical-title' % self.mock_port + datasets = self.run_source(url=url) + dataset = datasets[0] + expected_title = "707" + assert dataset.title == expected_title From c7009d2110f9aa46f3d03276b5d15c745752f72c Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Mon, 30 Jan 2023 17:47:35 -0500 Subject: [PATCH 3/4] fix: add new data.json to host --- .../{numerical-title.json => numerical-title.data.json} | 0 ckanext/datajson/tests/mock_datajson_source.py | 3 +++ 2 files changed, 3 insertions(+) rename ckanext/datajson/tests/datajson-samples/{numerical-title.json => numerical-title.data.json} (100%) diff --git a/ckanext/datajson/tests/datajson-samples/numerical-title.json b/ckanext/datajson/tests/datajson-samples/numerical-title.data.json similarity index 100% rename from ckanext/datajson/tests/datajson-samples/numerical-title.json rename to ckanext/datajson/tests/datajson-samples/numerical-title.data.json diff --git a/ckanext/datajson/tests/mock_datajson_source.py b/ckanext/datajson/tests/mock_datajson_source.py index 9bd1d99a..7519704f 100644 --- a/ckanext/datajson/tests/mock_datajson_source.py +++ b/ckanext/datajson/tests/mock_datajson_source.py @@ -41,6 +41,9 @@ def do_GET(self): elif self.path == '/null-spatial': self.sample_datajson_file = 'null-spatial.data.json' self.test_name = 'null-spatial' + elif self.path == '/numerical-title': + self.sample_datajson_file = 'numerical-title.data.json' + self.test_name = 'numerical-title' elif self.path == '/text': self.test_name = 'test' self.respond('abc123', status=200) From 687e8b48b740cb95d10808ff14bb8d66da35ca0f Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Mon, 30 Jan 2023 17:48:43 -0500 Subject: [PATCH 4/4] new: update setup.py version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 831c6ed5..988a66c0 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='ckanext-datajson', - version='0.1.11', + version='0.1.12', description="CKAN extension to generate /data.json", long_description=long_description, long_description_content_type='text/markdown',