From 8fd12846f81eadd7542964ac809473c3fa6689b9 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 9 Oct 2024 12:18:02 +0200 Subject: [PATCH] Improve serialization of statements Rather than just include a Literal, add it as a node (with a class if provided) with an RDFS.label property, eg: dct:accessRights [ a dct:RightsStatement ; rdfs:label "Statement about access rights" ] ; A URI can also be used: dct:accessRights <https://example.org/vocab/access-right/TODO/PUBLIC> ; [...] <https://example.org/vocab/access-right/TODO/PUBLIC> a dct:RightsStatement . This is in line with the recommendation in the DCAT 3 spec: https://www.w3.org/TR/vocab-dcat-3/#ex-license-and-access-rights --- ckanext/dcat/profiles/base.py | 32 +++++++++++++++++++ ckanext/dcat/profiles/euro_dcat_ap_base.py | 29 +++++++++++++++-- .../test_euro_dcatap_profile_serialize.py | 16 ++++++++-- .../dcat_ap_2/test_scheming_support.py | 16 ++++++++-- .../test_euro_dcatap_3_profile_serialize.py | 16 ++++++++-- 5 files changed, 97 insertions(+), 12 deletions(-) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 15e26ecb..51a582c1 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -757,6 +757,38 @@ def _add_spatial_to_dict(self, dataset_dict, key, spatial): } ) + def _add_statement_to_graph(self, data_dict, key, subject, predicate, _class=None): + """ + Adds a statement property to the graph. + If it is a Literal value, it is added as a node (with a class if provided) + with an RDFS.label property, eg: + + dct:accessRights [ a dct:RightsStatement ; + rdfs:label "Statement about access rights" ] ; + + A URI can also be used: + + dct:accessRights <https://example.org/vocab/access-right/TODO/PUBLIC> ; + + [...] + + <https://example.org/vocab/access-right/TODO/PUBLIC> a dct:RightsStatement . 
+ """ + value = self._get_dict_value(data_dict, key) + if value: + _object = URIRefOrLiteral(value) + if isinstance(_object, Literal): + statement_ref = BNode() + self.g.add((subject, predicate, statement_ref)) + if _class: + self.g.add((statement_ref, RDF.type, _class)) + self.g.add((statement_ref, RDFS.label, _object)) + + else: + self.g.add((subject, predicate, _object)) + if _class: + self.g.add((_object, RDF.type, _class)) + def _schema_field(self, key): """ Returns the schema field information if the provided key exists as a field in diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index f1db48b6..12aedab5 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -282,9 +282,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): ("version", OWL.versionInfo, ["dcat_version"], Literal), ("version_notes", ADMS.versionNotes, None, Literal), ("frequency", DCT.accrualPeriodicity, None, URIRefOrLiteral, DCT.Frequency), - ("access_rights", DCT.accessRights, None, URIRefOrLiteral, DCT.AccessRights), ("dcat_type", DCT.type, None, URIRefOrLiteral), - ("provenance", DCT.provenance, None, URIRefOrLiteral, DCT.ProvenanceStatement), ] self._add_triples_from_dict(dataset_dict, dataset_ref, items) @@ -500,6 +498,23 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): ): resource_license_fallback = dataset_dict["license_url"] + # Statements + self._add_statement_to_graph( + dataset_dict, + "access_rights", + dataset_ref, + DCT.accessRights, + DCT.RightsStatement + ) + + self._add_statement_to_graph( + dataset_dict, + "provenance", + dataset_ref, + DCT.provenance, + DCT.ProvenanceStatement + ) + # Resources for resource_dict in dataset_dict.get("resources", []): @@ -514,7 +529,6 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): ("name", DCT.title, None, Literal), ("description", DCT.description, None, Literal), ("status", ADMS.status, 
None, URIRefOrLiteral), - ("rights", DCT.rights, None, URIRefOrLiteral, DCT.RightsStatement), ("license", DCT.license, None, URIRefOrLiteral, DCT.LicenseDocument), ("access_url", DCAT.accessURL, None, URIRef, RDFS.Resource), ("download_url", DCAT.downloadURL, None, URIRef, RDFS.Resource), @@ -530,6 +544,15 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): ] self._add_list_triples_from_dict(resource_dict, distribution, items) + # Statements + self._add_statement_to_graph( + resource_dict, + "rights", + distribution, + DCT.rights, + DCT.RightsStatement + ) + # Set default license for distribution if needed and available if resource_license_fallback and not (distribution, DCT.license, None) in g: diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py index ddbf9e21..9ba200be 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py @@ -21,6 +21,7 @@ from ckanext.dcat.profiles import ( DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA, SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT, + RDFS, ) from ckanext.dcat.profiles.euro_dcat_ap_base import DISTRIBUTION_LICENSE_FALLBACK_CONFIG from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS @@ -121,8 +122,6 @@ def test_graph_from_dataset(self): assert self._triple(g, dataset_ref, OWL.versionInfo, dataset['version']) assert self._triple(g, dataset_ref, ADMS.versionNotes, extras['version_notes']) assert self._triple(g, dataset_ref, DCT.accrualPeriodicity, extras['frequency']) - assert self._triple(g, dataset_ref, DCT.accessRights, extras['access_rights']) - assert self._triple(g, dataset_ref, DCT.provenance, extras['provenance']) assert self._triple(g, dataset_ref, DCT.type, extras['dcat_type']) # Tags @@ -156,6 +155,14 @@ def test_graph_from_dataset(self): _type = item[2][num] assert self._triple(g, 
dataset_ref, item[1], _type(value)) + # Statements + for item in [ + ('access_rights', DCT.accessRights), + ('provenance', DCT.provenance), + ]: + statement = [s for s in g.objects(dataset_ref, item[1])][0] + assert self._triple(g, statement, RDFS.label, extras[item[0]]) + def test_identifier_extra(self): dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', @@ -670,7 +677,6 @@ def test_distribution_fields(self): assert self._triple(g, distribution, RDF.type, DCAT.Distribution) assert self._triple(g, distribution, DCT.title, resource['name']) assert self._triple(g, distribution, DCT.description, resource['description']) - assert self._triple(g, distribution, DCT.rights, resource['rights']) assert self._triple(g, distribution, DCT.license, URIRef(resource['license'])) assert self._triple(g, distribution, ADMS.status, URIRef(resource['status'])) @@ -703,6 +709,10 @@ def test_distribution_fields(self): assert self._triple(g, checksum, SPDX.checksumValue, resource['hash'], data_type='http://www.w3.org/2001/XMLSchema#hexBinary') assert self._triple(g, checksum, SPDX.algorithm, URIRef(resource['hash_algorithm'])) + # Statements + statement = [s for s in g.objects(distribution, DCT.rights)][0] + assert self._triple(g, statement, RDFS.label, resource['rights']) + def test_distribution_size_not_number(self): resource = { diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index 65b2c074..507c762c 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -25,6 +25,7 @@ GSP, OWL, SPDX, + RDFS, ) from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest @@ -76,10 +77,8 @@ def test_e2e_ckan_to_dcat(self): assert self._triple( g, dataset_ref, DCT.accrualPeriodicity, dataset["frequency"] ) - assert self._triple(g, dataset_ref, DCT.provenance, dataset["provenance"]) assert self._triple(g, 
dataset_ref, DCT.type, dataset["dcat_type"]) assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"]) - assert self._triple(g, dataset_ref, DCT.accessRights, dataset["access_rights"]) assert self._triple( g, dataset_ref, @@ -274,6 +273,14 @@ def test_e2e_ckan_to_dcat(self): distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2] resource = dataset_dict["resources"][0] + # Statements + for item in [ + ('access_rights', DCT.accessRights), + ('provenance', DCT.provenance), + ]: + statement = [s for s in g.objects(dataset_ref, item[1])][0] + assert self._triple(g, statement, RDFS.label, dataset[item[0]]) + # Resources: core fields assert self._triple(g, distribution_ref, DCT.title, resource["name"]) @@ -286,7 +293,6 @@ def test_e2e_ckan_to_dcat(self): # Resources: standard fields - assert self._triple(g, distribution_ref, DCT.rights, resource["rights"]) assert self._triple( g, distribution_ref, ADMS.status, URIRef(resource["status"]) ) @@ -380,6 +386,10 @@ def test_e2e_ckan_to_dcat(self): ] assert endpoint_urls == resource["access_services"][0]["endpoint_url"] + # Resources: statements + statement = [s for s in g.objects(distribution_ref, DCT.rights)][0] + assert self._triple(g, statement, RDFS.label, resource['rights']) + def test_publisher_fallback_org(self): org = factories.Organization( diff --git a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py index 893ce1d4..a17e2cc9 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py @@ -24,6 +24,7 @@ GSP, OWL, SPDX, + RDFS, ) DCAT_AP_PROFILES = ["euro_dcat_ap_3"] @@ -74,10 +75,8 @@ def test_e2e_ckan_to_dcat(self): assert self._triple( g, dataset_ref, DCT.accrualPeriodicity, dataset["frequency"] ) - assert self._triple(g, dataset_ref, DCT.provenance, 
dataset["provenance"]) assert self._triple(g, dataset_ref, DCT.type, dataset["dcat_type"]) assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"]) - assert self._triple(g, dataset_ref, DCT.accessRights, dataset["access_rights"]) assert self._triple( g, dataset_ref, @@ -230,6 +229,14 @@ def test_e2e_ckan_to_dcat(self): distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2] resource = dataset_dict["resources"][0] + # Statements + for item in [ + ('access_rights', DCT.accessRights), + ('provenance', DCT.provenance), + ]: + statement = [s for s in g.objects(dataset_ref, item[1])][0] + assert self._triple(g, statement, RDFS.label, dataset[item[0]]) + # Alternate identifiers ids = [] for subject in [t[2] for t in g.triples((dataset_ref, ADMS.identifier, None))]: @@ -248,7 +255,6 @@ def test_e2e_ckan_to_dcat(self): # Resources: standard fields - assert self._triple(g, distribution_ref, DCT.rights, resource["rights"]) assert self._triple( g, distribution_ref, ADMS.status, URIRef(resource["status"]) ) @@ -343,6 +349,10 @@ def test_e2e_ckan_to_dcat(self): ] assert endpoint_urls == resource["access_services"][0]["endpoint_url"] + # Resources: statements + statement = [s for s in g.objects(distribution_ref, DCT.rights)][0] + assert self._triple(g, statement, RDFS.label, resource['rights']) + def test_byte_size_non_negative_integer(self): dataset = {