Skip to content

Commit

Permalink
Improve serialization of statements
Browse files Browse the repository at this point in the history
Rather than just including a Literal, add it as a node
(with a class if provided) with an RDFS.label property, e.g.:

    dct:accessRights [ a dct:RightsStatement ;
	rdfs:label "Statement about access rights" ] ;

A URI can also be used:

    dct:accessRights <https://example.org/vocab/access-right/TODO/PUBLIC> ;

    [...]

    <https://example.org/vocab/access-right/TODO/PUBLIC> a dct:RightsStatement .

This is in line with the recommendation in the DCAT 3 spec:

https://www.w3.org/TR/vocab-dcat-3/#ex-license-and-access-rights
  • Loading branch information
amercader committed Oct 9, 2024
1 parent 7eff792 commit 8fd1284
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 12 deletions.
32 changes: 32 additions & 0 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,38 @@ def _add_spatial_to_dict(self, dataset_dict, key, spatial):
}
)

def _add_statement_to_graph(self, data_dict, key, subject, predicate, _class=None):
    """
    Serialize a statement-like field of ``data_dict`` into the graph.

    The value stored under ``key`` is read with ``self._get_dict_value``;
    when it is empty nothing is added to the graph.

    A Literal value is not attached to ``subject`` directly. It is wrapped
    in a blank node (typed with ``_class`` if provided) that carries the
    text as its RDFS.label:

        dct:accessRights [ a dct:RightsStatement ;
            rdfs:label "Statement about access rights" ] ;

    A URI value is linked to ``subject`` as is, and the URI itself is typed
    with ``_class`` if provided:

        dct:accessRights <https://example.org/vocab/access-right/TODO/PUBLIC> ;
        [...]
        <https://example.org/vocab/access-right/TODO/PUBLIC> a dct:RightsStatement .
    """
    raw = self._get_dict_value(data_dict, key)
    if not raw:
        return

    term = URIRefOrLiteral(raw)
    is_literal = isinstance(term, Literal)

    # A Literal hangs off a fresh blank node; a URI is the statement node itself
    node = BNode() if is_literal else term

    self.g.add((subject, predicate, node))
    if _class:
        self.g.add((node, RDF.type, _class))
    if is_literal:
        self.g.add((node, RDFS.label, term))

def _schema_field(self, key):
"""
Returns the schema field information if the provided key exists as a field in
Expand Down
29 changes: 26 additions & 3 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
("version", OWL.versionInfo, ["dcat_version"], Literal),
("version_notes", ADMS.versionNotes, None, Literal),
("frequency", DCT.accrualPeriodicity, None, URIRefOrLiteral, DCT.Frequency),
("access_rights", DCT.accessRights, None, URIRefOrLiteral, DCT.AccessRights),
("dcat_type", DCT.type, None, URIRefOrLiteral),
("provenance", DCT.provenance, None, URIRefOrLiteral, DCT.ProvenanceStatement),
]
self._add_triples_from_dict(dataset_dict, dataset_ref, items)

Expand Down Expand Up @@ -500,6 +498,23 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
):
resource_license_fallback = dataset_dict["license_url"]

# Statements
self._add_statement_to_graph(
dataset_dict,
"access_rights",
dataset_ref,
DCT.accessRights,
DCT.RightsStatement
)

self._add_statement_to_graph(
dataset_dict,
"provenance",
dataset_ref,
DCT.provenance,
DCT.ProvenanceStatement
)

# Resources
for resource_dict in dataset_dict.get("resources", []):

Expand All @@ -514,7 +529,6 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
("name", DCT.title, None, Literal),
("description", DCT.description, None, Literal),
("status", ADMS.status, None, URIRefOrLiteral),
("rights", DCT.rights, None, URIRefOrLiteral, DCT.RightsStatement),
("license", DCT.license, None, URIRefOrLiteral, DCT.LicenseDocument),
("access_url", DCAT.accessURL, None, URIRef, RDFS.Resource),
("download_url", DCAT.downloadURL, None, URIRef, RDFS.Resource),
Expand All @@ -530,6 +544,15 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
]
self._add_list_triples_from_dict(resource_dict, distribution, items)

# Statements
self._add_statement_to_graph(
resource_dict,
"rights",
distribution,
DCT.rights,
DCT.RightsStatement
)

# Set default license for distribution if needed and available

if resource_license_fallback and not (distribution, DCT.license, None) in g:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ckanext.dcat.profiles import (
DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT,
RDFS,
)
from ckanext.dcat.profiles.euro_dcat_ap_base import DISTRIBUTION_LICENSE_FALLBACK_CONFIG
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
Expand Down Expand Up @@ -121,8 +122,6 @@ def test_graph_from_dataset(self):
assert self._triple(g, dataset_ref, OWL.versionInfo, dataset['version'])
assert self._triple(g, dataset_ref, ADMS.versionNotes, extras['version_notes'])
assert self._triple(g, dataset_ref, DCT.accrualPeriodicity, extras['frequency'])
assert self._triple(g, dataset_ref, DCT.accessRights, extras['access_rights'])
assert self._triple(g, dataset_ref, DCT.provenance, extras['provenance'])
assert self._triple(g, dataset_ref, DCT.type, extras['dcat_type'])

# Tags
Expand Down Expand Up @@ -156,6 +155,14 @@ def test_graph_from_dataset(self):
_type = item[2][num]
assert self._triple(g, dataset_ref, item[1], _type(value))

# Statements
for item in [
('access_rights', DCT.accessRights),
('provenance', DCT.provenance),
]:
statement = [s for s in g.objects(dataset_ref, item[1])][0]
assert self._triple(g, statement, RDFS.label, extras[item[0]])

def test_identifier_extra(self):
dataset = {
'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
Expand Down Expand Up @@ -670,7 +677,6 @@ def test_distribution_fields(self):
assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
assert self._triple(g, distribution, DCT.title, resource['name'])
assert self._triple(g, distribution, DCT.description, resource['description'])
assert self._triple(g, distribution, DCT.rights, resource['rights'])
assert self._triple(g, distribution, DCT.license, URIRef(resource['license']))
assert self._triple(g, distribution, ADMS.status, URIRef(resource['status']))

Expand Down Expand Up @@ -703,6 +709,10 @@ def test_distribution_fields(self):
assert self._triple(g, checksum, SPDX.checksumValue, resource['hash'], data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
assert self._triple(g, checksum, SPDX.algorithm, URIRef(resource['hash_algorithm']))

# Statements
statement = [s for s in g.objects(distribution, DCT.rights)][0]
assert self._triple(g, statement, RDFS.label, resource['rights'])

def test_distribution_size_not_number(self):

resource = {
Expand Down
16 changes: 13 additions & 3 deletions ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
GSP,
OWL,
SPDX,
RDFS,
)
from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest

Expand Down Expand Up @@ -76,10 +77,8 @@ def test_e2e_ckan_to_dcat(self):
assert self._triple(
g, dataset_ref, DCT.accrualPeriodicity, dataset["frequency"]
)
assert self._triple(g, dataset_ref, DCT.provenance, dataset["provenance"])
assert self._triple(g, dataset_ref, DCT.type, dataset["dcat_type"])
assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"])
assert self._triple(g, dataset_ref, DCT.accessRights, dataset["access_rights"])
assert self._triple(
g,
dataset_ref,
Expand Down Expand Up @@ -274,6 +273,14 @@ def test_e2e_ckan_to_dcat(self):
distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
resource = dataset_dict["resources"][0]

# Statements
for item in [
('access_rights', DCT.accessRights),
('provenance', DCT.provenance),
]:
statement = [s for s in g.objects(dataset_ref, item[1])][0]
assert self._triple(g, statement, RDFS.label, dataset[item[0]])

# Resources: core fields

assert self._triple(g, distribution_ref, DCT.title, resource["name"])
Expand All @@ -286,7 +293,6 @@ def test_e2e_ckan_to_dcat(self):

# Resources: standard fields

assert self._triple(g, distribution_ref, DCT.rights, resource["rights"])
assert self._triple(
g, distribution_ref, ADMS.status, URIRef(resource["status"])
)
Expand Down Expand Up @@ -380,6 +386,10 @@ def test_e2e_ckan_to_dcat(self):
]
assert endpoint_urls == resource["access_services"][0]["endpoint_url"]

# Resources: statements
statement = [s for s in g.objects(distribution_ref, DCT.rights)][0]
assert self._triple(g, statement, RDFS.label, resource['rights'])

def test_publisher_fallback_org(self):

org = factories.Organization(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
GSP,
OWL,
SPDX,
RDFS,
)

DCAT_AP_PROFILES = ["euro_dcat_ap_3"]
Expand Down Expand Up @@ -74,10 +75,8 @@ def test_e2e_ckan_to_dcat(self):
assert self._triple(
g, dataset_ref, DCT.accrualPeriodicity, dataset["frequency"]
)
assert self._triple(g, dataset_ref, DCT.provenance, dataset["provenance"])
assert self._triple(g, dataset_ref, DCT.type, dataset["dcat_type"])
assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"])
assert self._triple(g, dataset_ref, DCT.accessRights, dataset["access_rights"])
assert self._triple(
g,
dataset_ref,
Expand Down Expand Up @@ -230,6 +229,14 @@ def test_e2e_ckan_to_dcat(self):
distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
resource = dataset_dict["resources"][0]

# Statements
for item in [
('access_rights', DCT.accessRights),
('provenance', DCT.provenance),
]:
statement = [s for s in g.objects(dataset_ref, item[1])][0]
assert self._triple(g, statement, RDFS.label, dataset[item[0]])

# Alternate identifiers
ids = []
for subject in [t[2] for t in g.triples((dataset_ref, ADMS.identifier, None))]:
Expand All @@ -248,7 +255,6 @@ def test_e2e_ckan_to_dcat(self):

# Resources: standard fields

assert self._triple(g, distribution_ref, DCT.rights, resource["rights"])
assert self._triple(
g, distribution_ref, ADMS.status, URIRef(resource["status"])
)
Expand Down Expand Up @@ -343,6 +349,10 @@ def test_e2e_ckan_to_dcat(self):
]
assert endpoint_urls == resource["access_services"][0]["endpoint_url"]

# Resources: statements
statement = [s for s in g.objects(distribution_ref, DCT.rights)][0]
assert self._triple(g, statement, RDFS.label, resource['rights'])

def test_byte_size_non_negative_integer(self):

dataset = {
Expand Down

0 comments on commit 8fd1284

Please sign in to comment.