diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index 9f18408d..2e7fe17d 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -133,6 +133,9 @@ def __init__(self, graph, compatibility_mode=False): # _license(). self._licenceregister_cache = None + # Cache for Organizations + self._org_cache = None + def _datasets(self): ''' Generator that returns all DCAT datasets on the graph @@ -727,14 +730,16 @@ def _add_list_triples_from_dict(self, _dict, subject, items): def _add_triples_from_dict(self, _dict, subject, items, list_value=False, - date_value=False): + date_value=False, + translated=False): for item in items: key, predicate, fallbacks, _type = item self._add_triple_from_dict(_dict, subject, predicate, key, fallbacks=fallbacks, list_value=list_value, date_value=date_value, - _type=_type) + _type=_type, + translated=translated) def _add_triple_from_dict(self, _dict, subject, predicate, key, fallbacks=None, @@ -742,7 +747,8 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key, date_value=False, _type=Literal, _datatype=None, - value_modifier=None): + value_modifier=None, + translated=False): ''' Adds a new triple to the graph with the provided parameters @@ -776,6 +782,12 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key, self._add_date_triple(subject, predicate, value, _type) elif value: # Normal text value + if translated and isinstance(value, dict): + # We assume that all translated field values are Literals + for lang, translated_value in value.items(): + object = Literal(translated_value, lang=lang) + self.g.add((subject, predicate, object)) + return # ensure URIRef items are preprocessed (space removal/url encoding) if _type == URIRef: _type = CleanedURIRef @@ -1207,10 +1219,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): g.add((dataset_ref, RDF.type, DCAT.Dataset)) + # Multilingual fields + title_key = 'title_translated' if 'title_translated' in dataset_dict else 'title' + notes_key = 'notes_translated' if 'notes_translated' in dataset_dict else 'notes' + items = [ + (title_key, DCT.title, None, Literal), + (notes_key, DCT.description, None, Literal), + ] + self._add_triples_from_dict(dataset_dict, dataset_ref, items, translated=True) + # Basic fields items = [ - ('title', DCT.title, None, Literal), - ('notes', DCT.description, None, Literal), ('url', DCAT.landingPage, None, URIRef), ('identifier', DCT.identifier, ['guid', 'id'], URIRefOrLiteral), ('version', OWL.versionInfo, ['dcat_version'], Literal), @@ -1223,8 +1242,14 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): self._add_triples_from_dict(dataset_dict, dataset_ref, items) # Tags - for tag in dataset_dict.get('tags', []): - g.add((dataset_ref, DCAT.keyword, Literal(tag['name']))) + tags = dataset_dict.get('tags_translated', dataset_dict.get('tags', [])) + for tag in tags: + if 'name' in tag: + g.add((dataset_ref, DCAT.keyword, Literal(tag['name']))) + else: + # translated tags are stored as {'lang': ['tag1', 'tag2', ...]} + for translated_value in tags[tag]: + g.add((dataset_ref, DCAT.keyword, Literal(translated_value, lang=tag))) # Dates items = [ @@ -1306,9 +1331,23 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # If no name but an URI is available, the name literal remains empty to # avoid mixing organization and dataset values. if not publisher_name and not publisher_uri and dataset_dict.get('organization'): - publisher_name = dataset_dict['organization']['title'] - - g.add((publisher_details, FOAF.name, Literal(publisher_name))) + org_id = dataset_dict["organization"]["id"] + org_dict = None + if org_id in self._org_cache: + org_dict = self._org_cache[org_id] + else: + try: + org_dict = toolkit.get_action(u'organization_show')({u'ignore_auth': True}, + {u'id': org_id}) + self._org_cache[org_id] = org_dict + except toolkit.ObjectNotFound: + pass + if org_dict: + title_key = 'title_translated' if 'title_translated' in org_dict else 'title' + items = [(title_key, FOAF.name, None, Literal)] + self._add_triples_from_dict(org_dict, publisher_details, items, translated=True) + else: + g.add((publisher_details, FOAF.name, Literal(publisher_name))) # TODO: It would make sense to fallback these to organization # fields but they are not in the default schema and the # `organization` object in the dataset_dict does not include @@ -1364,10 +1403,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): g.add((distribution, RDF.type, DCAT.Distribution)) + # Multilingual fields + name_key = 'name_translated' if 'name_translated' in resource_dict else 'name' + description_key = 'description_translated' if 'description_translated' in resource_dict else 'description' + items = [ + (name_key, DCT.title, None, Literal), + (description_key, DCT.description, None, Literal), + ] + self._add_triples_from_dict(resource_dict, distribution, items, translated=True) + # Simple values items = [ - ('name', DCT.title, None, Literal), - ('description', DCT.description, None, Literal), ('status', ADMS.status, None, URIRefOrLiteral), ('rights', DCT.rights, None, URIRefOrLiteral), ('license', DCT.license, None, URIRefOrLiteral), diff --git a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py index e0f7d3c0..8944fc38 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py @@ -17,7 +17,7 @@ from ckanext.dcat import utils from ckanext.dcat.processors import RDFSerializer from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA, - SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT, + SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT, DISTRIBUTION_LICENSE_FALLBACK_CONFIG) from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS from ckanext.dcat.tests.utils import BaseSerializeTest @@ -398,13 +398,15 @@ def test_publisher_extras(self): assert self._triple(g, publisher, DCT.type, URIRef(extras['publisher_type'])) def test_publisher_org(self): + org = factories.Organization() + dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', 'name': 'test-dataset', 'organization': { - 'id': '', - 'name': 'publisher1', - 'title': 'Example Publisher from Org', + 'id': org['id'], + 'name': org['name'], + 'title': org['title'], } }