Skip to content

Commit

Permalink
Merge pull request #302 from Health-RI/feature/add-creator-support
Browse files Browse the repository at this point in the history
Feature/add creator support
  • Loading branch information
amercader authored Oct 4, 2024
2 parents 8f0f883 + d8461e2 commit 0c5b294
Show file tree
Hide file tree
Showing 17 changed files with 360 additions and 153 deletions.
52 changes: 45 additions & 7 deletions ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,31 @@ def dcat_to_ckan(dcat_dict):
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})
if dcat_publisher.get('email'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('email')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_publisher_id',
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})

dcat_creator = dcat_dict.get('creator')
if isinstance(dcat_creator, basestring):
package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator})
elif isinstance(dcat_creator, dict) and dcat_creator.get('name'):
if dcat_creator.get('name'):
package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator.get('name')})

if dcat_creator.get('email'):
package_dict['extras'].append({'key': 'dcat_creator_email', 'value': dcat_creator.get('email')})

if dcat_creator.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_creator_id',
'value': dcat_creator.get('identifier')
})

package_dict['extras'].append({
'key': 'language',
'value': ','.join(dcat_dict.get('language', []))
Expand All @@ -63,20 +79,20 @@ def dcat_to_ckan(dcat_dict):


def ckan_to_dcat(package_dict):

dcat_dict = {}

dcat_dict['title'] = package_dict.get('title')
dcat_dict['description'] = package_dict.get('notes')
dcat_dict['landingPage'] = package_dict.get('url')


# Keywords
dcat_dict['keyword'] = []
for tag in package_dict.get('tags', []):
dcat_dict['keyword'].append(tag['name'])


# Publisher
dcat_dict['publisher'] = {}
dcat_dict['creator'] = {}

for extra in package_dict.get('extras', []):
if extra['key'] in ['dcat_issued', 'dcat_modified']:
Expand All @@ -85,19 +101,41 @@ def ckan_to_dcat(package_dict):
elif extra['key'] == 'language':
dcat_dict['language'] = extra['value'].split(',')

# Publisher fields
elif extra['key'] == 'dcat_publisher_name':
dcat_dict['publisher']['name'] = extra['value']

elif extra['key'] == 'dcat_publisher_email':
dcat_dict['publisher']['mbox'] = extra['value']
dcat_dict['publisher']['email'] = extra['value']

elif extra['key'] == 'dcat_publisher_id':
dcat_dict['publisher']['identifier'] = extra['value']

# Creator fields
elif extra['key'] == 'dcat_creator_name':
dcat_dict['creator']['name'] = extra['value']

elif extra['key'] == 'dcat_creator_email':
dcat_dict['creator']['email'] = extra['value']

elif extra['key'] == 'dcat_creator_id':
dcat_dict['creator']['identifier'] = extra['value']

# Identifier
elif extra['key'] == 'guid':
dcat_dict['identifier'] = extra['value']

# Fallback for publisher (if no name in extras, use maintainer)
if not dcat_dict['publisher'].get('name') and package_dict.get('maintainer'):
dcat_dict['publisher']['name'] = package_dict.get('maintainer')
if package_dict.get('maintainer_email'):
dcat_dict['publisher']['mbox'] = package_dict.get('maintainer_email')
dcat_dict['publisher']['email'] = package_dict.get('maintainer_email')

# Fallback for creator (if no name in extras, optionally use author)
if not dcat_dict['creator'].get('name') and package_dict.get('author'):
dcat_dict['creator']['name'] = package_dict.get('author')
if package_dict.get('author_email'):
dcat_dict['creator']['email'] = package_dict.get('author_email')

dcat_dict['distribution'] = []
for resource in package_dict.get('resources', []):
Expand Down
49 changes: 14 additions & 35 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,58 +419,37 @@ def _insert_or_update_temporal(self, dataset_dict, key, value):
else:
dataset_dict["extras"].append({"key": key, "value": value})

def _publisher(self, subject, predicate):
def _agent_details(self, subject, predicate):
"""
Returns a dict with details about a dct:publisher entity, a foaf:Agent
Returns a dict with details about a dct:publisher or dct:creator entity, a foaf:Agent
Both subject and predicate must be rdflib URIRef or BNode objects
Examples:
<dct:publisher>
<dct:publisher> or <dct:creator>
<foaf:Organization rdf:about="http://orgs.vocab.org/some-org">
<foaf:name>Publishing Organization for dataset 1</foaf:name>
<foaf:mbox>contact@some.org</foaf:mbox>
<foaf:homepage>http://some.org</foaf:homepage>
<dct:type rdf:resource="http://purl.org/adms/publishertype/NonProfitOrganisation"/>
</foaf:Organization>
</dct:publisher>
{
'uri': 'http://orgs.vocab.org/some-org',
'name': 'Publishing Organization for dataset 1',
'email': 'contact@some.org',
'url': 'http://some.org',
'type': 'http://purl.org/adms/publishertype/NonProfitOrganisation',
}
<dct:publisher rdf:resource="http://publications.europa.eu/resource/authority/corporate-body/EURCOU" />
{
'uri': 'http://publications.europa.eu/resource/authority/corporate-body/EURCOU'
}
Returns keys for uri, name, email, url and type with the values set to
an empty string if they could not be found
Returns keys for uri, name, email, url, type, and identifier with the values set to
an empty string if they could not be found.
"""

publisher = {}
agent_details = {}

for agent in self.g.objects(subject, predicate):
agent_details["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""
agent_details["name"] = self._object_value(agent, FOAF.name)
agent_details["email"] = self._object_value(agent, FOAF.mbox)
agent_details["url"] = self._object_value(agent, FOAF.homepage)
agent_details["type"] = self._object_value(agent, DCT.type)
agent_details['identifier'] = self._object_value(agent, DCT.identifier)

publisher["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""

publisher["name"] = self._object_value(agent, FOAF.name)

publisher["email"] = self._object_value(agent, FOAF.mbox)

publisher["url"] = self._object_value(agent, FOAF.homepage)

publisher["type"] = self._object_value(agent, DCT.type)

publisher['identifier'] = self._object_value(agent, DCT.identifier)

return publisher
return agent_details

def _contact_details(self, subject, predicate):
"""
Expand Down Expand Up @@ -1136,7 +1115,7 @@ def _extract_catalog_dict(self, catalog_ref):
out.append(
{
"key": "source_catalog_publisher",
"value": json.dumps(self._publisher(catalog_ref, DCT.publisher)),
"value": json.dumps(self._agent_details(catalog_ref, DCT.publisher)),
}
)
return out
Expand Down
54 changes: 51 additions & 3 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

config = toolkit.config


DISTRIBUTION_LICENSE_FALLBACK_CONFIG = "ckanext.dcat.resource.inherit.license"


Expand Down Expand Up @@ -122,13 +121,21 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
)

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
publisher = self._agent_details(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
)

# Creator
creator = self._agent_details(dataset_ref, DCT.creator)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if creator.get(key):
dataset_dict["extras"].append(
{"key": "creator_{0}".format(key), "value": creator.get(key)}
)

# Temporal
start, end = self._time_interval(dataset_ref, DCT.temporal)
if start:
Expand Down Expand Up @@ -250,7 +257,6 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
"publisher_name",
"publisher_email",
):

extra["key"] = "dcat_" + extra["key"]

if extra["key"] == "language":
Expand Down Expand Up @@ -412,6 +418,48 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
]
self._add_triples_from_dict(publisher_details, publisher_ref, items)

# Creator
creator_ref = None

if dataset_dict.get("creator"):
# Scheming creator field: will be handled in a separate profile
pass
elif any(
[
self._get_dataset_value(dataset_dict, "creator_uri"),
self._get_dataset_value(dataset_dict, "creator_name"),
]
):
# Legacy creator_* extras
creator_uri = self._get_dataset_value(dataset_dict, "creator_uri")
creator_name = self._get_dataset_value(dataset_dict, "creator_name")
if creator_uri:
creator_ref = CleanedURIRef(creator_uri)
else:
# No creator_uri
creator_ref = BNode()

creator_details = {
"name": creator_name,
"email": self._get_dataset_value(dataset_dict, "creator_email"),
"url": self._get_dataset_value(dataset_dict, "creator_url"),
"type": self._get_dataset_value(dataset_dict, "creator_type"),
"identifier": self._get_dataset_value(dataset_dict, "creator_identifier"),
}

# Add to graph
if creator_ref:
g.add((creator_ref, RDF.type, FOAF.Agent))
g.add((dataset_ref, DCT.creator, creator_ref)) # Use DCT.creator for creator
items = [
("name", FOAF.name, None, Literal),
("email", FOAF.mbox, None, Literal),
("url", FOAF.homepage, None, URIRef),
("type", DCT.type, None, URIRefOrLiteral),
("identifier", DCT.identifier, None, URIRefOrLiteral),
]
self._add_triples_from_dict(creator_details, creator_ref, items)

# Temporal
start = self._get_dataset_value(dataset_dict, "temporal_start")
end = self._get_dataset_value(dataset_dict, "temporal_end")
Expand Down
Loading

0 comments on commit 0c5b294

Please sign in to comment.