Skip to content

Commit

Permalink
Merge branch 'master' into multilingual
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Oct 31, 2024
2 parents 785d787 + 1e945b6 commit 2ca23ee
Show file tree
Hide file tree
Showing 8 changed files with 338 additions and 137 deletions.
30 changes: 20 additions & 10 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,9 +508,10 @@ def _insert_or_update_temporal(self, dataset_dict, key, value):
else:
dataset_dict["extras"].append({"key": key, "value": value})

def _agent_details(self, subject, predicate):
def _agents_details(self, subject, predicate):
"""
Returns a dict with details about a dct:publisher or dct:creator entity, a foaf:Agent
Returns a list of dicts with details about the foaf:Agent entities linked
via a property such as dct:publisher or dct:creator.
Both subject and predicate must be rdflib URIRef or BNode objects
Expand All @@ -528,32 +529,37 @@ def _agent_details(self, subject, predicate):
an empty string if they could not be found.
"""

agent_details = {}

agents = []
for agent in self.g.objects(subject, predicate):
agent_details = {}
agent_details["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""
agent_details["name"] = self._object_value(agent, FOAF.name)
agent_details["email"] = self._object_value(agent, FOAF.mbox)
if not agent_details["email"]:
agent_details["email"] = self._without_mailto(
self._object_value(agent, VCARD.hasEmail)
)
agent_details["url"] = self._object_value(agent, FOAF.homepage)
agent_details["type"] = self._object_value(agent, DCT.type)
agent_details['identifier'] = self._object_value(agent, DCT.identifier)
agents.append(agent_details)

return agent_details
return agents

def _contact_details(self, subject, predicate):
"""
Returns a dict with details about a vcard expression
Returns a list of dicts with details about vcard expressions
Both subject and predicate must be rdflib URIRef or BNode objects
Each dict has keys for uri, name, email and identifier, with the values
set to an empty string if they could not be found
"""

contact = {}

contacts = []
for agent in self.g.objects(subject, predicate):

contact = {}
contact["uri"] = str(agent) if isinstance(agent, URIRef) else ""

contact["name"] = self._get_vcard_property_value(
Expand All @@ -565,8 +571,9 @@ def _contact_details(self, subject, predicate):
)

contact["identifier"] = self._get_vcard_property_value(agent, VCARD.hasUID)
contacts.append(contact)

return contact
return contacts

def _parse_geodata(self, spatial, datatype, cur_value):
"""
Expand Down Expand Up @@ -1277,10 +1284,13 @@ def _extract_catalog_dict(self, catalog_ref):
if val:
out.append({"key": key, "value": val})

publishers = self._agents_details(catalog_ref, DCT.publisher)
if publishers:
publisher = publishers[0]
out.append(
{
"key": "source_catalog_publisher",
"value": json.dumps(self._agent_details(catalog_ref, DCT.publisher)),
"value": json.dumps(publisher),
}
)
return out
Expand Down
62 changes: 36 additions & 26 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,33 +134,43 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
dataset_dict["extras"].append({"key": key, "value": json.dumps(values)})

# Contact details
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
if not contact:
# adms:contactPoint was supported on the first version of DCAT-AP
contact = self._contact_details(dataset_ref, ADMS.contactPoint)

if contact:
for key in ("uri", "name", "email", "identifier"):
if contact.get(key):
dataset_dict["extras"].append(
{"key": "contact_{0}".format(key), "value": contact.get(key)}
)

# Publisher
publisher = self._agent_details(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
)
if self._schema_field("contact"):
# This is a scheming field, will be handled in a separate profile
pass
else:
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
if not contact:
# adms:contactPoint was supported on the first version of DCAT-AP
contact = self._contact_details(dataset_ref, ADMS.contactPoint)
if contact:
contact = contact[0]
for key in ("uri", "name", "email", "identifier"):
if contact.get(key):
dataset_dict["extras"].append(
{
"key": "contact_{0}".format(key),
"value": contact.get(key)
}
)

# Creator
creator = self._agent_details(dataset_ref, DCT.creator)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if creator.get(key):
dataset_dict["extras"].append(
{"key": "creator_{0}".format(key), "value": creator.get(key)}
)
# Publishers and creators
for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]:
agent_key, predicate = item
if self._schema_field(agent_key):
# This is a scheming field, will be handled in a separate profile
pass
else:
agents = self._agents_details(dataset_ref, predicate)
if agents:
agent = agents[0]
for key in ("uri", "name", "email", "url", "type", "identifier"):
if agent.get(key):
dataset_dict["extras"].append(
{
"key": f"{agent_key}_{key}",
"value": agent.get(key)
}
)

# Temporal
start, end = self._time_interval(dataset_ref, DCT.temporal)
Expand Down
122 changes: 73 additions & 49 deletions ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _parse_list_value(data_dict, field_name):
check_name = new_fields_mapping.get(field_name, field_name)
for extra in dataset_dict.get("extras", []):
if extra["key"].startswith(f"{check_name}_"):
subfield = extra["key"][extra["key"].index("_") + 1:]
subfield = extra["key"][extra["key"].index("_") + 1 :]
if subfield in [
f["field_name"] for f in schema_field["repeating_subfields"]
]:
Expand All @@ -100,6 +100,18 @@ def _parse_list_value(data_dict, field_name):
dataset_dict[field_name] = [new_dict]
dataset_dict["extras"] = new_extras

# Contact details
contacts = self._contact_details(dataset_ref, DCAT.contactPoint)
if contacts:
dataset_dict["contact"] = contacts

# Publishers and creators
for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]:
key, predicate = item
agents = self._agents_details(dataset_ref, predicate)
if agents:
dataset_dict[key] = agents

# Repeating subfields: resources
for schema_field in self._dataset_schema["resource_fields"]:
if "repeating_subfields" in schema_field:
Expand All @@ -124,7 +136,11 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
Add triples to the graph from new repeating subfields
"""
contact = dataset_dict.get("contact")
if isinstance(contact, list) and len(contact) and self._not_empty_dict(contact[0]):
if (
isinstance(contact, list)
and len(contact)
and self._not_empty_dict(contact[0])
):
for item in contact:
contact_uri = item.get("uri")
if contact_uri:
Expand All @@ -150,11 +166,11 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
contact_details,
VCARD.hasUID,
"identifier",
_type=URIRefOrLiteral
_type=URIRefOrLiteral,
)

self._add_agent(dataset_ref, dataset_dict, "publisher", DCT.publisher)
self._add_agent(dataset_ref, dataset_dict, "creator", DCT.creator)
self._add_agents(dataset_ref, dataset_dict, "publisher", DCT.publisher)
self._add_agents(dataset_ref, dataset_dict, "creator", DCT.creator)

temporal = dataset_dict.get("temporal_coverage")
if (
Expand All @@ -172,7 +188,11 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
self.g.add((dataset_ref, DCT.temporal, temporal_ref))

spatial = dataset_dict.get("spatial_coverage")
if isinstance(spatial, list) and len(spatial) and self._not_empty_dict(spatial[0]):
if (
isinstance(spatial, list)
and len(spatial)
and self._not_empty_dict(spatial[0])
):
for item in spatial:
if item.get("uri"):
spatial_ref = CleanedURIRef(item["uri"])
Expand Down Expand Up @@ -205,55 +225,59 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
except ValueError:
pass

def _add_agent(self, dataset_ref, dataset_dict, agent_key, rdf_predicate):
def _add_agents(
self, dataset_ref, dataset_dict, agent_key, rdf_predicate, first_only=False
):
"""
Adds an agent (publisher or creator) to the RDF graph.
Adds one or more agents (e.g. publisher or creator) to the RDF graph.
:param dataset_ref: The RDF reference of the dataset
:param dataset_dict: The dataset dictionary containing agent information
:param agent_key: 'publisher' or 'creator' to specify the agent
:param rdf_predicate: The RDF predicate (DCT.publisher or DCT.creator)
:param agent_key: field name in the CKAN dict (e.g. "publisher", "creator", etc.)
:param rdf_predicate: The RDF predicate (DCT.publisher, DCT.creator, etc)
:param first_only: Add the first item found only (used for 0..1 properties)
"""
agent = dataset_dict.get(agent_key)
if (
isinstance(agent, list)
and len(agent)
and self._not_empty_dict(agent[0])
):
agent = agent[0]
agent_uri = agent.get("uri")
if agent_uri:
agent_ref = CleanedURIRef(agent_uri)
else:
agent_ref = BNode()

self.g.add((agent_ref, RDF.type, FOAF.Agent))
self.g.add((dataset_ref, rdf_predicate, agent_ref))

self._add_triple_from_dict(agent, agent_ref, FOAF.name, "name")
self._add_triple_from_dict(agent, agent_ref, FOAF.homepage, "url", _type=URIRef)
self._add_triple_from_dict(
agent,
agent_ref,
DCT.type,
"type",
_type=URIRefOrLiteral,
)
self._add_triple_from_dict(
agent,
agent_ref,
VCARD.hasEmail,
"email",
_type=URIRef,
value_modifier=self._add_mailto,
)
self._add_triple_from_dict(
agent,
agent_ref,
DCT.identifier,
"identifier",
_type=URIRefOrLiteral
)
if isinstance(agent, list) and len(agent) and self._not_empty_dict(agent[0]):
agents = [agent[0]] if first_only else agent

for agent in agents:

agent_uri = agent.get("uri")
if agent_uri:
agent_ref = CleanedURIRef(agent_uri)
else:
agent_ref = BNode()

self.g.add((agent_ref, RDF.type, FOAF.Agent))
self.g.add((dataset_ref, rdf_predicate, agent_ref))

self._add_triple_from_dict(agent, agent_ref, FOAF.name, "name")
self._add_triple_from_dict(
agent, agent_ref, FOAF.homepage, "url", _type=URIRef
)
self._add_triple_from_dict(
agent,
agent_ref,
DCT.type,
"type",
_type=URIRefOrLiteral,
)
self._add_triple_from_dict(
agent,
agent_ref,
VCARD.hasEmail,
"email",
_type=URIRef,
value_modifier=self._add_mailto,
)
self._add_triple_from_dict(
agent,
agent_ref,
DCT.identifier,
"identifier",
_type=URIRefOrLiteral,
)

@staticmethod
def _not_empty_dict(data_dict):
Expand Down
6 changes: 4 additions & 2 deletions ckanext/dcat/tests/profiles/base/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ def test_publisher_foaf(self):

p = RDFProfile(g)

publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher)
publisher = p._agents_details(URIRef('http://example.org'), DCT.publisher)[0]

assert publisher['uri'] == 'http://orgs.vocab.org/some-org'
assert publisher['name'] == 'Publishing Organization for dataset 1'
Expand Down Expand Up @@ -908,7 +908,7 @@ def test_publisher_ref(self):

p = RDFProfile(g)

publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher)
publisher = p._agents_details(URIRef('http://example.org'), DCT.publisher)[0]

assert publisher['uri'] == 'http://orgs.vocab.org/some-org'

Expand Down Expand Up @@ -941,6 +941,8 @@ def test_contact_details(self):

contact = p._contact_details(URIRef('http://example.org'), ADMS.contactPoint)

contact = contact[0]

assert contact['name'] == 'Point of Contact'
# mailto gets removed for storage and is added again on output
assert contact['email'] == '[email protected]'
Expand Down
Loading

0 comments on commit 2ca23ee

Please sign in to comment.