Skip to content

Commit

Permalink
Merge branch 'multiple-agents' into dcat-us-3
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Oct 23, 2024
2 parents 641bcd0 + 5c948a8 commit 2773c3c
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 41 deletions.
11 changes: 6 additions & 5 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def _insert_or_update_temporal(self, dataset_dict, key, value):
else:
dataset_dict["extras"].append({"key": key, "value": value})

def _agents_details(self, subject, predicate, first_only=False):
def _agents_details(self, subject, predicate):
"""
Returns a list of dicts with details about a foaf:Agent property, e.g.
dct:publisher or dct:creator entity.
Expand Down Expand Up @@ -466,18 +466,18 @@ def _agents_details(self, subject, predicate, first_only=False):

def _contact_details(self, subject, predicate):
"""
Returns a dict with details about a vcard expression
Returns a list of dicts with details about vcard expressions
Both subject and predicate must be rdflib URIRef or BNode objects
Returns keys for uri, name and email with the values set to
an empty string if they could not be found
"""

contact = {}

contacts = []
for agent in self.g.objects(subject, predicate):

contact = {}
contact["uri"] = str(agent) if isinstance(agent, URIRef) else ""

contact["name"] = self._get_vcard_property_value(
Expand All @@ -489,8 +489,9 @@ def _contact_details(self, subject, predicate):
)

contact["identifier"] = self._get_vcard_property_value(agent, VCARD.hasUID)
contacts.append(contact)

return contact
return contacts

def _parse_geodata(self, spatial, datatype, cur_value):
"""
Expand Down
65 changes: 36 additions & 29 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,36 +108,43 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
dataset_dict["extras"].append({"key": key, "value": json.dumps(values)})

# Contact details
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
if not contact:
# adms:contactPoint was supported on the first version of DCAT-AP
contact = self._contact_details(dataset_ref, ADMS.contactPoint)

if contact:
for key in ("uri", "name", "email", "identifier"):
if contact.get(key):
dataset_dict["extras"].append(
{"key": "contact_{0}".format(key), "value": contact.get(key)}
)

# Publisher
publishers = self._agents_details(dataset_ref, DCT.publisher)
if publishers:
publisher = publishers[0]
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
)
if self._schema_field("contact"):
# This is a scheming field, will be handled in a separate profile
pass
else:
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
if not contact:
# adms:contactPoint was supported on the first version of DCAT-AP
contact = self._contact_details(dataset_ref, ADMS.contactPoint)
if contact:
contact = contact[0]
for key in ("uri", "name", "email", "identifier"):
if contact.get(key):
dataset_dict["extras"].append(
{
"key": "contact_{0}".format(key),
"value": contact.get(key)
}
)

# Creator
creators = self._agents_details(dataset_ref, DCT.creator)
for creator in creators:
for key in ("uri", "name", "email", "url", "type", "identifier"):
if creator.get(key):
dataset_dict["extras"].append(
{"key": "creator_{0}".format(key), "value": creator.get(key)}
)
# Publishers and creators
for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]:
agent_key, predicate = item
if self._schema_field(agent_key):
# This is a scheming field, will be handled in a separate profile
pass
else:
agents = self._agents_details(dataset_ref, predicate)
if agents:
agent = agents[0]
for key in ("uri", "name", "email", "url", "type", "identifier"):
if agent.get(key):
dataset_dict["extras"].append(
{
"key": f"{agent_key}_{key}",
"value": agent.get(key)
}
)

# Temporal
start, end = self._time_interval(dataset_ref, DCT.temporal)
Expand Down
14 changes: 13 additions & 1 deletion ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,18 @@ def _parse_list_value(data_dict, field_name):
dataset_dict[field_name] = [new_dict]
dataset_dict["extras"] = new_extras

# Contact details
contacts = self._contact_details(dataset_ref, DCAT.contactPoint)
if contacts:
dataset_dict["contact"] = contacts

# Publishers and creators
for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]:
key, predicate = item
agents = self._agents_details(dataset_ref, predicate)
if agents:
dataset_dict[key] = agents

# Repeating subfields: resources
for schema_field in self._dataset_schema["resource_fields"]:
if "repeating_subfields" in schema_field:
Expand Down Expand Up @@ -157,7 +169,7 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
_type=URIRefOrLiteral,
)

self._add_agents(dataset_ref, dataset_dict, "publisher", DCT.publisher, first_only=True)
self._add_agents(dataset_ref, dataset_dict, "publisher", DCT.publisher)
self._add_agents(dataset_ref, dataset_dict, "creator", DCT.creator)

temporal = dataset_dict.get("temporal_coverage")
Expand Down
2 changes: 2 additions & 0 deletions ckanext/dcat/tests/profiles/base/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,8 @@ def test_contact_details(self):

contact = p._contact_details(URIRef('http://example.org'), ADMS.contactPoint)

contact = contact[0]

assert contact['name'] == 'Point of Contact'
# mailto gets removed for storage and is added again on output
assert contact['email'] == '[email protected]'
Expand Down
121 changes: 115 additions & 6 deletions ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def test_e2e_ckan_to_dcat(self):
g,
publisher[0][2],
DCT.identifier,
URIRef(dataset_dict["publisher"][0]["identifier"])
URIRef(dataset_dict["publisher"][0]["identifier"]),
)

creator = [t for t in g.triples((dataset_ref, DCT.creator, None))]
Expand Down Expand Up @@ -221,10 +221,9 @@ def test_e2e_ckan_to_dcat(self):
g,
creator[0][2],
DCT.identifier,
URIRef(dataset_dict["creator"][0]["identifier"])
URIRef(dataset_dict["creator"][0]["identifier"]),
)


temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]

assert len(temporal) == len(dataset["temporal_coverage"])
Expand Down Expand Up @@ -275,8 +274,8 @@ def test_e2e_ckan_to_dcat(self):

# Statements
for item in [
('access_rights', DCT.accessRights),
('provenance', DCT.provenance),
("access_rights", DCT.accessRights),
("provenance", DCT.provenance),
]:
statement = [s for s in g.objects(dataset_ref, item[1])][0]
assert self._triple(g, statement, RDFS.label, dataset[item[0]])
Expand Down Expand Up @@ -402,7 +401,7 @@ def test_e2e_ckan_to_dcat(self):

# Resources: statements
statement = [s for s in g.objects(distribution_ref, DCT.rights)][0]
assert self._triple(g, statement, RDFS.label, resource['rights'])
assert self._triple(g, statement, RDFS.label, resource["rights"])

def test_publisher_fallback_org(self):

Expand Down Expand Up @@ -856,6 +855,116 @@ def test_statement_literal(self):
assert dataset["notes"] == "This is a dataset"
assert dataset["access_rights"] == "Some statement"

def test_multiple_contacts(self):
    """
    A dataset with two dcat:contactPoint nodes is parsed into a
    two-item ``contact`` list, one dict per contact point.
    """

    data = """
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
<https://example.com/dataset1>
a dcat:Dataset ;
dct:title "Dataset 1" ;
dct:description "This is a dataset" ;
dcat:contactPoint [ a vcard:Kind ;
vcard:fn "Test Contact 1" ;
vcard:hasEmail <mailto:[email protected]> ],
[ a vcard:Kind ;
vcard:fn "Test Contact 2" ;
vcard:hasEmail <mailto:[email protected]> ] ;
.
"""

    p = RDFParser()

    p.parse(data, _format="ttl")
    datasets = list(p.datasets())

    dataset = datasets[0]
    assert len(dataset["contact"]) == 2

    # The contacts are collected by iterating the objects of an RDF graph,
    # which is an unordered set of triples. Sort by name so the assertions
    # do not depend on the order in which the store yields the two nodes.
    contacts = sorted(dataset["contact"], key=lambda item: item["name"])
    assert contacts[0]["name"] == "Test Contact 1"
    assert contacts[0]["email"] == "[email protected]"
    assert contacts[1]["name"] == "Test Contact 2"
    assert contacts[1]["email"] == "[email protected]"

def test_multiple_publishers(self):
    """
    A dataset with two dct:publisher nodes is parsed into a two-item
    ``publisher`` list, one dict per publisher.
    """

    data = """
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix org: <http://www.w3.org/ns/org#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
<https://example.com/dataset1>
a dcat:Dataset ;
dct:title "Dataset 1" ;
dct:description "This is a dataset" ;
dct:publisher [ a org:Organization ;
skos:prefLabel "Test Publisher 1" ;
vcard:hasEmail <mailto:[email protected]> ;
foaf:name "Test Publisher 1" ],
[ a org:Organization ;
skos:prefLabel "Test Publisher 2" ;
vcard:hasEmail <mailto:[email protected]> ;
foaf:name "Test Publisher 2" ] ;
.
"""

    p = RDFParser()

    p.parse(data, _format="ttl")
    datasets = list(p.datasets())

    dataset = datasets[0]
    assert len(dataset["publisher"]) == 2

    # The publishers are collected by iterating the objects of an RDF graph,
    # which is an unordered set of triples. Sort by name so the assertions
    # do not depend on the order in which the store yields the two nodes.
    publishers = sorted(dataset["publisher"], key=lambda item: item["name"])
    assert publishers[0]["name"] == "Test Publisher 1"
    assert publishers[0]["email"] == "[email protected]"
    assert publishers[1]["name"] == "Test Publisher 2"
    assert publishers[1]["email"] == "[email protected]"

def test_multiple_creators(self):
    """
    A dataset with two dct:creator nodes is parsed into a two-item
    ``creator`` list, one dict per creator.
    """

    data = """
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix org: <http://www.w3.org/ns/org#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
<https://example.com/dataset1>
a dcat:Dataset ;
dct:title "Dataset 1" ;
dct:description "This is a dataset" ;
dct:creator [ a org:Organization ;
skos:prefLabel "Test Creator 1" ;
vcard:hasEmail <mailto:[email protected]> ;
foaf:name "Test Creator 1" ],
[ a org:Organization ;
skos:prefLabel "Test Creator 2" ;
vcard:hasEmail <mailto:[email protected]> ;
foaf:name "Test Creator 2" ] ;
.
"""

    p = RDFParser()

    p.parse(data, _format="ttl")
    datasets = list(p.datasets())

    dataset = datasets[0]
    assert len(dataset["creator"]) == 2

    # The creators are collected by iterating the objects of an RDF graph,
    # which is an unordered set of triples. Sort by name so the assertions
    # do not depend on the order in which the store yields the two nodes.
    creators = sorted(dataset["creator"], key=lambda item: item["name"])
    assert creators[0]["name"] == "Test Creator 1"
    assert creators[0]["email"] == "[email protected]"
    assert creators[1]["name"] == "Test Creator 2"
    assert creators[1]["email"] == "[email protected]"


@pytest.mark.usefixtures("with_plugins", "clean_db", "clean_index")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@pytest.mark.ckan_config(
Expand Down

0 comments on commit 2773c3c

Please sign in to comment.