Skip to content

Commit

Permalink
[#56] Add most DCAT AP 1.1 standard and list fields
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed May 27, 2024
1 parent 9b0abce commit e1b5f32
Show file tree
Hide file tree
Showing 3 changed files with 246 additions and 40 deletions.
139 changes: 105 additions & 34 deletions ckanext/dcat/schemas/dcat_ap_2.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,23 @@ dataset_fields:
form_snippet: markdown.html
form_placeholder: eg. Some useful notes about the data

- field_name: contact
label: Contact points
repeating_label: Contact point
repeating_subfields:

- field_name: uri
label: URI

- field_name: name
label: Name

- field_name: email
label: Email
display_snippet: email.html

- field_name: tag_string
label: Tags
label: Keywords
preset: tag_string_autocomplete
form_placeholder: eg. economy, mental health, government

Expand All @@ -48,55 +63,70 @@ dataset_fields:
validators: ignore_missing unicode_safe package_version_validator
form_placeholder: '1.0'

- field_name: author
label: Author
form_placeholder: Joe Bloggs
display_property: dc:creator

- field_name: author_email
label: Author Email
form_placeholder: [email protected]
display_property: dc:creator
display_snippet: email.html
display_email_name_field: author

- field_name: maintainer
label: Maintainer
form_placeholder: Joe Bloggs
display_property: dc:contributor

- field_name: maintainer_email
label: Maintainer Email
form_placeholder: [email protected]
display_property: dc:contributor
display_snippet: email.html
display_email_name_field: maintainer
# Note: this will fall back to metadata_created if not present
- field_name: issued
label: Release date
# TODO: dcat_date preset

- field_name: contact
label: Contact points
repeating_label: Contact point
repeating_subfields:
# Note: this will fall back to metadata_modified if not present
- field_name: modified
label: Modification date
# TODO: dcat_date preset

- field_name: uri
label: URI
- field_name: identifier
label: Identifier

- field_name: name
label: Name
- field_name: frequency
label: Frequency

- field_name: email
label: Email
- field_name: provenance
label: Provenance

- field_name: dcat_type
label: Type
# TODO: controlled vocabulary?
- field_name: access_rights
label: Access rights
validators: ignore_missing unicode_safe
form_snippet: markdown.html

- field_name: version_notes
label: Version notes
validators: ignore_missing unicode_safe
form_snippet: markdown.html

- field_name: alternate_identifier
label: Alternate identifier
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: theme
label: Theme
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: language
label: Language
preset: multiple_text
validators: ignore_missing scheming_multiple_text
# TODO: language form snippet / validator / graph

- field_name: documentation
label: Documentation
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: conforms_to
label: Conforms to
preset: multiple_text
validators: ignore_missing scheming_multiple_text

# Note: if not provided, this will be autogenerated
- field_name: uri
label: URI

# TODO: relation-based properties are not yet included (e.g. is_version_of, source, sample, etc.)
resource_fields:

- field_name: url
Expand All @@ -116,15 +146,51 @@ resource_fields:
label: Format
preset: resource_format_autocomplete

- field_name: size
label: Size
# TODO: number validator / snippet

- field_name: rights
label: Rights
form_snippet: markdown.html
form_placeholder: Some statement about the rights associated with the resource

- field_name: status
label: Status

- field_name: license
label: License

# Note: this falls back to the standard resource url field
- field_name: access_url
label: Access URL

# Note: this falls back to the standard resource url field
- field_name: download_url
label: Download URL

- field_name: issued
label: Release date
# TODO: dcat_date preset

- field_name: modified
label: Modification date
# TODO: dcat_date preset

- field_name: language
label: Language
preset: multiple_text

- field_name: documentation
label: Documentation
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: conforms_to
label: Conforms to
preset: multiple_text
validators: ignore_missing scheming_multiple_text

- field_name: access_services
label: Access services
repeating_label: Access service
Expand All @@ -139,3 +205,8 @@ resource_fields:
- field_name: endpoint_url
label: Endpoint URL
preset: multiple_text

# Note: if not provided, this will be autogenerated
- field_name: uri
label: URI

144 changes: 138 additions & 6 deletions ckanext/dcat/tests/test_scheming_support.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from rdflib.namespace import RDF
from rdflib.term import URIRef

from ckan.tests.helpers import call_action

Expand Down Expand Up @@ -50,8 +51,23 @@ def test_e2e_ckan_to_dcat(self):
"version": "1.0b",
"tags": [{"name": "Tag 1"}, {"name": "Tag 2"}],
# Standard fields
"issued": "2024-05-01",
"modified": "2024-05-05",
"identifier": "xx-some-dataset-id-yy",
"frequency": "monthly",
"provenance": "Statement about provenance",
"dcat_type": "test-type",
"version_notes": "Some version notes",
"access_rights": "Statement about access rights",
# List fields (lists)
"alternate_identifier": ["alt-id-1", "alt-id-2"],
"theme": [
"https://example.org/uri/theme1",
"https://example.org/uri/theme2",
"https://example.org/uri/theme3",
],
"language": ["en", "ca", "es"],
"documentation": ["https://example.org/some-doc.html"],
"conforms_to": ["Standard 1", "Standard 2"],
# Repeating subfields
"contact": [
Expand All @@ -63,6 +79,12 @@ def test_e2e_ckan_to_dcat(self):
"name": "Resource 1",
"url": "https://example.com/data.csv",
"format": "CSV",
"status": "published",
"access_url": "https://example.com/data.csv",
"download_url": "https://example.com/data.csv",
"issued": "2024-05-01T01:20:33",
"modified": "2024-05-05T09:33:20",
"license": "http://creativecommons.org/licenses/by/3.0/",
"rights": "Some stament about rights",
"language": ["en", "ca", "es"],
"access_services": [
Expand Down Expand Up @@ -95,16 +117,53 @@ def test_e2e_ckan_to_dcat(self):
assert self._triple(g, dataset_ref, RDF.type, DCAT.Dataset)
assert self._triple(g, dataset_ref, DCT.title, dataset["title"])
assert self._triple(g, dataset_ref, DCT.description, dataset["notes"])
assert self._triple(g, dataset_ref, OWL.versionInfo, dataset["version"])

# Standard fields
assert self._triple(g, dataset_ref, DCT.identifier, dataset["identifier"])
assert self._triple(
g, dataset_ref, DCT.accrualPeriodicity, dataset["frequency"]
)
assert self._triple(g, dataset_ref, DCT.provenance, dataset["provenance"])
assert self._triple(g, dataset_ref, DCT.type, dataset["dcat_type"])
assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"])
assert self._triple(g, dataset_ref, DCT.accessRights, dataset["access_rights"])

# Dates
assert self._triple(
g,
dataset_ref,
DCT.issued,
dataset["issued"] + "T00:00:00",
data_type=XSD.dateTime,
)
assert self._triple(
g,
dataset_ref,
DCT.modified,
dataset["modified"] + "T00:00:00",
data_type=XSD.dateTime,
)

# List fields
# TODO helper function
conforms_to = [
str(t[2]) for t in g.triples((dataset_ref, DCT.conformsTo, None))
]
assert conforms_to == dataset["conforms_to"]

assert (
self._triples_list_values(g, dataset_ref, DCT.conformsTo)
== dataset["conforms_to"]
)
assert (
self._triples_list_values(g, dataset_ref, ADMS.identifier)
== dataset["alternate_identifier"]
)
assert self._triples_list_values(g, dataset_ref, DCAT.theme) == dataset["theme"]
assert (
self._triples_list_values(g, dataset_ref, DCT.language)
== dataset["language"]
)
assert (
self._triples_list_values(g, dataset_ref, FOAF.page)
== dataset["documentation"]
)

# Repeating subfields

Expand Down Expand Up @@ -137,6 +196,37 @@ def test_e2e_ckan_to_dcat(self):
assert self._triple(
g, distribution_ref, DCT.rights, dataset_dict["resources"][0]["rights"]
)
assert self._triple(
g, distribution_ref, DCT.status, dataset_dict["resources"][0]["status"]
)
assert self._triple(
g,
distribution_ref,
DCAT.accessURL,
dataset_dict["resources"][0]["access_url"],
)
assert self._triple(
g,
distribution_ref,
DCAT.downloadURL,
dataset_dict["resources"][0]["download_url"],
)

# Resources: dates
assert self._triple(
g,
distribution_ref,
DCT.issued,
dataset["resources"][0]["issued"],
data_type=XSD.dateTime,
)
assert self._triple(
g,
distribution_ref,
DCT.modified,
dataset["resources"][0]["modified"],
data_type=XSD.dateTime,
)

# Resources: list fields

Expand Down Expand Up @@ -216,21 +306,63 @@ def test_e2e_dcat_to_ckan(self):

# Standard fields
assert dataset["version_notes"] == "New schema added"
assert dataset["identifier"] == u"9df8df51-63db-37a8-e044-0003ba9b0d98"
assert dataset["frequency"] == "http://purl.org/cld/freq/daily"
assert dataset["access_rights"] == "public"
assert dataset["provenance"] == "Some statement about provenance"
assert dataset["dcat_type"] == "test-type"

assert dataset["issued"] == u"2012-05-10"
assert dataset["modified"] == u"2012-05-10T21:04:00"

# List fields
assert dataset["conforms_to"] == ["Standard 1", "Standard 2"]
assert sorted(dataset["conforms_to"]) == ["Standard 1", "Standard 2"]
assert sorted(dataset["language"]) == ["ca", "en", "es"]
assert sorted(dataset["theme"]) == [
"Earth Sciences",
"http://eurovoc.europa.eu/100142",
"http://eurovoc.europa.eu/209065",
]
assert sorted(dataset["alternate_identifier"]) == [
"alternate-identifier-1",
"alternate-identifier-2",
]
assert sorted(dataset["documentation"]) == [
"http://dataset.info.org/doc1",
"http://dataset.info.org/doc2",
]

# Repeating subfields

assert dataset["contact"][0]["name"] == "Point of Contact"
assert dataset["contact"][0]["email"] == "[email protected]"

resource = dataset["resources"][0]

# Resources: core fields
assert resource["url"] == "http://www.bgs.ac.uk/gbase/geochemcd/home.html"

# Resources: standard fields
assert resource["license"] == "http://creativecommons.org/licenses/by-nc/2.0/"
assert resource["rights"] == "Some statement about rights"
assert resource["issued"] == "2012-05-11"
assert resource["modified"] == "2012-05-01T00:04:06"
assert resource["status"] == "http://purl.org/adms/status/Completed"
assert resource["size"] == 12323

# assert resource['hash'] == u'4304cf2e751e6053c90b1804c89c0ebb758f395a'
# assert resource['hash_algorithm'] == u'http://spdx.org/rdf/terms#checksumAlgorithm_sha1'

assert resource["access_url"] == "http://www.bgs.ac.uk/gbase/geochemcd/home.html"
assert "download_url" not in resource

# Resources: list fields
assert sorted(resource["language"]) == ["ca", "en", "es"]
assert sorted(resource["documentation"]) == [
"http://dataset.info.org/distribution1/doc1",
"http://dataset.info.org/distribution1/doc2",
]
assert sorted(resource["conforms_to"]) == ["Standard 1", "Standard 2"]

# Resources: repeating subfields
assert resource["access_services"][0]["title"] == "Sparql-end Point"
Expand Down
Loading

0 comments on commit e1b5f32

Please sign in to comment.