diff --git a/ckanext/dcat/schemas/dcat_ap_2.1.yaml b/ckanext/dcat/schemas/dcat_ap_2.1.yaml
index 3c4b7232..96fa2cb4 100644
--- a/ckanext/dcat/schemas/dcat_ap_2.1.yaml
+++ b/ckanext/dcat/schemas/dcat_ap_2.1.yaml
@@ -120,6 +120,11 @@ dataset_fields:
label: End
# TODO: dcat_date preset
+- field_name: temporal_resolution
+ label: Temporal resolution
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+
- field_name: spatial_coverage
label: Spatial coverage
repeating_subfields:
@@ -139,6 +144,12 @@ dataset_fields:
- field_name: centroid
label: Centroid
+#- field_name: spatial_resolution_in_meters
+# label: Spatial resolution in meters
+# preset: multiple_text
+# validators: ignore_missing scheming_multiple_text
+# TODO: scheming_multiple_decimal
+
- field_name: access_rights
label: Access rights
validators: ignore_missing unicode_safe
@@ -175,6 +186,23 @@ dataset_fields:
preset: multiple_text
validators: ignore_missing scheming_multiple_text
+- field_name: is_referenced_by
+ label: Is referenced by
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+
+- field_name: applicable_legislation
+ label: Applicable legislation
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+
+#- field_name: hvd_category
+# label: HVD Category
+# preset: multiple_text
+# validators: ignore_missing scheming_multiple_text
+# TODO: implement separately as part of wider HVD support
+
+
# Note: if not provided, this will be autogenerated
- field_name: uri
label: URI
@@ -199,15 +227,37 @@ resource_fields:
label: Format
preset: resource_format_autocomplete
+- field_name: mimetype
+ label: Media type
+ # TODO: get from format
+
+- field_name: compress_format
+ label: Compress format
+ # TODO: media type validator
+
+- field_name: package_format
+ label: Package format
+ # TODO: media type validator
+
- field_name: size
label: Size
# TODO: number validator / snippet
+- field_name: hash
+ label: Hash
+ # TODO: generate for uploads?
+
+- field_name: hash_algorithm
+ label: Hash Algorithm
+
- field_name: rights
label: Rights
form_snippet: markdown.html
form_placeholder: Some statement about the rights associated with the resource
+- field_name: availability
+ label: Availability
+
- field_name: status
label: Status
@@ -233,6 +283,7 @@ resource_fields:
- field_name: language
label: Language
preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
- field_name: documentation
label: Documentation
@@ -244,6 +295,11 @@ resource_fields:
preset: multiple_text
validators: ignore_missing scheming_multiple_text
+- field_name: applicable_legislation
+ label: Applicable legislation
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+
- field_name: access_services
label: Access services
repeating_label: Access service
diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py
index 978f25c2..bce24665 100644
--- a/ckanext/dcat/tests/test_scheming_support.py
+++ b/ckanext/dcat/tests/test_scheming_support.py
@@ -13,6 +13,7 @@
from ckanext.dcat.processors import RDFSerializer, RDFParser
from ckanext.dcat.profiles import (
DCAT,
+ DCATAP,
DCT,
ADMS,
XSD,
@@ -24,6 +25,7 @@
GSP,
OWL,
GEOJSON_IMT,
+ SPDX,
)
from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest
@@ -75,6 +77,14 @@ def test_e2e_ckan_to_dcat(self):
"language": ["en", "ca", "es"],
"documentation": ["https://example.org/some-doc.html"],
"conforms_to": ["Standard 1", "Standard 2"],
+ "is_referenced_by": [
+ "https://doi.org/10.1038/sdata.2018.22",
+ "test_isreferencedby",
+ ],
+ "applicable_legislation": [
+ "http://data.europa.eu/eli/reg_impl/2023/138/oj",
+ "http://data.europa.eu/eli/reg_impl/2023/138/oj_alt",
+ ],
# Repeating subfields
"contact": [
{"name": "Contact 1", "email": "contact1@example.org"},
@@ -92,6 +102,7 @@ def test_e2e_ckan_to_dcat(self):
{"start": "1905-03-01", "end": "2013-01-05"},
{"start": "2024-04-10", "end": "2024-05-29"},
],
+ "temporal_resolution": ["PT15M", "P1D"],
"spatial_coverage": [
{
"geom": {
@@ -123,12 +134,19 @@ def test_e2e_ckan_to_dcat(self):
"centroid": {"type": "Point", "coordinates": [1.26639, 41.12386]},
}
],
+ "spatial_resolution_in_meters": [1.5, 2.0],
"resources": [
{
"name": "Resource 1",
"description": "Some description",
"url": "https://example.com/data.csv",
"format": "CSV",
+ "availability": "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL",
+ "compress_format": "http://www.iana.org/assignments/media-types/application/gzip",
+ "package_format": "http://publications.europa.eu/resource/authority/file-type/TAR",
+ "size": 12323,
+ "hash": "4304cf2e751e6053c90b1804c89c0ebb758f395a",
+ "hash_algorithm": "http://spdx.org/rdf/terms#checksumAlgorithm_sha1",
"status": "published",
"access_url": "https://example.com/data.csv",
"download_url": "https://example.com/data.csv",
@@ -214,6 +232,24 @@ def test_e2e_ckan_to_dcat(self):
self._triples_list_values(g, dataset_ref, FOAF.page)
== dataset["documentation"]
)
+ assert (
+ self._triples_list_values(g, dataset_ref, DCAT.temporalResolution)
+ == dataset["temporal_resolution"]
+ )
+ assert (
+ self._triples_list_values(g, dataset_ref, DCT.isReferencedBy)
+ == dataset["is_referenced_by"]
+ )
+ assert (
+ self._triples_list_values(g, dataset_ref, DCATAP.applicableLegislation)
+ == dataset["applicable_legislation"]
+ )
+
+ # TODO: enable after validator
+ # assert (
+ # self._triples_list_values(g, dataset_ref, DCAT.spatialResolutionInMeters)
+ # == dataset["spatial_resolution_in_meters"]
+ # )
# Repeating subfields
@@ -318,38 +354,67 @@ def test_e2e_ckan_to_dcat(self):
assert self._triple(g, spatial[0][2], LOCN.geometry, wkt_geom, GSP.wktLiteral)
distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
+ resource = dataset_dict["resources"][0]
# Resources: core fields
- assert self._triple(
- g, distribution_ref, DCT.title, dataset_dict["resources"][0]["name"]
- )
+ assert self._triple(g, distribution_ref, DCT.title, resource["name"])
assert self._triple(
g,
distribution_ref,
DCT.description,
- dataset_dict["resources"][0]["description"],
+ resource["description"],
)
# Resources: standard fields
+ assert self._triple(g, distribution_ref, DCT.rights, resource["rights"])
+ assert self._triple(g, distribution_ref, ADMS.status, resource["status"])
assert self._triple(
- g, distribution_ref, DCT.rights, dataset_dict["resources"][0]["rights"]
+ g,
+ distribution_ref,
+ DCAT.accessURL,
+ URIRef(resource["access_url"]),
)
assert self._triple(
- g, distribution_ref, ADMS.status, dataset_dict["resources"][0]["status"]
+ g,
+ distribution_ref,
+ DCATAP.availability,
+ URIRef(resource["availability"]),
)
assert self._triple(
g,
distribution_ref,
- DCAT.accessURL,
- URIRef(dataset_dict["resources"][0]["access_url"]),
+ DCAT.compressFormat,
+ URIRef(resource["compress_format"]),
+ )
+ assert self._triple(
+ g,
+ distribution_ref,
+ DCAT.packageFormat,
+ URIRef(resource["package_format"]),
)
assert self._triple(
g,
distribution_ref,
DCAT.downloadURL,
- URIRef(dataset_dict["resources"][0]["download_url"]),
+ URIRef(resource["download_url"]),
+ )
+
+ assert self._triple(g, distribution_ref, DCAT.byteSize, float(resource['size']), XSD.decimal)
+ # Checksum
+ checksum = self._triple(g, distribution_ref, SPDX.checksum, None)[2]
+ assert checksum
+ assert self._triple(g, checksum, RDF.type, SPDX.Checksum)
+ assert self._triple(
+ g,
+ checksum,
+ SPDX.checksumValue,
+ resource["hash"],
+ data_type="http://www.w3.org/2001/XMLSchema#hexBinary",
+ )
+ assert self._triple(
+ g, checksum, SPDX.algorithm, URIRef(resource["hash_algorithm"])
)
# Resources: dates
@@ -369,11 +434,10 @@ def test_e2e_ckan_to_dcat(self):
)
# Resources: list fields
-
- language = [
- str(t[2]) for t in g.triples((distribution_ref, DCT.language, None))
- ]
- assert language == dataset_dict["resources"][0]["language"]
+ assert (
+ self._triples_list_values(g, distribution_ref, DCT.language)
+ == resource["language"]
+ )
# Resource: repeating subfields
access_services = [
@@ -385,17 +449,14 @@ def test_e2e_ckan_to_dcat(self):
g,
access_services[0][2],
DCT.title,
- dataset_dict["resources"][0]["access_services"][0]["title"],
+ resource["access_services"][0]["title"],
)
endpoint_urls = [
str(t[2])
for t in g.triples((access_services[0][2], DCAT.endpointURL, None))
]
- assert (
- endpoint_urls
- == dataset_dict["resources"][0]["access_services"][0]["endpoint_url"]
- )
+ assert endpoint_urls == resource["access_services"][0]["endpoint_url"]
def test_publisher_fallback_org(self):
@@ -555,7 +616,18 @@ def test_e2e_dcat_to_ckan(self):
"http://dataset.info.org/doc1",
"http://dataset.info.org/doc2",
]
-
+ assert sorted(dataset["temporal_resolution"]) == [
+ "P1D",
+ "PT15M",
+ ]
+ assert sorted(dataset["is_referenced_by"]) == [
+ "https://doi.org/10.1038/sdata.2018.22",
+ "test_isreferencedby",
+ ]
+ assert sorted(dataset["applicable_legislation"]) == [
+ "http://data.europa.eu/eli/reg_impl/2023/138/oj",
+ "http://data.europa.eu/eli/reg_impl/2023/138/oj_alt",
+ ]
# Repeating subfields
assert dataset["contact"][0]["name"] == "Point of Contact"
@@ -585,9 +657,24 @@ def test_e2e_dcat_to_ckan(self):
assert resource["modified"] == "2012-05-01T00:04:06"
assert resource["status"] == "http://purl.org/adms/status/Completed"
assert resource["size"] == 12323
+ assert (
+ resource["availability"]
+ == "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL"
+ )
+ assert (
+ resource["compress_format"]
+ == "http://www.iana.org/assignments/media-types/application/gzip"
+ )
+ assert (
+ resource["package_format"]
+ == "http://publications.europa.eu/resource/authority/file-type/TAR"
+ )
- # assert resource['hash'] == u'4304cf2e751e6053c90b1804c89c0ebb758f395a'
- # assert resource['hash_algorithm'] == u'http://spdx.org/rdf/terms#checksumAlgorithm_sha1'
+ assert resource["hash"] == "4304cf2e751e6053c90b1804c89c0ebb758f395a"
+ assert (
+ resource["hash_algorithm"]
+ == "http://spdx.org/rdf/terms#checksumAlgorithm_sha1"
+ )
assert (
resource["access_url"] == "http://www.bgs.ac.uk/gbase/geochemcd/home.html"
diff --git a/examples/dataset.rdf b/examples/dataset.rdf
index 6b445dff..f7db02db 100644
--- a/examples/dataset.rdf
+++ b/examples/dataset.rdf
@@ -37,6 +37,8 @@
Standard 2
+ 1.5
+ 2.0
public
@@ -50,6 +52,10 @@
+ https://doi.org/10.1038/sdata.2018.22
+ test_isreferencedby
+
+
@@ -57,6 +63,8 @@
2013-01-05
+ PT15M
+ P1D
Point of Contact
@@ -80,9 +88,12 @@
Some statement about rights
+ http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL
http://www.bgs.ac.uk/gbase/geochemcd/home.html
HTML
text/html
+ http://www.iana.org/assignments/media-types/application/gzip
+ http://publications.europa.eu/resource/authority/file-type/TAR
12323