Skip to content

Commit

Permalink
Support for multiple class annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
Cristiano Köhler committed Nov 29, 2023
1 parent 7887bc9 commit 871fd1f
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 24 deletions.
55 changes: 36 additions & 19 deletions alpaca/ontology/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@
`__ontology__` dictionary will define the main URI describing either the
function or the data object:
* 'function' : str
A URI to the ontology class representing the Python function.
* 'data_object' : str
A URI to the ontology class representing the Python data object.
* 'function' : str or list of str
A URI to the ontology class representing the Python function. Multiple URIs
can be passed as a list, if the function is represented by multiple classes.
* 'data_object' : str or list of str
A URI to the ontology class representing the Python data object. Multiple
URIs can be passed as a list, if the object is represented by multiple
classes.
Additional annotations can be stored depending on whether a function or data
object is being annotated.
Expand All @@ -35,11 +38,12 @@
* 'arguments' : dict
A dictionary where the keys are argument names (cf. the function
declaration in the `def` statement) and the values are the URI
to the ontology class representing the argument.
declaration in the `def` statement) and the values are the URI(s)
to the ontology class(es) representing the argument.
* 'returns' : dict
A dictionary where the keys are function outputs, and the values define the
URIs to the ontology classes that represent each output identified by a key.
URI(s) to the ontology class(es) representing each output identified by a
key.
The keys in the `returns` dictionary can have three possible values:
1. a string with one output name (if this is the name of an argument, cf.
the function declaration in the `def` statement), which assumes that a
Expand All @@ -63,11 +67,11 @@
* 'attributes' : dict
A dictionary where the keys are object attribute names and the values are
the URI to the ontology class representing the attribute.
the URI(s) to the ontology class(es) representing the attribute.
* 'annotations' : dict
A dictionary where the keys are annotation names and the values are the
URI to the ontology class representing the annotation. Annotations are
key-pair values specified in dictionaries stored as one attribute of the
URI(s) to the ontology class(es) representing the annotation. Annotations
are key-value pairs specified in dictionaries stored as one attribute of the
object (e.g., `obj.annotations`).
Finally, the ontology annotations can be defined using namespaces so that the
Expand Down Expand Up @@ -230,8 +234,8 @@ def get_container_returns(self):

def get_uri(self, information_type, element=None):
if information_type in VALID_OBJECTS:
# Information on 'function' and 'data_object' are strings, stored
# directly as attributes
# Information on 'function' and 'data_object' are strings or
# lists, stored directly as attributes
information_value = getattr(self, information_type)
else:
# Specific information of 'function' and 'data_object' are
Expand All @@ -249,13 +253,26 @@ def get_uri(self, information_type, element=None):
if not information_value:
return None

if (information_value[0], information_value[-1]) == ("<", ">"):
# This is a URI
return rdflib.URIRef(information_value[1:-1])

# If not full URIs, information must be CURIEs. Get the URIRef.
prefix, value = information_value.split(":")
return self.namespaces[prefix][value]
if not isinstance(information_value, list):
information_value = [information_value]

# Process URI(s) to get `rdflib.URIRef` elements, resolving any
# namespace.
uris = []
for uri in information_value:
if (uri[0], uri[-1]) == ("<", ">"):
# This is a full URI
uris.append(rdflib.URIRef(uri[1:-1]))
else:
# If not full URIs, information must be CURIEs.
# Get the `URIRef` from the namespace.
prefix, value = uri.split(":")
uris.append(self.namespaces[prefix][value])

if len(uris) == 1:
# Return annotation with a single URI directly
return uris[0]
return uris

def __repr__(self):
repr_str = "OntologyInformation("
Expand Down
14 changes: 9 additions & 5 deletions alpaca/serialization/prov.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,15 @@ def _add_Function(self, function_info):
Literal(function_info.version)))
return uri

def _add_ontology_information(self, uri, ontology_info, information_type,
element=None):
class_iri = ontology_info.get_uri(information_type, element)
if class_iri:
self.graph.add((uri, RDF.type, class_iri))
def _add_ontology_information(self, target_uri, ontology_info,
                              information_type, element=None):
    """Type `target_uri` with the ontology class(es) of an annotation.

    The class information is looked up with
    `ontology_info.get_uri(information_type, element)`. One
    `(target_uri, RDF.type, class)` triple is added to `self.graph` for
    each class obtained. Nothing is added when the lookup result is
    falsy (e.g., `None`).
    """
    class_info = ontology_info.get_uri(information_type, element)
    if not class_info:
        return

    # A single class is wrapped in a list, so that single and multiple
    # annotations are handled by the same loop
    class_uris = class_info if isinstance(class_info, list) else [class_info]
    for class_uri in class_uris:
        self.graph.add((target_uri, RDF.type, class_uri))

def _add_FunctionExecution(self, script_info, session_id, execution_id,
function_info, params, execution_order,
Expand Down
127 changes: 127 additions & 0 deletions alpaca/test/test_ontology_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@ def process(input, param_1):
}


# Test function annotated with two ontology classes: the "function" entry
# of the `__ontology__` dictionary below is a list with two class
# identifiers, instead of a single string.
@Provenance(inputs=['input'])
def process_one_and_process_two(input, param_1):
    return OutputObject("SpikeTrain#1", 45)

process_one_and_process_two.__wrapped__.__ontology__ = {
    "function": ["ontology:Process1Function", "ontology:Process2Function"],
    "namespaces": EXAMPLE_NS,
    "arguments": {'param_1': "ontology:Parameter"},
    "returns": {0: "ontology:ProcessedData"}
}


@Provenance(inputs=['input'])
def process_multiple(input, param_1):
return "not_annotated", OutputObject("SpikeTrain#2", 34)
Expand Down Expand Up @@ -259,6 +271,29 @@ def test_annotation_function(self):
"returns={0: 'ontology:ProcessedData'})"
)

def test_annotation_function_multiple_annotations(self):
    """Check the ontology information of a function with two classes.

    `get_uri("function")` is expected to return a list with both URIs,
    while the argument and return annotations (one class each) are
    expected as single `URIRef` objects.
    """
    self.assertIsNotNone(
        _OntologyInformation.get_ontology_information(
            process_one_and_process_two))
    info = _OntologyInformation(process_one_and_process_two)

    # Function annotated with a list: both classes are returned
    expected_function_uris = [
        URIRef("http://example.org/ontology#Process1Function"),
        URIRef("http://example.org/ontology#Process2Function"),
    ]
    self.assertListEqual(info.get_uri("function"), expected_function_uris)

    # Elements annotated with a single class
    expected_parameter_uri = URIRef("http://example.org/ontology#Parameter")
    self.assertEqual(info.get_uri("arguments", "param_1"),
                     expected_parameter_uri)

    expected_return_uri = URIRef(
        "http://example.org/ontology#ProcessedData")
    self.assertEqual(info.get_uri("returns", 0), expected_return_uri)

    # String representation shows the list given as function annotation
    expected_str = (
        "OntologyInformation(function='['ontology:Process1Function', "
        "'ontology:Process2Function']', "
        "arguments={'param_1': 'ontology:Parameter'}, "
        f"namespaces={{'ontology': {repr(self.ONTOLOGY)}}}, "
        "returns={0: 'ontology:ProcessedData'})"
    )
    self.assertEqual(str(info), expected_str)

def test_annotation_function_multiple(self):
self.assertIsNotNone(
_OntologyInformation.get_ontology_information(process_multiple))
Expand Down Expand Up @@ -409,6 +444,98 @@ def test_provenance_annotation(self):
self.assertTrue((output_node,
PROV.wasDerivedFrom, input_node) in prov_graph)

def test_provenance_multiple_annotations(self):
    """Check the PROV graph saved for a function with two annotations.

    Runs `process_one_and_process_two` with provenance tracking active,
    parses the saved provenance as Turtle, and checks that the function
    execution node is typed with both function classes, together with
    the parameter, input, and output annotations.
    """
    activate(clear=True)
    input_object = InputObject()
    output_object = process_one_and_process_two(input_object, 34)
    deactivate()

    # Read PROV information as RDF
    prov_graph = Graph()
    with io.StringIO(save_provenance()) as data_stream:
        prov_graph.parse(data_stream, format='turtle')

    # Check that the annotations exist (1 per class is expected)
    expected_classes = (
        self.ONTOLOGY.Parameter,
        self.ONTOLOGY.Process1Function,
        self.ONTOLOGY.Process2Function,
        self.ONTOLOGY.ProcessedData,
        self.ONTOLOGY.InputObject,
        self.ONTOLOGY.OutputObject,
    )
    for ontology_class in expected_classes:
        typed_nodes = list(
            prov_graph.triples((None, RDF.type, ontology_class)))
        self.assertEqual(len(typed_nodes), 1)

    # FunctionExecution is both Process1Function and Process2Function
    execution_uri = list(
        prov_graph.subjects(RDF.type, ALPACA.FunctionExecution))[0]
    for function_class in (self.ONTOLOGY.Process1Function,
                           self.ONTOLOGY.Process2Function):
        self.assertTrue(
            (execution_uri, RDF.type, function_class) in prov_graph)

    # Check parameter name and value
    parameter_node = list(
        prov_graph.subjects(RDF.type, self.ONTOLOGY.Parameter))[0]
    parameter_pairs = (
        (ALPACA.pairName, Literal("param_1")),
        (ALPACA.pairValue, Literal(34)),
    )
    for predicate, literal in parameter_pairs:
        self.assertTrue((parameter_node, predicate, literal) in prov_graph)

    # Check returned value
    output_node = list(
        prov_graph.subjects(RDF.type, self.ONTOLOGY.ProcessedData))[0]
    self.assertTrue(
        (output_node, PROV.wasGeneratedBy, execution_uri) in prov_graph)
    for output_class in (ALPACA.DataObjectEntity,
                         self.ONTOLOGY.OutputObject):
        self.assertTrue((output_node, RDF.type, output_class) in prov_graph)

    # Check attributes of returned value
    expected_attributes = {'name': "SpikeTrain#1", 'channel': 45}
    for attribute in prov_graph.objects(output_node, ALPACA.hasAttribute):
        name = prov_graph.value(attribute, ALPACA.pairName).toPython()
        value = prov_graph.value(attribute, ALPACA.pairValue).toPython()
        self.assertEqual(value, expected_attributes[name])

        # Check if attribute annotation is present for `name`
        if name == 'name':
            self.assertTrue(
                (attribute, RDF.type, self.ONTOLOGY.Attribute)
                in prov_graph)

    # Check input value
    input_node = list(
        prov_graph.subjects(RDF.type, self.ONTOLOGY.InputObject))[0]
    self.assertTrue((execution_uri, PROV.used, input_node) in prov_graph)
    self.assertTrue(
        (input_node, RDF.type, ALPACA.DataObjectEntity) in prov_graph)
    self.assertTrue(
        (output_node, PROV.wasDerivedFrom, input_node) in prov_graph)

def test_provenance_annotation_multiple_returns(self):
activate(clear=True)
input_object = InputObject()
Expand Down

0 comments on commit 871fd1f

Please sign in to comment.