Skip to content

Commit

Permalink
Unit test for using a callable for graph aggregation
Browse files Browse the repository at this point in the history
  • Loading branch information
Cristiano Köhler committed Nov 9, 2023
1 parent 0966f35 commit 557c410
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 0 deletions.
103 changes: 103 additions & 0 deletions alpaca/test/res/multiple_file_output.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
@prefix alpaca: <http://purl.org/alpaca#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<urn:my-authority:alpaca:file:sha256:98765> a alpaca:FileEntity ;
alpaca:filePath "/outputs/1.png"^^xsd:string ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:12345> ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#12345> .

<urn:my-authority:alpaca:file:sha256:987651> a alpaca:FileEntity ;
alpaca:filePath "/outputs/2.png"^^xsd:string ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:123452> ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#123452> .

<urn:my-authority:alpaca:object:Python:builtins.NoneType:777777> a alpaca:DataObjectEntity ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:12345> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#12345> ;
alpaca:hashSource "UUID" .

<urn:my-authority:alpaca:object:Python:builtins.NoneType:7777772> a alpaca:DataObjectEntity ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:123452> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#123452> ;
alpaca:hashSource "UUID" .

<urn:my-authority:alpaca:object:Python:test.InputObject:12345> a alpaca:DataObjectEntity ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:22345> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.cut_function#12345> ;
alpaca:hashSource "joblib_SHA1" .

<urn:my-authority:alpaca:object:Python:test.InputObject:123452> a alpaca:DataObjectEntity ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:22345> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.cut_function#12345> ;
alpaca:hashSource "joblib_SHA1" .

<urn:my-authority:alpaca:object:Python:test.InputObject:22345> a alpaca:DataObjectEntity ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
alpaca:hashSource "joblib_SHA1" .

<urn:my-authority:alpaca:file:sha256:18765> a alpaca:FileEntity ;
alpaca:filePath "/full.png"^^xsd:string ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:22345> ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#22345> .

<urn:my-authority:alpaca:object:Python:builtins.NoneType:666666> a alpaca:DataObjectEntity ;
prov:wasAttributedTo <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
prov:wasDerivedFrom <urn:my-authority:alpaca:object:Python:test.InputObject:22345> ;
prov:wasGeneratedBy <urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#22345> ;
alpaca:hashSource "UUID" .

<urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#12345> a alpaca:FunctionExecution ;
prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ;
prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ;
prov:used <urn:my-authority:alpaca:object:Python:test.InputObject:12345> ;
prov:wasAssociatedWith <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
alpaca:codeStatement "plot_function(input, out_file)" ;
alpaca:executionOrder 3 ;
alpaca:usedFunction <urn:my-authority:alpaca:function:Python:test.plot_function> .

<urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#123452> a alpaca:FunctionExecution ;
prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ;
prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ;
prov:used <urn:my-authority:alpaca:object:Python:test.InputObject:123452> ;
prov:wasAssociatedWith <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
alpaca:codeStatement "plot_function(input, out_file)" ;
alpaca:executionOrder 4 ;
alpaca:usedFunction <urn:my-authority:alpaca:function:Python:test.plot_function> .

<urn:my-authority:alpaca:function_execution:Python:111111:999999:test.plot_function#22345> a alpaca:FunctionExecution ;
prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ;
prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ;
prov:used <urn:my-authority:alpaca:object:Python:test.InputObject:22345> ;
prov:wasAssociatedWith <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
alpaca:codeStatement "plot_function(input, out_file)" ;
alpaca:executionOrder 1 ;
alpaca:usedFunction <urn:my-authority:alpaca:function:Python:test.plot_function> .

<urn:my-authority:alpaca:function_execution:Python:111111:999999:test.cut_function#12345> a alpaca:FunctionExecution ;
prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ;
prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ;
prov:used <urn:my-authority:alpaca:object:Python:test.InputObject:22345> ;
prov:wasAssociatedWith <urn:my-authority:alpaca:script:Python:script.py:111111#999999> ;
alpaca:codeStatement "cut_function(full_data)" ;
alpaca:executionOrder 2 ;
alpaca:usedFunction <urn:my-authority:alpaca:function:Python:test.cut_function> .

<urn:my-authority:alpaca:function:Python:test.plot_function> a alpaca:Function ;
alpaca:functionName "plot_function" ;
alpaca:implementedIn "test" ;
alpaca:functionVersion "0.0.1" .

<urn:my-authority:alpaca:function:Python:test.cut_function> a alpaca:Function ;
alpaca:functionName "cut_function" ;
alpaca:implementedIn "test" ;
alpaca:functionVersion "0.0.1" .

<urn:my-authority:alpaca:script:Python:script.py:111111#999999> a alpaca:ScriptAgent ;
alpaca:scriptPath "/script.py" .
82 changes: 82 additions & 0 deletions alpaca/test/test_graph.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import unittest

from pathlib import Path
Expand Down Expand Up @@ -543,6 +544,87 @@ def test_overall_aggregation(self):
for key, value in expected_values_per_node[label].items():
self.assertEqual(attrs[key], value)

def test_aggregation_by_callable(self):
    """
    Test aggregation where the grouping criterion is a callable.

    The graph stored in `multiple_file_output.ttl` is aggregated without
    attributes, by the `File_path` attribute of `File` nodes, and by a
    callable that tells whether the file path starts with "/outputs/".
    For each result, and for the non-aggregated graph, the total number
    of nodes, the number of nodes per label, and the `File_path` values
    of the `File` nodes are checked in a separate subtest.
    """
    graph_file = self.ttl_path / "multiple_file_output.ttl"

    # Non-aggregated graph
    graph = ProvenanceGraph(graph_file)

    # Aggregate without attributes
    aggregated = graph.aggregate({}, output_file=None)

    # Aggregate separating by file path in File nodes
    aggregated_path = graph.aggregate({'File': ('File_path',)},
                                      output_file=None)

    # Aggregate using a callable to separate files whose path starts
    # with "/outputs/". Only the third argument is used; the first two
    # are presumably the graph and the node identifier (confirm against
    # `ProvenanceGraph.aggregate`).
    def is_cut_plot(g, n, d):
        return d['File_path'].startswith("/outputs/")

    aggregated_callable = graph.aggregate({'File': (is_cut_plot,)},
                                          output_file=None)

    # Define a dictionary with the expected values for each case, that
    # are used in subtests below. 'paths' is either a list (each File
    # node must hold one of the listed values) or a single string (every
    # File node must hold exactly that value).
    tests = {
        'non_aggregated': {
            'graph': graph.graph, 'length': 10,
            'counts': {'InputObject': 3,
                       'plot_function': 3,
                       'cut_function': 1,
                       'File': 3},
            'paths': ["/full.png",
                      "/outputs/1.png",
                      "/outputs/2.png"],
        },

        'aggregated': {
            'graph': aggregated, 'length': 5,
            'counts': {'InputObject': 2,
                       'plot_function': 1,
                       'cut_function': 1,
                       'File': 1},
            'paths': "/full.png;/outputs/1.png;/outputs/2.png",
        },

        'aggregated_path': {
            'graph': aggregated_path, 'length': 10,
            'counts': {'InputObject': 3,
                       'plot_function': 3,
                       'cut_function': 1,
                       'File': 3},
            'paths': ["/full.png",
                      "/outputs/1.png",
                      "/outputs/2.png"],
        },

        'aggregated_callable': {
            'graph': aggregated_callable, 'length': 7,
            'counts': {'InputObject': 2,
                       'plot_function': 2,
                       'cut_function': 1,
                       'File': 2},
            'paths': ["/full.png",
                      "/outputs/1.png;/outputs/2.png"],
        },
    }

    for key, expected in tests.items():
        with self.subTest(f"Graph {key}"):
            test_graph = expected['graph']
            nodes = test_graph.nodes
            self.assertEqual(len(nodes), expected['length'])

            # Check if node counts are as expected
            all_labels = [nodes[node]['label'] for node in nodes]
            counts = Counter(all_labels)
            for label, count in expected['counts'].items():
                self.assertEqual(counts[label], count,
                                 msg=f"Node count for '{label}'")

            # Check if file paths in the File nodes are as expected
            paths = expected['paths']
            for node, attrs in nodes.items():
                if attrs['label'] != "File":
                    continue
                if isinstance(paths, list):
                    # assertIn reports both the value and the container
                    # on failure, unlike assertTrue(x in y)
                    self.assertIn(attrs['File_path'], paths)
                else:
                    self.assertEqual(attrs['File_path'], paths)

def test_aggregation_by_attribute_with_missing(self):
aggregated = self.graph.aggregate({'InputObject': ('id',)},
use_function_parameters=False,
Expand Down

0 comments on commit 557c410

Please sign in to comment.