From 557c410569caa95f779869af4e80592dc007f7ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= Date: Thu, 9 Nov 2023 17:42:52 +0100 Subject: [PATCH] Unit test for using a callable for graph aggregation --- alpaca/test/res/multiple_file_output.ttl | 103 +++++++++++++++++++++++ alpaca/test/test_graph.py | 82 ++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 alpaca/test/res/multiple_file_output.ttl diff --git a/alpaca/test/res/multiple_file_output.ttl b/alpaca/test/res/multiple_file_output.ttl new file mode 100644 index 0000000..4e1d0f2 --- /dev/null +++ b/alpaca/test/res/multiple_file_output.ttl @@ -0,0 +1,103 @@ +@prefix alpaca: . +@prefix prov: . +@prefix xsd: . + + a alpaca:FileEntity ; + alpaca:filePath "/outputs/1.png"^^xsd:string ; + prov:wasDerivedFrom ; + prov:wasAttributedTo ; + prov:wasGeneratedBy . + + a alpaca:FileEntity ; + alpaca:filePath "/outputs/2.png"^^xsd:string ; + prov:wasDerivedFrom ; + prov:wasAttributedTo ; + prov:wasGeneratedBy . + + a alpaca:DataObjectEntity ; + prov:wasAttributedTo ; + prov:wasDerivedFrom ; + prov:wasGeneratedBy ; + alpaca:hashSource "UUID" . + + a alpaca:DataObjectEntity ; + prov:wasAttributedTo ; + prov:wasDerivedFrom ; + prov:wasGeneratedBy ; + alpaca:hashSource "UUID" . + + a alpaca:DataObjectEntity ; + prov:wasAttributedTo ; + prov:wasDerivedFrom ; + prov:wasGeneratedBy ; + alpaca:hashSource "joblib_SHA1" . + + a alpaca:DataObjectEntity ; + prov:wasAttributedTo ; + prov:wasDerivedFrom ; + prov:wasGeneratedBy ; + alpaca:hashSource "joblib_SHA1" . + + a alpaca:DataObjectEntity ; + prov:wasAttributedTo ; + alpaca:hashSource "joblib_SHA1" . + + a alpaca:FileEntity ; + alpaca:filePath "/full.png"^^xsd:string ; + prov:wasDerivedFrom ; + prov:wasAttributedTo ; + prov:wasGeneratedBy . + + a alpaca:DataObjectEntity ; + prov:wasAttributedTo ; + prov:wasDerivedFrom ; + prov:wasGeneratedBy ; + alpaca:hashSource "UUID" . 
+ + a alpaca:FunctionExecution ; + prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ; + prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ; + prov:used ; + prov:wasAssociatedWith ; + alpaca:codeStatement "plot_function(input, out_file)" ; + alpaca:executionOrder 3 ; + alpaca:usedFunction . + + a alpaca:FunctionExecution ; + prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ; + prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ; + prov:used ; + prov:wasAssociatedWith ; + alpaca:codeStatement "plot_function(input, out_file)" ; + alpaca:executionOrder 4 ; + alpaca:usedFunction . + + a alpaca:FunctionExecution ; + prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ; + prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ; + prov:used ; + prov:wasAssociatedWith ; + alpaca:codeStatement "plot_function(input, out_file)" ; + alpaca:executionOrder 1 ; + alpaca:usedFunction . + + a alpaca:FunctionExecution ;prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ; + prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ; + prov:used ; + prov:wasAssociatedWith ; + alpaca:codeStatement "cut_function(full_data)" ; + alpaca:executionOrder 2 ; + alpaca:usedFunction . + + a alpaca:Function ; + alpaca:functionName "plot_function" ; + alpaca:implementedIn "test" ; + alpaca:functionVersion "0.0.1" . + + a alpaca:Function ; + alpaca:functionName "cut_function" ; + alpaca:implementedIn "test" ; + alpaca:functionVersion "0.0.1" . + + a alpaca:ScriptAgent ; + alpaca:scriptPath "/script.py" . 
diff --git a/alpaca/test/test_graph.py b/alpaca/test/test_graph.py
index c18945e..46bb78b 100644
--- a/alpaca/test/test_graph.py
+++ b/alpaca/test/test_graph.py
@@ -1,3 +1,4 @@
+import sys
 import unittest
 
 from pathlib import Path
@@ -543,6 +544,87 @@ def test_overall_aggregation(self):
                 for key, value in expected_values_per_node[label].items():
                     self.assertEqual(attrs[key], value)
 
+    def test_aggregation_by_callable(self):
+        """Check File node aggregation by attribute name and by callable."""
+        graph_file = self.ttl_path / "multiple_file_output.ttl"
+
+        # Non-aggregated graph
+        graph = ProvenanceGraph(graph_file)
+
+        # Aggregate without attributes
+        aggregated = graph.aggregate({}, output_file=None)
+
+        # Aggregate separating by file path in File nodes
+        aggregated_path = graph.aggregate({'File': ('File_path',)},
+                                          output_file=None)
+
+        # Aggregate using a callable to separate files whose path starts with
+        # "/outputs/"
+        def is_cut_plot(g, n, d):
+            return d['File_path'].startswith("/outputs/")
+
+        aggregated_callable = graph.aggregate({'File': (is_cut_plot,)},
+                                              output_file=None)
+
+        # Define a dictionary with the expected values for each case, which
+        # are used in the subtests below
+        tests = {
+            'non_aggregated': {'graph': graph.graph, 'length': 10,
+                               'counts': {'InputObject': 3,
+                                          'plot_function': 3,
+                                          'cut_function': 1,
+                                          'File': 3},
+                               'paths': ["/full.png",
+                                         "/outputs/1.png",
+                                         "/outputs/2.png"]
+                               },
+            'aggregated': {'graph': aggregated, 'length': 5,
+                           'counts': {'InputObject': 2,
+                                      'plot_function': 1,
+                                      'cut_function': 1,
+                                      'File': 1},
+                           'paths': "/full.png;/outputs/1.png;/outputs/2.png"
+                           },
+            'aggregated_path': {'graph': aggregated_path, 'length': 10,
+                                'counts': {'InputObject': 3,
+                                           'plot_function': 3,
+                                           'cut_function': 1,
+                                           'File': 3},
+                                'paths': ["/full.png",
+                                          "/outputs/1.png",
+                                          "/outputs/2.png"]
+                                },
+            'aggregated_callable': {'graph': aggregated_callable, 'length': 7,
+                                    'counts': {'InputObject': 2,
+                                               'plot_function': 2,
+                                               'cut_function': 1,
+                                               'File': 2},
+                                    'paths': ["/full.png",
+                                              "/outputs/1.png;/outputs/2.png"]
+                                    },
+        }
+
+        for key, expected in tests.items():
+            with self.subTest(f"Graph {key}"):
+                test_graph = expected['graph']
+                nodes = test_graph.nodes
+                self.assertEqual(len(nodes), expected['length'])
+
+                # Check if node counts are as expected
+                all_labels = [attrs['label'] for attrs in nodes.values()]
+                counts = Counter(all_labels)
+                for label, count in expected['counts'].items():
+                    self.assertEqual(counts[label], count)
+
+                # Check if file paths in File nodes are as expected
+                paths = expected['paths']
+                for attrs in nodes.values():
+                    if attrs['label'] == "File":
+                        if isinstance(paths, list):
+                            self.assertIn(attrs['File_path'], paths)
+                        else:
+                            self.assertEqual(attrs['File_path'], paths)
+
     def test_aggregation_by_attribute_with_missing(self):
         aggregated = self.graph.aggregate({'InputObject': ('id',)},
                                           use_function_parameters=False,