From c86e8a43da36cceefb2070e50899e2cf9b9d3f8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Fri, 14 Jul 2023 14:28:01 +0200
Subject: [PATCH 1/9] Implemented capture of values of strings, booleans,
 complex and numeric types.

---
 alpaca/alpaca_types.py            |  2 +-
 alpaca/data_information.py        | 13 +++++++--
 alpaca/test/test_code_analysis.py | 46 +++++++++++++++++--------------
 alpaca/test/test_decorator.py     | 42 ++++++++++++++--------------
 alpaca/test/test_serialization.py | 33 +++++++++++++---------
 5 files changed, 80 insertions(+), 56 deletions(-)

diff --git a/alpaca/alpaca_types.py b/alpaca/alpaca_types.py
index aacd846..6610cbe 100644
--- a/alpaca/alpaca_types.py
+++ b/alpaca/alpaca_types.py
@@ -61,6 +61,6 @@
 # the disk.
 
 DataObject = namedtuple('DataObject', ('hash', 'hash_method', 'type', 'id',
-                                       'details'))
+                                       'details', 'value'))
 
 File = namedtuple('File', ('hash', 'hash_type', 'path'))
diff --git a/alpaca/data_information.py b/alpaca/data_information.py
index f6eedbb..9cf3bd3 100644
--- a/alpaca/data_information.py
+++ b/alpaca/data_information.py
@@ -258,6 +258,10 @@ def info(self, obj):
                 Reference for the object.
             * details : dict
                 Extended information (metadata) on the object.
+            * value : object
+                For builtin objects (`str`, `int`, `float`, `complex`, `bool`)
+                or equivalent objects (e.g. `numpy.float64`), the value is
+                stored. Otherwise, it is `None`.
         """
         type_information = type(obj)
         obj_type = f"{type_information.__module__}.{type_information.__name__}"
@@ -267,7 +271,8 @@ def info(self, obj):
         if obj is None:
             unique_id = uuid.uuid4()
             return DataObject(hash=unique_id, hash_method="UUID",
-                              type=obj_type, id=obj_id, details={})
+                              type=obj_type, id=obj_id, details={},
+                              value=None)
 
         # Here we can extract specific metadata to record
         details = {}
@@ -290,5 +295,9 @@ def info(self, obj):
                                                       obj_id=obj_id,
                                                       package=package)
 
+        obj_value = obj if isinstance(obj, (str, int, bool, complex, float,
+                                            np.number)) else None
+
         return DataObject(hash=obj_hash, hash_method=hash_method,
-                          type=obj_type, id=obj_id, details=details)
+                          type=obj_type, id=obj_id, details=details,
+                          value=obj_value)
diff --git a/alpaca/test/test_code_analysis.py b/alpaca/test/test_code_analysis.py
index 0060f20..b0ac144 100644
--- a/alpaca/test/test_code_analysis.py
+++ b/alpaca/test/test_code_analysis.py
@@ -28,28 +28,32 @@
 TEST_ARRAY_INFO = DataObject(hash=joblib_hash(TEST_ARRAY),
                              hash_method="joblib_SHA1",
                              type="numpy.ndarray", id=id(TEST_ARRAY),
-                             details={'shape': (3,), 'dtype': np.int64})
+                             details={'shape': (3,), 'dtype': np.int64},
+                             value=None)
 
 ELEMENT_0_INFO = DataObject(hash=joblib_hash(TEST_ARRAY[0]),
                             hash_method="joblib_SHA1", type="numpy.int64",
                             id=id(TEST_ARRAY[0]),
-                            details={'shape': (), 'dtype': np.int64})
+                            details={'shape': (), 'dtype': np.int64},
+                            value=1)
 
 ELEMENT_1_INFO = DataObject(hash=joblib_hash(TEST_ARRAY[1]),
                             hash_method="joblib_SHA1", type="numpy.int64",
                             id=id(TEST_ARRAY[1]),
-                            details={'shape': (), 'dtype': np.int64})
+                            details={'shape': (), 'dtype': np.int64},
+                            value=2)
 
 ELEMENT_2_INFO = DataObject(hash=joblib_hash(TEST_ARRAY[2]),
                             hash_method="joblib_SHA1", type="numpy.int64",
                             id=id(TEST_ARRAY[2]),
-                            details={'shape': (), 'dtype': np.int64})
+                            details={'shape': (), 'dtype': np.int64},
+                            value=3)
 
 TEST_DICT = {'numbers': TEST_ARRAY}
 TEST_DICT_INFO = DataObject(hash=joblib_hash(TEST_DICT),
                             hash_method="joblib_SHA1",
                             type="builtins.dict", id=id(TEST_DICT),
-                            details={})
+                            details={}, value=None)
 
 
 # To test attributes
@@ -95,7 +99,8 @@ def _check_function_execution(actual, exp_function, exp_input, exp_params,
                               exp_code_stmnt, exp_return_targets, exp_order,
                               test_case):
 
-    data_object_attributes = ('hash', 'hash_method', 'type', 'details')
+    data_object_attributes = ('hash', 'hash_method', 'type', 'details',
+                              'value')
 
     # Check function
     test_case.assertTupleEqual(actual.function, exp_function)
@@ -157,7 +162,7 @@ def test_subscript_index(self):
             hash=joblib_hash(TEST_ARRAY[0]+TEST_ARRAY[1]),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=3)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -205,7 +210,7 @@ def test_subscript_negative_index(self):
             hash=joblib_hash(TEST_ARRAY[-1]+TEST_ARRAY[-2]),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=5)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -253,12 +258,12 @@ def test_subscript_slice(self):
             hash=joblib_hash(TEST_ARRAY[0]+TEST_ARRAY[1]),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=3)
 
         expected_slice_output = DataObject(
             hash=joblib_hash(TEST_ARRAY[0:2]), hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(TEST_ARRAY[0:2]),
-            details={'shape': (2,), 'dtype': np.int64})
+            details={'shape': (2,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -296,12 +301,12 @@ def test_subscript_slice_no_start(self):
             hash=joblib_hash(TEST_ARRAY[0]+TEST_ARRAY[1]),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=3)
 
         expected_slice_output = DataObject(
             hash=joblib_hash(TEST_ARRAY[:2]), hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(TEST_ARRAY[:2]),
-            details={'shape': (2,), 'dtype': np.int64})
+            details={'shape': (2,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -339,12 +344,12 @@ def test_subscript_slice_no_stop(self):
             hash=joblib_hash(TEST_ARRAY[1]+TEST_ARRAY[2]),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=5)
 
         expected_slice_output = DataObject(
             hash=joblib_hash(TEST_ARRAY[1:]), hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(TEST_ARRAY[1:]),
-            details={'shape': (2,), 'dtype': np.int64})
+            details={'shape': (2,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -382,12 +387,12 @@ def test_subscript_slice_step(self):
             hash=joblib_hash(TEST_ARRAY[0]+TEST_ARRAY[2]),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=4)
 
         expected_slice_output = DataObject(
             hash=joblib_hash(TEST_ARRAY[::2]), hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(TEST_ARRAY[::2]),
-            details={'shape': (2,), 'dtype': np.int64})
+            details={'shape': (2,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -425,7 +430,7 @@ def test_subscript_index_str(self):
             hash=joblib_hash(np.sum(TEST_ARRAY)),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=6)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -464,7 +469,7 @@ def test_subscript_index_from_variable(self):
             hash=joblib_hash(np.sum(TEST_ARRAY)),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=6)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -502,12 +507,13 @@ def test_attribute(self):
             hash=joblib_hash(np.sum(TEST_ARRAY)),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64}, value=6)
 
         expected_container_info = DataObject(
             hash=joblib_hash(container_of_array), hash_method="joblib_SHA1",
             type="test_code_analysis.ContainerOfArray",
-            id=id(container_of_array), details={'array': TEST_ARRAY})
+            id=id(container_of_array), details={'array': TEST_ARRAY},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
diff --git a/alpaca/test/test_decorator.py b/alpaca/test/test_decorator.py
index de1769e..1445931 100644
--- a/alpaca/test/test_decorator.py
+++ b/alpaca/test/test_decorator.py
@@ -24,14 +24,16 @@
 TEST_ARRAY_INFO = DataObject(hash=joblib.hash(TEST_ARRAY, hash_name='sha1'),
                              hash_method="joblib_SHA1",
                              type="numpy.ndarray", id=id(TEST_ARRAY),
-                             details={'shape': (3,), 'dtype': np.int64})
+                             details={'shape': (3,), 'dtype': np.int64},
+                             value=None)
 
 TEST_ARRAY_2 = np.array([4, 5, 6])
 TEST_ARRAY_2_INFO = DataObject(hash=joblib.hash(TEST_ARRAY_2,
                                                 hash_name='sha1'),
                                hash_method="joblib_SHA1",
                                type="numpy.ndarray", id=id(TEST_ARRAY_2),
-                               details={'shape': (3,), 'dtype': np.int64})
+                               details={'shape': (3,), 'dtype': np.int64},
+                               value=None)
 
 CONTAINER = [TEST_ARRAY, TEST_ARRAY_2]
 
@@ -327,7 +329,7 @@ def test_simple_function(self):
             hash=joblib.hash(TEST_ARRAY+3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -359,7 +361,7 @@ def test_simple_function_no_target(self):
             hash=joblib.hash(TEST_ARRAY+3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=output_id,
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -386,7 +388,7 @@ def test_kwargs_params(self):
             hash=joblib.hash(TEST_ARRAY+3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -413,7 +415,7 @@ def test_kwargs_params_default(self):
             hash=joblib.hash(TEST_ARRAY+5, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -440,7 +442,7 @@ def test_kwargs_params_default_override(self):
             hash=joblib.hash(TEST_ARRAY+5, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -468,7 +470,7 @@ def test_container_input_function(self):
             hash=joblib.hash(np.float64(3.5), hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.float64", id=id(avg),
-            details={'shape': (), 'dtype': np.float64})
+            details={'shape': (), 'dtype': np.float64}, value=3.5)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -497,7 +499,7 @@ def test_varargs_input_function(self):
             hash=joblib.hash(np.float64(3.5), hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.float64", id=id(avg),
-            details={'shape': (), 'dtype': np.float64})
+            details={'shape': (), 'dtype': np.float64}, value=3.5)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -525,7 +527,7 @@ def test_multiple_inputs_function(self):
             hash=joblib.hash(TEST_ARRAY+TEST_ARRAY_2, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -553,13 +555,13 @@ def test_multiple_outputs_function_elements(self):
             hash=joblib.hash(TEST_ARRAY+3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res1),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         expected_output_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY+4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res2),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -586,7 +588,7 @@ def test_multiple_outputs_function_tuple(self):
             hash=joblib.hash((TEST_ARRAY+3, TEST_ARRAY+4), hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="builtins.tuple", id=id(res),
-            details={})
+            details={}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -613,13 +615,13 @@ def test_container_output_function(self):
             hash=joblib.hash(TEST_ARRAY + 3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[0]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         expected_output_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[1]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -1057,7 +1059,7 @@ def test_file_input(self):
         expected_output = DataObject(
             hash=joblib.hash(expected_list, hash_name='sha1'),
             hash_method="joblib_SHA1",
-            type="builtins.list", id=id(res), details={})
+            type="builtins.list", id=id(res), details={}, value=None)
 
         expected_file = File("96ccc1380e069667069acecea3e2ab559441657807e0a86d14f49028710ddb3a",
                              hash_type="sha256", path=file_name)
@@ -1089,14 +1091,14 @@ def test_file_output(self):
         expected_input = DataObject(
             hash=joblib.hash(input_list, hash_name='sha1'),
             hash_method="joblib_SHA1",
-            type="builtins.list", id=id(input_list), details={})
+            type="builtins.list", id=id(input_list), details={}, value=None)
 
         # As None has its own UUID, let's get what was generated
         self.assertEqual(len(Provenance.history), 1)
         output_uuid = Provenance.history[0].output[0].hash
 
         expected_none_output = DataObject(hash=output_uuid, hash_method="UUID",
-            type="builtins.NoneType", id=id(res), details={})
+            type="builtins.NoneType", id=id(res), details={}, value=None)
 
         expected_file = File("00d20b4831b0dadded2c633bdfc3dde3926fc17baaed51dacdab3e52a3b0d419",
                              hash_type="sha256", path=Path(file_name))
@@ -1224,13 +1226,13 @@ def test_object_method(self):
             hash_method="joblib_SHA1",
             type="test_decorator.ObjectWithMethod",
             id=id(obj),
-            details={'coefficient': 2})
+            details={'coefficient': 2}, value=None)
 
         expected_output = DataObject(
             hash=joblib.hash(TEST_ARRAY+2, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
diff --git a/alpaca/test/test_serialization.py b/alpaca/test/test_serialization.py
index 79fc12f..fd9ac15 100644
--- a/alpaca/test/test_serialization.py
+++ b/alpaca/test/test_serialization.py
@@ -23,14 +23,15 @@
 TEST_FUNCTION = FunctionInfo("test_function", "test", "0.0.1")
 
 # Object without metadata
-INPUT = DataObject("12345", "joblib_SHA1", "test.InputObject", 12345, {})
+INPUT = DataObject("12345", "joblib_SHA1", "test.InputObject", 12345, {}, None)
 
 # Object with all main types of metadata
 INPUT_METADATA = DataObject("12345", "joblib_SHA1", "test.InputObject", 12345,
                             details={'metadata_1': "value1",
                                      'metadata_2': 5,
                                      'metadata_3': 5.0,
-                                     'metadata_4': True})
+                                     'metadata_4': True},
+                            value=None)
 
 OUTPUT_METADATA_NEO = DataObject("54321", "joblib_SHA1",
                                  "neo.core.SpikeTrain", 54321,
@@ -42,7 +43,8 @@
                                                   [0, 1, 2, 3]),
                                               'event': np.array(
                                                   [True, False, False])}
-                                          })
+                                          },
+                                 value=None)
 
 # Object with special metadata
 
@@ -51,15 +53,20 @@
 OUTPUT_FILE = File("98765", "sha256", "/test_file_output")
 
 # Simple objects to test multiple inputs/outputs handling
-INPUT_2 = DataObject("212345", "joblib_SHA1", "test.InputObject", 212345, {})
-OUTPUT = DataObject("54321", "joblib_SHA1", "test.OutputObject", 54321, {})
-OUTPUT_2 = DataObject("254321", "joblib_SHA1", "test.OutputObject", 254321, {})
+INPUT_2 = DataObject("212345", "joblib_SHA1", "test.InputObject", 212345, {},
+                     None)
+OUTPUT = DataObject("54321", "joblib_SHA1", "test.OutputObject", 54321, {},
+                    None)
+OUTPUT_2 = DataObject("254321", "joblib_SHA1", "test.OutputObject", 254321, {},
+                      None)
 
 # None output
-NONE_OUTPUT = DataObject("777777", "UUID", "builtins.NoneType", 777777, {})
+NONE_OUTPUT = DataObject("777777", "UUID", "builtins.NoneType", 777777, {},
+                         None)
 
 # Object collections
-COLLECTION = DataObject("888888", "joblib_SHA1", "builtins.list", 888888, {})
+COLLECTION = DataObject("888888", "joblib_SHA1", "builtins.list", 888888, {},
+                        None)
 
 # General information. Will be fixed across the tests
 TIMESTAMP_START = "2022-05-02T12:34:56.123456"
@@ -166,7 +173,7 @@ def test_class_method_serialization(self):
             hash_method="joblib_SHA1",
             type="test.ObjectWithMethod",
             id=232323,
-            details={})
+            details={}, value=None)
 
         function_execution = FunctionExecution(
             function=FunctionInfo('ObjectWithMethod.process',
@@ -437,16 +444,16 @@ def test_multiple_memberships(self):
         self.ttl_path = Path(__file__).parent / "res"
 
         super_container = DataObject("2333333", "joblib_SHA1",
-                                     "test.SuperContainer", 2333333, {})
+                                     "test.SuperContainer", 2333333, {}, None)
 
         super_container_list = DataObject("23333332", "joblib_SHA1",
-                                          "builtins.list", 23333332, {})
+                                          "builtins.list", 23333332, {}, None)
 
         container = DataObject("333333", "joblib_SHA1", "test.Container", 333333,
-                               {})
+                               {}, None)
 
         container_list = DataObject("3333332", "joblib_SHA1", "builtins.list",
-                                    3333332, {})
+                                    3333332, {}, None)
 
         attribute_access_container = FunctionExecution(
             function=FunctionInfo(name='attribute', module="", version=""),

From d1b9ca80b85f13cf773d6381e73a9f5c0deb13d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Fri, 14 Jul 2023 16:23:52 +0200
Subject: [PATCH 2/9] Added serialization of values, with unit tests

---
 alpaca/serialization/prov.py      |  34 ++++++++++
 alpaca/test/res/values.ttl        | 104 ++++++++++++++++++++++++++++++
 alpaca/test/test_serialization.py |  52 +++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 alpaca/test/res/values.ttl

diff --git a/alpaca/serialization/prov.py b/alpaca/serialization/prov.py
index 8aa0036..94d6ea8 100644
--- a/alpaca/serialization/prov.py
+++ b/alpaca/serialization/prov.py
@@ -10,6 +10,8 @@
 """
 
 from itertools import product
+import numpy as np
+import numbers
 
 from rdflib import Graph, URIRef, BNode, Literal
 from rdflib.namespace import RDF, PROV, XSD
@@ -59,6 +61,14 @@ class AlpacaProvDocument(object):
     control the serialization.
     """
 
+    XSD_TYPES = {
+        numbers.Integral: XSD.integer,
+        numbers.Real: XSD.double,
+        numbers.Complex: XSD.string,
+        str: XSD.string,
+        bool: XSD.boolean,
+    }
+
     def __init__(self):
         self.graph = Graph()
         self.graph.namespace_manager.bind('alpaca', ALPACA)
@@ -142,6 +152,24 @@ def _add_FunctionExecution(self, script_info, session_id, execution_id,
         return uri
 
     # Entity methods
+    @classmethod
+    def _get_entity_value_datatype(cls, info):
+        value = info.value
+        if value is None:
+            return None
+
+        # Check if builtin type or NumPy dtype
+        value_class = value.__class__ if not isinstance(value, np.number) \
+            else value.dtype.type
+        if value_class in cls.XSD_TYPES:
+            return cls.XSD_TYPES[value_class]
+
+        for possible_type in (numbers.Integral, numbers.Real, numbers.Complex):
+            if issubclass(value_class, possible_type):
+                return cls.XSD_TYPES[possible_type]
+
+        # Type not found
+        return None
 
     def _add_DataObjectEntity(self, info):
         # Adds a DataObjectEntity from the Alpaca PROV model
@@ -152,6 +180,12 @@ def _add_DataObjectEntity(self, info):
             return uri
         self.graph.add((uri, RDF.type, ALPACA.DataObjectEntity))
         self.graph.add((uri, ALPACA.hashSource, Literal(info.hash_method)))
+
+        value_datatype = self._get_entity_value_datatype(info)
+        if value_datatype:
+            self.graph.add((uri, PROV.value,
+                            Literal(info.value, datatype=value_datatype)))
+
         self._add_entity_metadata(uri, info)
         self._entity_uris.add(uri)
         return uri
diff --git a/alpaca/test/res/values.ttl b/alpaca/test/res/values.ttl
new file mode 100644
index 0000000..c065fcc
--- /dev/null
+++ b/alpaca/test/res/values.ttl
@@ -0,0 +1,104 @@
+@prefix alpaca: <http://purl.org/alpaca#> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<urn:fz-juelich.de:alpaca:object:Python:test.OutputObject:54321> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" .
+
+<urn:fz-juelich.de:alpaca:object:Python:builtins.int:543211> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value 1 .
+
+<urn:fz-juelich.de:alpaca:object:Python:builtins.float:543212> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value 1.1e+00 .
+
+<urn:fz-juelich.de:alpaca:object:Python:builtins.str:543213> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value "test"^^xsd:string .
+
+<urn:fz-juelich.de:alpaca:object:Python:builtins.complex:543214> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value "(3+5j)"^^xsd:string .
+
+<urn:fz-juelich.de:alpaca:object:Python:builtins.bool:543215> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value true .
+
+<urn:fz-juelich.de:alpaca:object:Python:numpy.float32:543216> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value 1.2e+00 .
+
+<urn:fz-juelich.de:alpaca:object:Python:numpy.float64:543217> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value 1.3e+00 .
+
+<urn:fz-juelich.de:alpaca:object:Python:numpy.int64:543218> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value 2 .
+
+<urn:fz-juelich.de:alpaca:object:Python:numpy.int32:543219> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value 3 .
+
+<urn:fz-juelich.de:alpaca:object:Python:numpy.int16:5432110> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value -4 .
+
+<urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> a alpaca:DataObjectEntity ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    alpaca:hashSource "joblib_SHA1" .
+
+
+<urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> a alpaca:FunctionExecution ;
+    prov:startedAtTime "2022-05-02T12:34:56.123456"^^xsd:dateTime ;
+    prov:endedAtTime "2022-05-02T12:35:56.123456"^^xsd:dateTime ;
+    prov:used <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAssociatedWith <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    alpaca:codeStatement "test_function(input_1, 5)" ;
+    alpaca:executionOrder 1 ;
+    alpaca:usedFunction <urn:fz-juelich.de:alpaca:function:Python:test.test_function> ;
+    alpaca:hasParameter [ a alpaca:NameValuePair ;
+        alpaca:pairName "param_1" ;
+        alpaca:pairValue 5 ] .
+
+<urn:fz-juelich.de:alpaca:function:Python:test.test_function> a alpaca:Function ;
+    alpaca:functionName "test_function" ;
+    alpaca:implementedIn "test" ;
+    alpaca:functionVersion "0.0.1" .
+
+<urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> a alpaca:ScriptAgent ;
+    alpaca:scriptPath "/script.py" .
diff --git a/alpaca/test/test_serialization.py b/alpaca/test/test_serialization.py
index fd9ac15..f1f08bc 100644
--- a/alpaca/test/test_serialization.py
+++ b/alpaca/test/test_serialization.py
@@ -90,6 +90,58 @@ def setUpClass(cls):
         cls.ttl_path = Path(__file__).parent / "res"
         alpaca_setting('authority', "fz-juelich.de")
 
+    def test_value_serialization(self):
+        # DataObject tuples for each type that should be captured
+        # They are output of the simulated output
+
+        INT = DataObject("543211", "joblib_SHA1", "builtins.int", 543211,
+                         {}, 1)
+        FLOAT = DataObject("543212", "joblib_SHA1", "builtins.float", 543212,
+                           {}, 1.1)
+        STR = DataObject("543213", "joblib_SHA1", "builtins.str", 543213,
+                         {}, "test")
+        COMPLEX = DataObject("543214", "joblib_SHA1", "builtins.complex",
+                             543214, {}, 3+5j)
+        BOOL = DataObject("543215", "joblib_SHA1", "builtins.bool", 543215,
+                          {}, True)
+        NUMPY_FLOAT32 = DataObject("543216", "joblib_SHA1", "numpy.float32",
+                                   543216, {}, np.float32(1.2))
+        NUMPY_FLOAT64 = DataObject("543217", "joblib_SHA1", "numpy.float64",
+                                   543217, {}, np.float64(1.3))
+        NUMPY_INT64 = DataObject("543218", "joblib_SHA1", "numpy.int64",
+                                 543218, {}, np.int64(2))
+        NUMPY_INT32 = DataObject("543219", "joblib_SHA1", "numpy.int32",
+                                 543219, {}, np.int32(3))
+        NUMPY_INT16 = DataObject("5432110", "joblib_SHA1", "numpy.int16",
+                                 5432110, {}, np.int16(-4))
+
+        function_execution = FunctionExecution(
+            function=TEST_FUNCTION,
+            input={'input_1': INPUT}, params={'param_1': 5},
+            output={0: OUTPUT, 1: INT, 2: FLOAT, 3: STR, 4: COMPLEX,
+                    5: BOOL, 6: NUMPY_FLOAT32, 7: NUMPY_FLOAT64,
+                    8: NUMPY_INT64, 9: NUMPY_INT32, 10: NUMPY_INT16},
+            call_ast=None,
+            arg_map=['input_1', 'param_1'], kwarg_map=[], return_targets=[],
+            time_stamp_start=TIMESTAMP_START, time_stamp_end=TIMESTAMP_END,
+            execution_id="12345", order=1,
+            code_statement="test_function(input_1, 5)"
+        )
+
+        # Load expected RDF graph
+        expected_graph_file = self.ttl_path / "values.ttl"
+        expected_graph = rdflib.Graph()
+        expected_graph.parse(expected_graph_file, format='turtle')
+
+        # Serialize the history using AlpacaProv document
+        alpaca_prov = AlpacaProvDocument()
+        alpaca_prov.add_history(SCRIPT_INFO, SCRIPT_SESSION_ID,
+                                history=[function_execution])
+
+        # Check if graphs are equal
+        self.assertTrue(assert_rdf_graphs_equal(alpaca_prov.graph,
+                                                expected_graph))
+
     def test_input_output_serialization(self):
         function_execution = FunctionExecution(
             function=TEST_FUNCTION,

From c7eeb7d87eb131c7d5b83ae80f7f4b85f44c8039 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Mon, 17 Jul 2023 16:11:45 +0200
Subject: [PATCH 3/9] Implemented capture of values of user-requested types,
 using the settings function. They are captured as strings.

---
 alpaca/data_information.py        | 32 ++++++++++---
 alpaca/decorator.py               | 18 +++++---
 alpaca/serialization/prov.py      |  6 +++
 alpaca/settings.py                | 13 +++++-
 alpaca/test/res/values.ttl        |  7 +++
 alpaca/test/test_decorator.py     | 75 ++++++++++++++++++++++++++++++-
 alpaca/test/test_serialization.py | 11 ++++-
 7 files changed, 147 insertions(+), 15 deletions(-)

diff --git a/alpaca/data_information.py b/alpaca/data_information.py
index 9cf3bd3..03dc8d3 100644
--- a/alpaca/data_information.py
+++ b/alpaca/data_information.py
@@ -19,6 +19,7 @@
 
 import joblib
 import numpy as np
+from numbers import Number
 from dill._dill import save_function
 
 from alpaca.alpaca_types import DataObject, File
@@ -118,6 +119,11 @@ class _ObjectInformation(object):
     :func:`hash` function, depending on the `use_builtin_hash` parameter set
     during initialization.
 
+    The values of objects of the builtin types `str`, `bool`, `int`, `complex`
+    and `float` as well as the NumPy numeric types (e.g., `np.float64`) will
+    be stored. Additional object types to be stored (e.g., `builtins.dict`)
+    can be defined with the `store_values` parameter.
+
     The method `info` is called to obtain the provenance information
     associated with the object during tracking, as the `DataObject` named
     tuple. The relevant metadata attributes are also stored in the tuple.
@@ -132,6 +138,13 @@ class _ObjectInformation(object):
         List of package names whose object hashes will be computed using the
         Python builtin `hash` function, instead of `joblib.hash` function.
         Default: None
+    store_values : list, optional
+        List of object types whose values will be stored in the provenance
+        information (e.g., `builtins.dict`). This is in addition to the
+        builtin types `str`, `bool`, `int`, `complex` and `float` as well as
+        the NumPy numeric types (e.g., `np.float64`). The values of these are
+        always stored.
+        Default: None
     """
 
     # This is a list of object attributes that provide relevant provenance
@@ -140,10 +153,12 @@ class _ObjectInformation(object):
                             'id', 'nix_name', 'dimensionality', 'pid',
                             'create_time')
 
-    def __init__(self, use_builtin_hash=None):
+    def __init__(self, use_builtin_hash=None, store_values=None):
         self._hash_memoizer = dict()
         self._use_builtin_hash = copy(use_builtin_hash) \
             if use_builtin_hash is not None else []
+        self._store_values = copy(store_values)\
+            if store_values is not None else []
 
     @staticmethod
     def _get_object_package(obj):
@@ -259,9 +274,10 @@ def info(self, obj):
             * details : dict
                 Extended information (metadata) on the object.
             * value : object
-                For builtin objects (`str`, `int`, `float`, `bool`) or
-                equivalent objects (e.g. `numpy.float64`), the value is
-                stored.
+                For builtin objects (`str`, `int`, `float`, `bool`, `complex`)
+                or equivalent objects (e.g. `numpy.float64`), the value is
+                stored. Additional object types specified with the
+                :attr:`store_values` list will also be stored.
         """
         type_information = type(obj)
         obj_type = f"{type_information.__module__}.{type_information.__name__}"
@@ -295,8 +311,12 @@ def info(self, obj):
                                                       obj_id=obj_id,
                                                       package=package)
 
-        obj_value = obj if isinstance(obj, (str, int, bool, complex, float,
-                                            np.number)) else None
+        # Store object value
+        obj_value = None
+        if isinstance(obj, (str, bool, Number)):
+            obj_value = obj
+        elif obj_type in self._store_values:
+            obj_value = str(obj)
 
         return DataObject(hash=obj_hash, hash_method=hash_method,
                           type=obj_type, id=obj_id, details=details,
diff --git a/alpaca/decorator.py b/alpaca/decorator.py
index f502176..902b465 100644
--- a/alpaca/decorator.py
+++ b/alpaca/decorator.py
@@ -365,7 +365,8 @@ def _capture_code_and_function_provenance(self, lineno, function):
         return source_line, ast_tree, return_targets, function_info
 
     def _capture_input_and_parameters_provenance(self, function, args, kwargs,
-        ast_tree, function_info, time_stamp_start, builtin_object_hash):
+        ast_tree, function_info, time_stamp_start, builtin_object_hash,
+        store_values):
 
         # 1. Extract the parameters passed to the function and store them in
         # the `input_data` dictionary.
@@ -389,7 +390,8 @@ def _capture_input_and_parameters_provenance(self, function, args, kwargs,
         # After this step, all hashes and metadata of input parameters/files
         # are going to be stored in the dictionary `inputs`.
 
-        data_info = _ObjectInformation(use_builtin_hash=builtin_object_hash)
+        data_info = _ObjectInformation(use_builtin_hash=builtin_object_hash,
+                                       store_values=store_values)
 
         # Initialize parameter list with all default arguments that were not
         # passed to the function
@@ -502,11 +504,12 @@ def _capture_container_output(self, function_output, data_info,
     def _capture_output_provenance(self, function_output, return_targets,
                                    input_data, builtin_object_hash,
                                    time_stamp_start, execution_id,
-                                   constructed_object=None):
+                                   store_values, constructed_object=None):
 
         # In case in-place operations were performed, lets not use
         # memoization
-        data_info = _ObjectInformation(use_builtin_hash=builtin_object_hash)
+        data_info = _ObjectInformation(use_builtin_hash=builtin_object_hash,
+                                       store_values=store_values)
 
         # 6. Create hash for the output using `_ObjectInformation` to follow
         # individual returns. The hashes will be stored in the `outputs`
@@ -552,7 +555,8 @@ def wrapped(*args, **kwargs):
 
             builtin_object_hash = _ALPACA_SETTINGS[
                 'use_builtin_hash_for_module']
-            logger.debug(f"Builtin object hash: {builtin_object_hash}")
+            store_values = _ALPACA_SETTINGS['store_values']
+            logging.debug(f"Builtin object hash: {builtin_object_hash}")
 
             lineno = None
 
@@ -593,7 +597,8 @@ def wrapped(*args, **kwargs):
                                 function=function, args=args, kwargs=kwargs,
                                 ast_tree=ast_tree, function_info=function_info,
                                 time_stamp_start=time_stamp_start,
-                                builtin_object_hash=builtin_object_hash)
+                                builtin_object_hash=builtin_object_hash,
+                                store_values=store_values)
 
             # Call the function
             function_output = function(*args, **kwargs)
@@ -620,6 +625,7 @@ def wrapped(*args, **kwargs):
                     builtin_object_hash=builtin_object_hash,
                     time_stamp_start=time_stamp_start,
                     execution_id=execution_id,
+                    store_values=store_values,
                     constructed_object=constructed_object)
 
                 # Get the end time stamp
diff --git a/alpaca/serialization/prov.py b/alpaca/serialization/prov.py
index 94d6ea8..7b4f188 100644
--- a/alpaca/serialization/prov.py
+++ b/alpaca/serialization/prov.py
@@ -164,6 +164,12 @@ def _get_entity_value_datatype(cls, info):
         if value_class in cls.XSD_TYPES:
             return cls.XSD_TYPES[value_class]
 
+        # Check if object is included in the `store_values` setting.
+        # In this case, they are always stored as strings
+        obj_type = info.type
+        if obj_type in _ALPACA_SETTINGS['store_values']:
+            return XSD.string
+
         for possible_type in (numbers.Integral, numbers.Real, numbers.Complex):
             if issubclass(value_class, possible_type):
                 return cls.XSD_TYPES[possible_type]
diff --git a/alpaca/settings.py b/alpaca/settings.py
index 6860755..0545989 100644
--- a/alpaca/settings.py
+++ b/alpaca/settings.py
@@ -51,6 +51,16 @@
 
         Default: "my-authority"
 
+* **store_values**: list of str
+        The values of objects of the types in this list will be stored
+        together with the provenance information. Note that objects of the
+        builtin types `str`, `bool`, `int`, `float` and `complex`, as well as
+        the NumPy numeric types (e.g. `numpy.float64`) are stored by default.
+        This option should be used to store values of more complex types, such
+        as dictionaries. In this case, the list in this setting should have
+        the `builtins.dict` entry. The strings are the full path to the Python
+        object, i.e., `[module].[...].[object_class]`.
+
 
 To set/read a setting, use the function :func:`alpaca_setting`.
 
@@ -61,7 +71,8 @@
 # Should be modified only through the `alpaca_setting` function.
 
 _ALPACA_SETTINGS = {'use_builtin_hash_for_module': [],
-                    'authority': "my-authority"}
+                    'authority': "my-authority",
+                    'store_values': []}
 
 
 def alpaca_setting(name, value=None):
diff --git a/alpaca/test/res/values.ttl b/alpaca/test/res/values.ttl
index c065fcc..4483803 100644
--- a/alpaca/test/res/values.ttl
+++ b/alpaca/test/res/values.ttl
@@ -78,6 +78,13 @@
     alpaca:hashSource "joblib_SHA1" ;
     prov:value -4 .
 
+<urn:fz-juelich.de:alpaca:object:Python:builtins.dict:5432111> a alpaca:DataObjectEntity ;
+    prov:wasDerivedFrom <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> ;
+    prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
+    prov:wasGeneratedBy <urn:fz-juelich.de:alpaca:function_execution:Python:111111:999999:test.test_function#12345> ;
+    alpaca:hashSource "joblib_SHA1" ;
+    prov:value "{'id': [1, 2, 3], 'value': {4, 5, 6}}"^^xsd:string .
+
 <urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345> a alpaca:DataObjectEntity ;
     prov:wasAttributedTo <urn:fz-juelich.de:alpaca:script:Python:script.py:111111#999999> ;
     alpaca:hashSource "joblib_SHA1" .
diff --git a/alpaca/test/test_decorator.py b/alpaca/test/test_decorator.py
index 1445931..c9a3cef 100644
--- a/alpaca/test/test_decorator.py
+++ b/alpaca/test/test_decorator.py
@@ -15,7 +15,7 @@
 import neo
 
 from alpaca import (Provenance, activate, deactivate, save_provenance,
-                    print_history)
+                    print_history, alpaca_setting)
 from alpaca.alpaca_types import (FunctionInfo, Container, DataObject, File)
 
 # Define some data and expected values test tracking
@@ -1303,5 +1303,78 @@ def test_class_constructor_container_output(self):
             test_case=self)
 
 
+@Provenance(inputs=['source'])
+def use_dict(source):
+    return 3
+
+class ProvenanceDecoratorStoreValuesTestCase(unittest.TestCase):
+
+    def setUp(self):
+        alpaca_setting('store_values', [])
+
+    def test_capture_dict(self):
+        # This should have values for both the input dictionary and the
+        # integer return
+        alpaca_setting('store_values', ['builtins.dict'])
+        activate(clear=True)
+        test_dict = dict(id=[1, 2, 3], value={4, 5, 6})
+        res = use_dict(test_dict)
+        deactivate()
+
+        dict_info = DataObject(hash=joblib.hash(test_dict, hash_name='sha1'),
+                               hash_method="joblib_SHA1",
+                               type="builtins.dict", id=id(test_dict),
+                               details={},
+                               value="{'id': [1, 2, 3], 'value': {4, 5, 6}}")
+
+        expected_output = DataObject(hash=joblib.hash(3, hash_name='sha1'),
+                                     hash_method="joblib_SHA1",
+                                     type="builtins.int", id=id(res),
+                                     details={}, value=3)
+
+        _check_function_execution(
+            actual=Provenance.history[0],
+            exp_function=FunctionInfo('use_dict', 'test_decorator', ''),
+            exp_input={'source': dict_info},
+            exp_params={},
+            exp_output={0: expected_output},
+            exp_arg_map=['source'],
+            exp_kwarg_map=[],
+            exp_code_stmnt="res = use_dict(test_dict)",
+            exp_return_targets=['res'],
+            exp_order=1,
+            test_case=self)
+
+    def test_capture_builtins_only(self):
+        # This should have values only for the integer return
+        activate(clear=True)
+        test_dict = dict(id=[1, 2, 3], value={4, 5, 6})
+        res = use_dict(test_dict)
+        deactivate()
+
+        dict_info = DataObject(hash=joblib.hash(test_dict, hash_name='sha1'),
+                               hash_method="joblib_SHA1",
+                               type="builtins.dict", id=id(test_dict),
+                               details={}, value=None)
+
+        expected_output = DataObject(hash=joblib.hash(3, hash_name='sha1'),
+                                     hash_method="joblib_SHA1",
+                                     type="builtins.int", id=id(res),
+                                     details={}, value=3)
+
+        _check_function_execution(
+            actual=Provenance.history[0],
+            exp_function=FunctionInfo('use_dict', 'test_decorator', ''),
+            exp_input={'source': dict_info},
+            exp_params={},
+            exp_output={0: expected_output},
+            exp_arg_map=['source'],
+            exp_kwarg_map=[],
+            exp_code_stmnt="res = use_dict(test_dict)",
+            exp_return_targets=['res'],
+            exp_order=1,
+            test_case=self)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/alpaca/test/test_serialization.py b/alpaca/test/test_serialization.py
index f1f08bc..96304b5 100644
--- a/alpaca/test/test_serialization.py
+++ b/alpaca/test/test_serialization.py
@@ -90,9 +90,13 @@ def setUpClass(cls):
         cls.ttl_path = Path(__file__).parent / "res"
         alpaca_setting('authority', "fz-juelich.de")
 
+    def setUp(self):
+        alpaca_setting('store_values', [])
+
     def test_value_serialization(self):
         # DataObject tuples for each type that should be captured
         # They are output of the simulated output
+        alpaca_setting('store_values', ['builtins.dict'])
 
         INT = DataObject("543211", "joblib_SHA1", "builtins.int", 543211,
                          {}, 1)
@@ -115,12 +119,17 @@ def test_value_serialization(self):
         NUMPY_INT16 = DataObject("5432110", "joblib_SHA1", "numpy.int16",
                                  5432110, {}, np.int16(-4))
 
+        DICT = DataObject("5432111", "joblib_SHA1", "builtins.dict",
+                          5432111, {},
+                          str(dict(id=[1, 2, 3], value={4, 5, 6})))
+
         function_execution = FunctionExecution(
             function=TEST_FUNCTION,
             input={'input_1': INPUT}, params={'param_1': 5},
             output={0: OUTPUT, 1: INT, 2: FLOAT, 3: STR, 4: COMPLEX,
                     5: BOOL, 6: NUMPY_FLOAT32, 7: NUMPY_FLOAT64,
-                    8: NUMPY_INT64, 9: NUMPY_INT32, 10: NUMPY_INT16},
+                    8: NUMPY_INT64, 9: NUMPY_INT32, 10: NUMPY_INT16,
+                    11: DICT},
             call_ast=None,
             arg_map=['input_1', 'param_1'], kwarg_map=[], return_targets=[],
             time_stamp_start=TIMESTAMP_START, time_stamp_end=TIMESTAMP_END,

From 16f89beed2be69155feecff36a4a4845b38ef466 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Mon, 17 Jul 2023 16:20:31 +0200
Subject: [PATCH 4/9] Unit tests for the capture of values of user-requested
 types.

---
 alpaca/test/test_data_information.py | 29 ++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/alpaca/test/test_data_information.py b/alpaca/test/test_data_information.py
index 3d6e738..6b47032 100644
--- a/alpaca/test/test_data_information.py
+++ b/alpaca/test/test_data_information.py
@@ -106,6 +106,35 @@ def test_none(self):
         self.assertEqual(info.hash_method, "UUID")
         self.assertDictEqual(info.details, {})
 
+    def test_store_value_requested(self):
+        object_info = _ObjectInformation(store_values=['builtins.dict'])
+        test_dict = dict(key=['3', '4'])
+        info = object_info.info(test_dict)
+        self.assertEqual(info.hash, joblib.hash(test_dict, hash_name='sha1'))
+        self.assertEqual(info.type, "builtins.dict")
+        self.assertEqual(info.hash_method, "joblib_SHA1")
+        self.assertDictEqual(info.details, {})
+        self.assertEqual(info.value, "{'key': ['3', '4']}")
+
+    def test_store_value_not_requested(self):
+        object_info = _ObjectInformation()
+        test_dict = dict(key=['3', '4'])
+        info = object_info.info(test_dict)
+        self.assertEqual(info.hash, joblib.hash(test_dict, hash_name='sha1'))
+        self.assertEqual(info.type, "builtins.dict")
+        self.assertEqual(info.hash_method, "joblib_SHA1")
+        self.assertDictEqual(info.details, {})
+        self.assertEqual(info.value, None)
+
+    def test_store_value_builtins(self):
+        object_info = _ObjectInformation()
+        info = object_info.info(5)
+        self.assertEqual(info.hash, joblib.hash(5, hash_name='sha1'))
+        self.assertEqual(info.type, "builtins.int")
+        self.assertEqual(info.hash_method, "joblib_SHA1")
+        self.assertDictEqual(info.details, {})
+        self.assertEqual(info.value, 5)
+
     def test_custom_class(self):
         custom_object_1 = ObjectClass(param=4)
         custom_object_2 = ObjectClass(param=3)

From da0f04199d7619e154c625264c35c6301d5452a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Thu, 10 Aug 2023 09:44:28 +0200
Subject: [PATCH 5/9] Updated unit tests

---
 alpaca/test/test_code_analysis.py | 17 ++++---
 alpaca/test/test_decorator.py     | 75 ++++++++++++++++++-------------
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/alpaca/test/test_code_analysis.py b/alpaca/test/test_code_analysis.py
index b0ac144..47c07a1 100644
--- a/alpaca/test/test_code_analysis.py
+++ b/alpaca/test/test_code_analysis.py
@@ -552,19 +552,22 @@ def test_attribute_method_call(self):
             hash=joblib_hash(np.sum(TEST_ARRAY)),
             hash_method="joblib_SHA1",
             type="numpy.int64", id=id(res),
-            details={'shape': (), 'dtype': np.int64})
+            details={'shape': (), 'dtype': np.int64},
+            value=6)
 
         object_info = DataObject(
             hash=joblib_hash(object_with_method),
             hash_method="joblib_SHA1",
             type="test_code_analysis.ObjectWithMethod",
             id=id(object_with_method),
-            details={})
+            details={},
+            value=None)
 
         expected_container_info = DataObject(
             hash=joblib_hash(container_of_array), hash_method="joblib_SHA1",
             type="test_code_analysis.ContainerOfArray",
-            id=id(container_of_array), details={'array': TEST_ARRAY})
+            id=id(container_of_array), details={'array': TEST_ARRAY},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -604,17 +607,19 @@ def test_subscript_initializer(self):
             hash=joblib_hash(custom_object),
             hash_method="joblib_SHA1",
             type="test_code_analysis.CustomObject", id=id(custom_object),
-            details={'data': list_1})
+            details={'data': list_1}, value=None)
 
         source_list_info = DataObject(
             hash=joblib_hash(source_data),
             hash_method="joblib_SHA1",
-            type="builtins.list", id=id(source_data), details={})
+            type="builtins.list", id=id(source_data), details={},
+            value=None)
 
         element_info = DataObject(
             hash=joblib_hash(list_1),
             hash_method="joblib_SHA1",
-            type="builtins.list", id=id(list_1), details={})
+            type="builtins.list", id=id(list_1), details={},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
diff --git a/alpaca/test/test_decorator.py b/alpaca/test/test_decorator.py
index c9a3cef..d1e69c9 100644
--- a/alpaca/test/test_decorator.py
+++ b/alpaca/test/test_decorator.py
@@ -646,29 +646,30 @@ def test_container_output_function_level(self):
 
         elements = [[], []]
         for idx, container in enumerate(res):
-            for el_idx, element in enumerate(container):
+            for element in container:
                 element_info = DataObject(
                     hash=joblib.hash(element, hash_name="sha1"),
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64})
+                    details={'shape': (), 'dtype': np.int64},
+                    value=element)
                 elements[idx].append(element_info)
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
-            type="builtins.list", id=id(res), details={})
+            type="builtins.list", id=id(res), details={}, value=None)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[0]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[1]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         # Check subscript of each element with respect to the array
         containers = [expected_container_1, expected_container_2]
@@ -741,19 +742,21 @@ def test_dict_output_function(self):
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
-            type="builtins.dict", id=id(res), details={})
+            type="builtins.dict", id=id(res), details={}, value=None)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res['key.0']),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res['key.1']),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -805,29 +808,32 @@ def test_dict_output_function_level(self):
 
         elements = {'key.0': [], 'key.1': []}
         for key, container in res.items():
-            for el_idx, element in enumerate(container):
+            for element in container:
                 element_info = DataObject(
                     hash=joblib.hash(element, hash_name="sha1"),
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64})
+                    details={'shape': (), 'dtype': np.int64},
+                    value=element)
                 elements[key].append(element_info)
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
-            type="builtins.dict", id=id(res), details={})
+            type="builtins.dict", id=id(res), details={}, value=None)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 3, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res['key.0']),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res['key.1']),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         # Check subscript of each element with respect to the array
         containers = {
@@ -903,18 +909,19 @@ def test_non_iterable_container_output(self):
         self.assertEqual(len(Provenance.history), 4)
 
         elements = []
-        for el_idx, element in enumerate(res):
-                element_info = DataObject(
-                    hash=joblib.hash(element, hash_name="sha1"),
-                    hash_method="joblib_SHA1",
-                    type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64})
-                elements.append(element_info)
+        for element in res:
+            element_info = DataObject(
+                hash=joblib.hash(element, hash_name="sha1"),
+                hash_method="joblib_SHA1",
+                type="numpy.int64", id=None,
+                details={'shape': (), 'dtype': np.int64},
+                value=element)
+            elements.append(element_info)
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
             type="test_decorator.NonIterableContainer", id=id(res),
-            details={'data': res.data})
+            details={'data': res.data}, value=None)
 
         # Check subscript of each element with respect to the container
         for history_index in (0, 1, 2):
@@ -947,7 +954,6 @@ def test_non_iterable_container_output(self):
             exp_order=1,
             test_case=self)
 
-
     def test_comprehensions(self):
         activate(clear=True)
         num_list = [comprehension_function(i) for i in range(3)]
@@ -966,7 +972,8 @@ def test_comprehensions(self):
                 hash=joblib.hash(element, hash_name='sha1'),
                 hash_method="joblib_SHA1",
                 type="numpy.float64", id=id(element),
-                details={'shape': (), 'dtype': np.float64})
+                details={'shape': (), 'dtype': np.float64},
+                value=element)
 
             _check_function_execution(
                 actual=Provenance.history[history],
@@ -988,7 +995,8 @@ def test_comprehensions(self):
                 hash=joblib.hash(element, hash_name='sha1'),
                 hash_method="joblib_SHA1",
                 type="numpy.float64", id=id(element),
-                details={'shape': (), 'dtype': np.float64})
+                details={'shape': (), 'dtype': np.float64},
+                value=element)
 
             _check_function_execution(
                 actual=Provenance.history[history],
@@ -1010,7 +1018,8 @@ def test_comprehensions(self):
                 hash=joblib.hash(element, hash_name='sha1'),
                 hash_method="joblib_SHA1",
                 type="numpy.float64", id=id(element),
-                details={'shape': (), 'dtype': np.float64})
+                details={'shape': (), 'dtype': np.float64},
+                value=element)
 
             _check_function_execution(
                 actual=Provenance.history[history],
@@ -1157,7 +1166,8 @@ def test_method_descriptor(self):
                      'file_origin': None, 'description': None, 'segment': None,
                      'units': pq.mV.units, 'shape': (3, 1), 'dtype': np.int64,
                      't_start': 0 * pq.s, 't_stop': 3 * pq.s,
-                     'dimensionality': pq.mV.dimensionality})
+                     'dimensionality': pq.mV.dimensionality},
+            value=None)
 
         expected_output = DataObject(
             hash=joblib.hash(reshaped, hash_name='sha1'),
@@ -1169,7 +1179,8 @@ def test_method_descriptor(self):
                      'file_origin': None, 'description': None, 'segment': None,
                      'units': pq.mV.units, 'shape': (1, 3), 'dtype': np.int64,
                      't_start': 0 * pq.s, 't_stop': 1 * pq.s,
-                     'dimensionality': pq.mV.dimensionality})
+                     'dimensionality': pq.mV.dimensionality},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -1197,7 +1208,8 @@ def test_class_constructor(self):
             hash_method="joblib_SHA1",
             type="test_decorator.ObjectWithMethod",
             id=id(obj),
-            details={'coefficient': 2})
+            details={'coefficient': 2},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],
@@ -1261,7 +1273,8 @@ def test_class_constructor_container_output(self):
                     hash=joblib.hash(element, hash_name="sha1"),
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64})
+                    details={'shape': (), 'dtype': np.int64},
+                    value=element)
                 elements.append(element_info)
 
         expected_output = DataObject(
@@ -1269,7 +1282,8 @@ def test_class_constructor_container_output(self):
             hash_method="joblib_SHA1",
             type="test_decorator.NonIterableContainerOutputObject",
             id=id(obj),
-            details={'_data': obj._data})
+            details={'_data': obj._data},
+            value=None)
 
         # Check subscript of each element with respect to the container
         for history_index in (0, 1, 2):
@@ -1307,6 +1321,7 @@ def test_class_constructor_container_output(self):
 def use_dict(source):
     return 3
 
+
 class ProvenanceDecoratorStoreValuesTestCase(unittest.TestCase):
 
     def setUp(self):

From f19b3da1a512a155a4331f7f7495eabcd5c30819 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Tue, 21 Nov 2023 15:09:44 +0100
Subject: [PATCH 6/9] Added option to display stored value for the node in the
 visualization graph

---
 alpaca/graph.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/alpaca/graph.py b/alpaca/graph.py
index 5983273..198e366 100644
--- a/alpaca/graph.py
+++ b/alpaca/graph.py
@@ -98,7 +98,7 @@ def _get_name_value_pair(graph, bnode):
 
 
 def _get_entity_data(graph, entity, annotations=None, attributes=None,
-                     strip_namespace=True):
+                     strip_namespace=True, value_attribute=None):
     filter_map = defaultdict(list)
 
     filter_map.update(
@@ -118,6 +118,12 @@ def _get_entity_data(graph, entity, annotations=None, attributes=None,
                     _add_attribute(data, attr_name, attr_type, attr_value,
                                    strip_namespace)
 
+    # Store the value if requested; falsy literals (0, False, "") are valid
+    if value_attribute:
+        value = graph.value(entity, PROV.value)
+        if value is not None:
+            data[value_attribute] = value.toPython()
+
     if data['type'] == NSS_FILE:
         file_path = str(list(graph.objects(entity, ALPACA.filePath))[0])
         data["File_path"] = file_path
@@ -223,6 +229,15 @@ class name of the object (e.g., `ndarray`). The `Python_name` node
         time interval strings in the format supported by the Gephi timeline
         feature. If False, the attribute is not included.
         Default: True
+    value_attribute : str, optional
+        If provided, an attribute named `value_attribute` is added to each
+        node that has a value stored in the provenance information; nodes
+        without a stored value do not get the attribute. By default, Alpaca
+        stores the values of the builtin types `str`, `bool`, `int`, `float`
+        and `complex`, and of NumPy numeric types (e.g. `numpy.float64`).
+        Storing the values of additional types can be configured using the
+        :func:`alpaca.settings.alpaca_setting` function.
+        Default: None
 
     Attributes
     ----------
@@ -235,7 +250,7 @@ class name of the object (e.g., `ndarray`). The `Python_name` node
     def __init__(self, *prov_file, annotations=None, attributes=None,
                  strip_namespace=True, remove_none=True,
                  use_name_in_parameter=True, use_class_in_method_name=True,
-                 time_intervals=True):
+                 time_intervals=True, value_attribute=None):
 
         # Load PROV records from the file(s)
         doc = AlpacaProvDocument()
@@ -250,7 +265,7 @@ def __init__(self, *prov_file, annotations=None, attributes=None,
             strip_namespace=strip_namespace, remove_none=remove_none,
             use_name_in_parameter=use_name_in_parameter,
             use_class_in_method_name=use_class_in_method_name,
-            time_intervals=time_intervals
+            time_intervals=time_intervals, value_attribute=value_attribute
         )
 
         if time_intervals:
@@ -319,7 +334,7 @@ def _transform_graph(graph, annotations=None, attributes=None,
                          strip_namespace=True, remove_none=True,
                          use_name_in_parameter=True,
                          use_class_in_method_name=True,
-                         time_intervals=True):
+                         time_intervals=True, value_attribute=None):
         # Transform an RDFlib graph obtained from the PROV data, so that the
         # visualization is simplified. A new `nx.DiGraph` object is created
         # and returned. Annotations and attributes of the entities stored in
@@ -341,7 +356,8 @@ def _transform_graph(graph, annotations=None, attributes=None,
             data = _get_entity_data(graph, entity,
                                     annotations=annotations,
                                     attributes=attributes,
-                                    strip_namespace=strip_namespace)
+                                    strip_namespace=strip_namespace,
+                                    value_attribute=value_attribute)
             transformed.add_node(node_id, **data)
 
         # Add all the edges.

From 22141fb79a06904edc70e7a43d8af50173558f4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Tue, 21 Nov 2023 15:10:10 +0100
Subject: [PATCH 7/9] Unit tests for visualization graph

---
 alpaca/test/test_graph.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/alpaca/test/test_graph.py b/alpaca/test/test_graph.py
index 46bb78b..f07314e 100644
--- a/alpaca/test/test_graph.py
+++ b/alpaca/test/test_graph.py
@@ -440,6 +440,32 @@ def test_remove_multiple_attributes_aggregation(self):
             self.assertTrue("Time Interval" not in node_attrs)
             self.assertTrue("sua" not in node_attrs)
 
+    def test_value_attribute(self):
+        input_file = self.ttl_path / "values.ttl"
+        graph = ProvenanceGraph(input_file, attributes='all',
+                                annotations='all', value_attribute='value')
+        # Expected `value` node attribute by node ID (None also matches absent)
+        node_values_by_id = {
+            "urn:fz-juelich.de:alpaca:object:Python:builtins.int:543211": 1,
+            "urn:fz-juelich.de:alpaca:object:Python:builtins.float:543212": 1.1,
+            "urn:fz-juelich.de:alpaca:object:Python:builtins.str:543213": "test",
+            "urn:fz-juelich.de:alpaca:object:Python:builtins.complex:543214": "(3+5j)",
+            "urn:fz-juelich.de:alpaca:object:Python:builtins.bool:543215": True,
+            "urn:fz-juelich.de:alpaca:object:Python:numpy.float32:543216": 1.2,
+            "urn:fz-juelich.de:alpaca:object:Python:numpy.float64:543217": 1.3,
+            "urn:fz-juelich.de:alpaca:object:Python:numpy.int64:543218": 2,
+            "urn:fz-juelich.de:alpaca:object:Python:numpy.int32:543219": 3,
+            "urn:fz-juelich.de:alpaca:object:Python:numpy.int16:5432110": -4,
+            "urn:fz-juelich.de:alpaca:object:Python:builtins.dict:5432111": "{'id': [1, 2, 3], 'value': {4, 5, 6}}",
+            "urn:fz-juelich.de:alpaca:object:Python:test.InputObject:12345": None,
+            "urn:fz-juelich.de:alpaca:object:Python:test.OutputObject:54321": None,
+        }
+        # Only nodes of type 'object' are checked against the mapping
+        for node, node_attrs in graph.graph.nodes(data=True):
+            if node_attrs['type'] == 'object':
+                expected_value = node_values_by_id[node]
+                self.assertEqual(expected_value, node_attrs.get('value', None))
+
 
 class GraphTimeIntervalTestCase(unittest.TestCase):
 

From 8c66a5d2b6a01bdefe0893841a04d589bf6b8755 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Tue, 21 Nov 2023 15:10:41 +0100
Subject: [PATCH 8/9] Added missing value entries to new unit tests

---
 alpaca/test/test_decorator.py | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/alpaca/test/test_decorator.py b/alpaca/test/test_decorator.py
index 20253cf..d41ab49 100644
--- a/alpaca/test/test_decorator.py
+++ b/alpaca/test/test_decorator.py
@@ -707,19 +707,19 @@ def test_container_output_function_level_0(self):
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
-            type="builtins.list", id=id(res), details={})
+            type="builtins.list", id=id(res), details={}, value=None)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 7, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[0]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 8, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[1]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         # Check the subscript of each array with respect to the list returned
         _check_function_execution(
@@ -869,19 +869,19 @@ def test_container_output_function_level_range_0_0(self):
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
-            type="builtins.list", id=id(res), details={})
+            type="builtins.list", id=id(res), details={}, value=None)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 1, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[0]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 2, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[1]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         # Check the subscript of each array with respect to the list returned
         _check_function_execution(
@@ -940,24 +940,24 @@ def test_container_output_function_level_range_0_1(self):
                     hash=joblib.hash(element, hash_name="sha1"),
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64})
+                    details={'shape': (), 'dtype': np.int64}, value=None)
                 elements[idx].append(element_info)
 
         expected_output = DataObject(
             hash=joblib.hash(res, hash_name="sha1"), hash_method="joblib_SHA1",
-            type="builtins.list", id=id(res), details={})
+            type="builtins.list", id=id(res), details={}, value=None)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 5, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[0]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 6, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[1]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64}, value=None)
 
         # Check subscript of each element with respect to the array
         containers = [expected_container_1, expected_container_2]
@@ -1035,20 +1035,23 @@ def test_container_output_function_level_range_1_1(self):
                     hash=joblib.hash(element, hash_name="sha1"),
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64})
+                    details={'shape': (), 'dtype': np.int64},
+                    value=None)
                 elements[idx].append(element_info)
 
         expected_container_1 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[0]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         expected_container_2 = DataObject(
             hash=joblib.hash(TEST_ARRAY + 5, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res[1]),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         # Check subscript of each element with respect to the array
         containers = [expected_container_1, expected_container_2]
@@ -1521,13 +1524,15 @@ def test_static_method(self):
             hash_method="joblib_SHA1",
             type="test_decorator.ObjectWithMethod",
             id=id(obj),
-            details={'coefficient': 2})
+            details={'coefficient': 2},
+            value=None)
 
         expected_output = DataObject(
             hash=joblib.hash(TEST_ARRAY+4, hash_name='sha1'),
             hash_method="joblib_SHA1",
             type="numpy.ndarray", id=id(res),
-            details={'shape': (3,), 'dtype': np.int64})
+            details={'shape': (3,), 'dtype': np.int64},
+            value=None)
 
         _check_function_execution(
             actual=Provenance.history[0],

From afde4e197df307dae77cc92707ac36a7dedcd5a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cristiano=20K=C3=B6hler?= <c.koehler@fz-juelich.de>
Date: Tue, 21 Nov 2023 15:16:09 +0100
Subject: [PATCH 9/9] Added missing value entries to new unit tests

---
 alpaca/test/test_decorator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/alpaca/test/test_decorator.py b/alpaca/test/test_decorator.py
index d41ab49..2ea6dd3 100644
--- a/alpaca/test/test_decorator.py
+++ b/alpaca/test/test_decorator.py
@@ -940,7 +940,7 @@ def test_container_output_function_level_range_0_1(self):
                     hash=joblib.hash(element, hash_name="sha1"),
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
-                    details={'shape': (), 'dtype': np.int64}, value=None)
+                    details={'shape': (), 'dtype': np.int64}, value=element)
                 elements[idx].append(element_info)
 
         expected_output = DataObject(
@@ -1036,7 +1036,7 @@ def test_container_output_function_level_range_1_1(self):
                     hash_method="joblib_SHA1",
                     type="numpy.int64", id=None,
                     details={'shape': (), 'dtype': np.int64},
-                    value=None)
+                    value=element)
                 elements[idx].append(element_info)
 
         expected_container_1 = DataObject(