From 0063cc0251ccecd13b546667eb04787b75b6412f Mon Sep 17 00:00:00 2001 From: raphaelDkhn Date: Thu, 18 Jan 2024 16:18:30 +0200 Subject: [PATCH 1/3] refactor serializer --- osiris/cairo/serde/serialize.py | 30 ++++++++++--------------- tests/test_serialize.py | 39 ++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/osiris/cairo/serde/serialize.py b/osiris/cairo/serde/serialize.py index 8223530..c49276c 100644 --- a/osiris/cairo/serde/serialize.py +++ b/osiris/cairo/serde/serialize.py @@ -6,30 +6,24 @@ ) -def serializer(data) -> list[str]: +def serializer(data): if isinstance(data, bool): - return ["1"] if data else ["0"] + return "1" if data else "0" elif isinstance(data, int): - return [str(data)] + if data >= 0: + return f"{data}" + else: + raise ValueError("Native signed integers are not supported yet") + # TODO: Support native signed-int elif isinstance(data, (list, tuple)): - serialized_list = [str(len(data))] - for item in data: - serialized_list.extend(serializer(item)) - return serialized_list - elif isinstance(data, dict): - serialized_dict = [str(len(data))] - for key, value in data.items(): - serialized_dict.extend(serializer(key)) - serialized_dict.extend(serializer(value)) - return serialized_dict + joined_elements = ' '.join(serializer(e) for e in data) + return f"[{joined_elements}]" elif isinstance(data, Tensor): - serialized_tensor = serializer(data.shape) - serialized_tensor.extend(serializer(data.data)) - return serialized_tensor + return f"{serializer(data.shape)} {serializer(data.data)}" elif isinstance(data, (SignedInt, FixedPoint)): - return [str(data.mag), str(data.sign)] + return f"{serializer(data.mag)} {serializer(data.sign)}" elif isinstance(data, UnsignedInt): - return [str(data.mag)] + return f"{data.mag}" else: raise ValueError("Unsupported data type for serialization") diff --git a/tests/test_serialize.py b/tests/test_serialize.py index cb92cbb..cf4750d 100644 --- 
a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -25,15 +25,42 @@ def test_create_tensor_from_array_with_invalid_input(): create_tensor_from_array("not a numpy array") -def test_serializer_for_tensor_signedint(): - arr = np.array([[1, 2], [3, 4]], dtype=np.int64) - tensor = create_tensor_from_array(arr) - serialized_data = serializer(tensor) - assert isinstance(serialized_data, list) +def test_serializer_for_boolean(): + data = False + serialized_data = serializer(data) + assert serialized_data == "0" + + +def test_serializer_for_int(): + data = 42 + serialized_data = serializer(data) + assert serialized_data == "42" + +def test_serializer_for_list(): + data = [1, 2, 3] + serialized_data = serializer(data) + assert serialized_data == "[1 2 3]" + + +def test_serializer_for_tuple(): + data = (1, 2, 3) + serialized_data = serializer(data) + assert serialized_data == "[1 2 3]" + +def test_serializer_for_fixedpoint(): + data = FixedPoint(42, True) + serialized_data = serializer(data) + assert serialized_data == "42 1" def test_serializer_for_tensor_uint(): arr = np.array([[1, 2], [3, 4]], dtype=np.uint64) tensor = create_tensor_from_array(arr) serialized_data = serializer(tensor) - assert isinstance(serialized_data, list) + assert serialized_data == "[2 2] [1 2 3 4]" + +def test_serializer_for_tensor_fixedpoint(): + arr = np.array([[1, 2], [3, 4]], dtype=np.float32) + tensor = create_tensor_from_array(arr) + serialized_data = serializer(tensor) + assert serialized_data == "[2 2] [65536 0 131072 0 196608 0 262144 0]" \ No newline at end of file From 987d083270f0b7b06083cd1296cafa0b172c228b Mon Sep 17 00:00:00 2001 From: raphaelDkhn Date: Fri, 19 Jan 2024 17:30:23 +0200 Subject: [PATCH 2/3] modify deserializer --- osiris/cairo/serde/deserialize.py | 205 ++++++++++++++++-------------- tests/test_deserialize.py | 99 +++++++-------- 2 files changed, 156 insertions(+), 148 deletions(-) diff --git a/osiris/cairo/serde/deserialize.py 
b/osiris/cairo/serde/deserialize.py index 283e5bc..bbf57be 100644 --- a/osiris/cairo/serde/deserialize.py +++ b/osiris/cairo/serde/deserialize.py @@ -28,20 +28,21 @@ def deserializer(serialized: list, data_type: str, fp_impl='FP16x16'): return deserialize_tensor_uint(serialized) elif data_type == 'tensor_signed_int': return deserialize_tensor_signed_int(serialized) - elif data_type == 'tensor_fixed_point': - return deserialize_tensor_fixed_point(serialized, fp_impl) - elif data_type == 'tuple_uint': - return deserialize_tuple_uint(serialized) - elif data_type == 'tuple_signed_int': - return deserialize_tuple_signed_int(serialized) - elif data_type == 'tuple_fixed_point': - return deserialize_tuple_fixed_point(serialized, fp_impl) - elif data_type == 'tuple_tensor_uint': - return deserialize_tuple_tensor_uint(serialized) - elif data_type == 'tuple_tensor_signed_int': - return deserialize_tuple_tensor_signed_int(serialized) - elif data_type == 'tuple_tensor_fixed_point': - return deserialize_tuple_tensor_fixed_point(serialized, fp_impl) + # TODO: Support Tuples + # elif data_type == 'tensor_fixed_point': + # return deserialize_tensor_fixed_point(serialized, fp_impl) + # elif data_type == 'tuple_uint': + # return deserialize_tuple_uint(serialized) + # elif data_type == 'tuple_signed_int': + # return deserialize_tuple_signed_int(serialized) + # elif data_type == 'tuple_fixed_point': + # return deserialize_tuple_fixed_point(serialized, fp_impl) + # elif data_type == 'tuple_tensor_uint': + # return deserialize_tuple_tensor_uint(serialized) + # elif data_type == 'tuple_tensor_signed_int': + # return deserialize_tuple_tensor_signed_int(serialized) + # elif data_type == 'tuple_tensor_fixed_point': + # return deserialize_tuple_tensor_fixed_point(serialized, fp_impl) else: raise ValueError(f"Unknown data type: {data_type}") @@ -76,44 +77,56 @@ def deserialize_fixed_point(serialized: list, impl='FP16x16') -> np.float64: def deserialize_arr_uint(serialized: list) -> np.array: - 
return np.array(serialized[1:], dtype=np.int64) + return np.array(serialized[0], dtype=np.int64) # ================= ARRAY SIGNED INT ================= -def deserialize_arr_signed_int(serialized: list) -> np.array: - num_ele = (len(serialized) - 1) // 2 +def deserialize_arr_signed_int(serialized): - deserialized_array = np.empty(num_ele, dtype=np.int64) + serialized = serialized[0] - for i in range(num_ele): - deserialized_array[i] = deserialize_signed_int( - serialized[1 + i*2: 3 + i*2]) + if len(serialized) % 2 != 0: + raise ValueError("Array length must be even") - return deserialized_array + deserialized = [] + for i in range(0, len(serialized), 2): + mag = serialized[i] + sign = serialized[i + 1] + + if sign == 1: + mag = -mag + + deserialized.append(mag) + + return np.array(deserialized) # ================= ARRAY FIXED POINT ================= -def deserialize_arr_fixed_point(serialized: list, impl='FP16x16') -> np.array: - num_ele = (len(serialized) - 1) // 2 +def deserialize_arr_fixed_point(serialized: list, impl='FP16x16'): - deserialized_array = np.empty(num_ele, dtype=np.float64) + serialized = serialized[0] - for i in range(num_ele): - deserialized_array[i] = deserialize_fixed_point( - serialized[1 + i*2: 3 + i*2], impl) + if len(serialized) % 2 != 0: + raise ValueError("Array length must be even") - return deserialized_array + deserialized = [] + for i in range(0, len(serialized), 2): + mag = serialized[i] + sign = serialized[i + 1] + + deserialized.append(deserialize_fixed_point([mag, sign], impl)) + + return np.array(deserialized) # ================= TENSOR UINT ================= def deserialize_tensor_uint(serialized: list) -> np.array: - num_shape_elements = serialized[0] - shape = serialized[1:1 + num_shape_elements] - data = serialized[1 + num_shape_elements + 1:] + shape = serialized[0] + data = serialized[1] return np.array(data, dtype=np.int64).reshape(shape) @@ -121,108 +134,104 @@ def deserialize_tensor_uint(serialized: list) -> np.array: def 
deserialize_tensor_signed_int(serialized: list) -> np.array: - num_shape_elements = serialized[0] - shape = serialized[1:1 + num_shape_elements] - data = deserialize_arr_signed_int( - serialized[1 + num_shape_elements:]) + shape = serialized[0] + data = deserialize_arr_signed_int([serialized[1]]) - return data.reshape(shape) + return np.array(data, dtype=np.int64).reshape(shape) # ================= TENSOR FIXED POINT ================= - def deserialize_tensor_fixed_point(serialized: list, impl='FP16x16') -> np.array: - num_shape_elements = serialized[0] - shape = serialized[1:1 + num_shape_elements] - data = deserialize_arr_fixed_point( - serialized[1 + num_shape_elements:], impl) + shape = serialized[0] + data = deserialize_arr_fixed_point([serialized[1]], impl) + + return np.array(data, dtype=np.float64).reshape(shape) - return data.reshape(shape) # ================= TUPLE UINT ================= -def deserialize_tuple_uint(serialized: list): - return np.array(serialized, dtype=np.int64) +# def deserialize_tuple_uint(serialized: list): +# return np.array(serialized[0], dtype=np.int64) -# ================= TUPLE SIGNED INT ================= +# # ================= TUPLE SIGNED INT ================= -def deserialize_tuple_signed_int(serialized: list): - num_ele = (len(serialized)) // 2 +# def deserialize_tuple_signed_int(serialized: list): +# num_ele = (len(serialized)) // 2 - deserialized_array = np.empty(num_ele, dtype=np.int64) +# deserialized_array = np.empty(num_ele, dtype=np.int64) - for i in range(num_ele): - deserialized_array[i] = deserialize_signed_int( - serialized[i*2: 3 + i*2]) +# for i in range(num_ele): +# deserialized_array[i] = deserialize_signed_int( +# serialized[i*2: 3 + i*2]) - return deserialized_array +# return deserialized_array -# ================= TUPLE FIXED POINT ================= +# # ================= TUPLE FIXED POINT ================= -def deserialize_tuple_fixed_point(serialized: list, impl='FP16x16'): - num_ele = (len(serialized)) // 
2 +# def deserialize_tuple_fixed_point(serialized: list, impl='FP16x16'): +# num_ele = (len(serialized)) // 2 - deserialized_array = np.empty(num_ele, dtype=np.float64) +# deserialized_array = np.empty(num_ele, dtype=np.float64) - for i in range(num_ele): - deserialized_array[i] = deserialize_fixed_point( - serialized[i*2: 3 + i*2], impl) +# for i in range(num_ele): +# deserialized_array[i] = deserialize_fixed_point( +# serialized[i*2: 3 + i*2], impl) - return deserialized_array +# return deserialized_array -# ================= TUPLE TENSOR UINT ================= +# # ================= TUPLE TENSOR UINT ================= -def deserialize_tuple_tensor_uint(serialized: list): - return deserialize_tuple_tensor(serialized, deserialize_arr_uint) +# def deserialize_tuple_tensor_uint(serialized: list): +# return deserialize_tuple_tensor(serialized, deserialize_arr_uint) -# ================= TUPLE TENSOR SIGNED INT ================= +# # ================= TUPLE TENSOR SIGNED INT ================= -def deserialize_tuple_tensor_signed_int(serialized: list): - return deserialize_tuple_tensor(serialized, deserialize_arr_signed_int) +# def deserialize_tuple_tensor_signed_int(serialized: list): +# return deserialize_tuple_tensor(serialized, deserialize_arr_signed_int) -# ================= TUPLE TENSOR FIXED POINT ================= +# # ================= TUPLE TENSOR FIXED POINT ================= -def deserialize_tuple_tensor_fixed_point(serialized: list, impl='FP16x16'): - return deserialize_tuple_tensor(serialized, deserialize_arr_fixed_point, impl) +# def deserialize_tuple_tensor_fixed_point(serialized: list, impl='FP16x16'): +# return deserialize_tuple_tensor(serialized, deserialize_arr_fixed_point, impl) -# ================= HELPERS ================= +# # ================= HELPERS ================= -def extract_shape(serialized, start_index): - """ Extracts the shape part of a tensor from a serialized list. 
""" - num_shape_elements = serialized[start_index] - shape = serialized[start_index + 1: start_index + 1 + num_shape_elements] - return shape, start_index + 1 + num_shape_elements +# def extract_shape(serialized, start_index): +# """ Extracts the shape part of a tensor from a serialized list. """ +# num_shape_elements = serialized[start_index] +# shape = serialized[start_index + 1: start_index + 1 + num_shape_elements] +# return shape, start_index + 1 + num_shape_elements -def extract_data(serialized, start_index, deserialization_func, impl=None): - """ Extracts and deserializes the data part of a tensor from a serialized list. """ - num_data_elements = serialized[start_index] - end_index = start_index + 1 + num_data_elements - data_serialized = serialized[start_index: end_index] - if impl: - data = deserialization_func(data_serialized, impl) - else: - data = deserialization_func(data_serialized) - return data, end_index - - -def deserialize_tuple_tensor(serialized, deserialization_func, impl=None): - """ Generic deserialization function for a tuple of tensors. """ - deserialized_tensors = [] - i = 0 - while i < len(serialized): - shape, i = extract_shape(serialized, i) - data, i = extract_data(serialized, i, deserialization_func, impl) - tensor = data.reshape(shape) - deserialized_tensors.append(tensor) - return tuple(deserialized_tensors) +# def extract_data(serialized, start_index, deserialization_func, impl=None): +# """ Extracts and deserializes the data part of a tensor from a serialized list. """ +# num_data_elements = serialized[start_index] +# end_index = start_index + 1 + num_data_elements +# data_serialized = serialized[start_index: end_index] +# if impl: +# data = deserialization_func(data_serialized, impl) +# else: +# data = deserialization_func(data_serialized) +# return data, end_index + + +# def deserialize_tuple_tensor(serialized, deserialization_func, impl=None): +# """ Generic deserialization function for a tuple of tensors. 
""" +# deserialized_tensors = [] +# i = 0 +# while i < len(serialized): +# shape, i = extract_shape(serialized, i) +# data, i = extract_data(serialized, i, deserialization_func, impl) +# tensor = data.reshape(shape) +# deserialized_tensors.append(tensor) +# return tuple(deserialized_tensors) diff --git a/tests/test_deserialize.py b/tests/test_deserialize.py index 2609f1f..b6509dc 100644 --- a/tests/test_deserialize.py +++ b/tests/test_deserialize.py @@ -26,92 +26,91 @@ def test_deserialize_signed_int(): def test_deserialize_array_uint(): - serialized = [2, 1, 2] + serialized = [[1, 2]] deserialized = deserialize_arr_uint(serialized) assert np.array_equal(deserialized, np.array([1, 2], dtype=np.int64)) def test_deserialize_array_signed_int(): - serialized = [2, 42, 0, 42, 1] + serialized = [[42, 0, 42, 1]] deserialized = deserialize_arr_signed_int(serialized) assert np.array_equal(deserialized, np.array([42, -42], dtype=np.int64)) def test_deserialize_arr_fixed_point(): - serialized = [2, 2780037, 0, 2780037, 1] + serialized = [[2780037, 0, 2780037, 1]] deserialized = deserialize_arr_fixed_point(serialized) expected = np.array([42.42, -42.42], dtype=np.float64) assert np.all(np.isclose(deserialized, expected, atol=1e-7)) -def test_deserialize_tuple_uint(): - serialized = [1, 2] - deserialized = deserialize_tuple_uint(serialized) - assert np.array_equal(deserialized, np.array([1, 2], dtype=np.int64)) - - -def test_deserialize_tuple_signed_int(): - serialized = [42, 0, 42, 1, 42, 0] - deserialized = deserialize_tuple_signed_int(serialized) - assert np.array_equal(deserialized, np.array( - [42, -42, 42], dtype=np.int64)) - - -def test_deserialize_tuple_fixed_point(): - serialized = [2780037, 0, 2780037, 1, 2780037, 0] - deserialized = deserialize_tuple_fixed_point(serialized) - expected = np.array([42.42, -42.42, 42.42], dtype=np.float64) - assert np.all(np.isclose(deserialized, expected, atol=1e-7)) - - def test_deserialize_tensor_uint(): - serialized = [2, 2, 2, 4, 
1, 2, 3, 4] + serialized = [[2, 2], [1, 2, 3, 4]] deserialized = deserialize_tensor_uint(serialized) assert np.array_equal(deserialized, np.array( ([1, 2], [3, 4]), dtype=np.int64)) def test_deserialize_tensor_signed_int(): - serialized_tensor = [2, 2, 2, 8, 42, 0, 42, 0, 42, 1, 42, 1] + serialized_tensor = [[2, 2], [42, 0, 42, 0, 42, 1, 42, 1]] deserialized = deserialize_tensor_signed_int(serialized_tensor) assert np.array_equal(deserialized, np.array([[42, 42], [-42, -42]])) def test_deserialize_tensor_fixed_point(): - serialized_tensor = [2, 2, 2, 8, 2780037, - 0, 2780037, 0, 2780037, 1, 2780037, 1] + serialized_tensor = [[2, 2], [2780037, + 0, 2780037, 0, 2780037, 1, 2780037, 1]] expected_array = np.array([[42.42, 42.42], [-42.42, -42.42]]) deserialized = deserialize_tensor_fixed_point(serialized_tensor) assert np.allclose(deserialized, expected_array, atol=1e-7) -def test_deserialize_tensor_tuple_tensor_uint(): - serialized = [2, 2, 2, 4, 1, 2, 3, 4, 2, 2, 2, 4, 5, 6, 7, 8] - deserialized = deserialize_tuple_tensor_uint(serialized) +# def test_deserialize_tuple_uint(): +# serialized = [1, 2] +# deserialized = deserialize_tuple_uint(serialized) +# assert np.array_equal(deserialized, np.array([1, 2], dtype=np.int64)) - assert np.array_equal(deserialized[0], np.array( - [[1, 2], [3, 4]], dtype=np.int64)) - assert np.array_equal(deserialized[1], np.array( - [[5, 6], [7, 8]], dtype=np.int64)) +# def test_deserialize_tuple_signed_int(): +# serialized = [42, 0, 42, 1, 42, 0] +# deserialized = deserialize_tuple_signed_int(serialized) +# assert np.array_equal(deserialized, np.array( +# [42, -42, 42], dtype=np.int64)) -def test_deserialize_tensor_tuple_tensor_signed_int(): - serialized = [2, 2, 2, 8, 42, - 0, 42, 0, 42, 1, 42, 1, 2, 2, 2, 8, 42, - 0, 42, 0, 42, 1, 42, 1] - deserialized = deserialize_tuple_tensor_signed_int(serialized) - expected_array = np.array([[42, 42], [-42, -42]]) - assert np.allclose(deserialized[0], expected_array, atol=1e-7) - assert 
np.allclose(deserialized[1], expected_array, atol=1e-7) +# def test_deserialize_tuple_fixed_point(): +# serialized = [2780037, 0, 2780037, 1, 2780037, 0] +# deserialized = deserialize_tuple_fixed_point(serialized) +# expected = np.array([42.42, -42.42, 42.42], dtype=np.float64) +# assert np.all(np.isclose(deserialized, expected, atol=1e-7)) +# def test_deserialize_tensor_tuple_tensor_uint(): +# serialized = [2, 2, 2, 4, 1, 2, 3, 4, 2, 2, 2, 4, 5, 6, 7, 8] +# deserialized = deserialize_tuple_tensor_uint(serialized) -def test_deserialize_tensor_tuple_tensor_fixed_point(): - serialized = [2, 2, 2, 8, 2780037, - 0, 2780037, 0, 2780037, 1, 2780037, 1, 2, 2, 2, 8, 2780037, - 0, 2780037, 0, 2780037, 1, 2780037, 1] - deserialized = deserialize_tuple_tensor_fixed_point(serialized) +# assert np.array_equal(deserialized[0], np.array( +# [[1, 2], [3, 4]], dtype=np.int64)) +# assert np.array_equal(deserialized[1], np.array( +# [[5, 6], [7, 8]], dtype=np.int64)) - expected_array = np.array([[42.42, 42.42], [-42.42, -42.42]]) - assert np.allclose(deserialized[0], expected_array, atol=1e-7) - assert np.allclose(deserialized[1], expected_array, atol=1e-7) + +# def test_deserialize_tensor_tuple_tensor_signed_int(): +# serialized = [2, 2, 2, 8, 42, +# 0, 42, 0, 42, 1, 42, 1, 2, 2, 2, 8, 42, +# 0, 42, 0, 42, 1, 42, 1] +# deserialized = deserialize_tuple_tensor_signed_int(serialized) + +# expected_array = np.array([[42, 42], [-42, -42]]) +# assert np.allclose(deserialized[0], expected_array, atol=1e-7) +# assert np.allclose(deserialized[1], expected_array, atol=1e-7) + + +# def test_deserialize_tensor_tuple_tensor_fixed_point(): +# serialized = [2, 2, 2, 8, 2780037, +# 0, 2780037, 0, 2780037, 1, 2780037, 1, 2, 2, 2, 8, 2780037, +# 0, 2780037, 0, 2780037, 1, 2780037, 1] +# deserialized = deserialize_tuple_tensor_fixed_point(serialized) + +# expected_array = np.array([[42.42, 42.42], [-42.42, -42.42]]) +# assert np.allclose(deserialized[0], expected_array, atol=1e-7) +# assert 
np.allclose(deserialized[1], expected_array, atol=1e-7) From d4a241eaae04587fdf80ea743f52ad32b10587e7 Mon Sep 17 00:00:00 2001 From: raphaelDkhn Date: Fri, 19 Jan 2024 17:31:11 +0200 Subject: [PATCH 3/3] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1f76c2f..2174ace 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "giza-osiris" -version = "0.1.6" +version = "0.1.7" description = "Osiris is a Python library designed for efficient data conversion and management, primarily transforming data into Cairo programs" authors = ["Fran Algaba "] readme = "README.md"