Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster packing #1172

Open
wants to merge 10 commits into
base: dev
Choose a base branch
from
Prev Previous commit
Next Next commit
Pre-Commit Hooks
bwintermann committed Aug 27, 2024
commit ddcf4bb4173ea46d7449ec5c42f61b07d9a17a59
36 changes: 20 additions & 16 deletions src/finn/util/data_packing.py
Original file line number Diff line number Diff line change
@@ -27,20 +27,19 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import binascii
from math import ceil

# Import the faster packing functions. This is executed when loading the module
# so that the faster version is always available when this is imported
import ctypes as ct
import numpy as np
import os
import sys
from bitstring import BitArray
from math import ceil
from numpy.ctypeslib import ndpointer
from qonnx.core.datatype import DataType
from qonnx.util.basic import roundup_to_integer_multiple


# Import the faster packing functions. This is executed when loading the module so that the faster version is always available when this is imported
import ctypes as ct
from numpy.ctypeslib import ndpointer
import os

# Setup
fastpack_source = os.path.abspath(os.path.join(os.path.dirname(__file__), "fast_pack.c"))
fastpack_lib = os.path.abspath(os.path.join(os.path.dirname(__file__), "fastpack.so"))
@@ -53,12 +52,15 @@
# Load
fastpack = ct.CDLL(fastpack_lib)
fastpack_floatarray = ndpointer(ct.c_float, flags="C_CONTIGUOUS")
fastpack.array_to_hexstring_binary.argtypes = (fastpack_floatarray, ct.c_uint, ct.c_uint, ct.c_char_p)
fastpack.array_to_hexstring_binary.argtypes = (
fastpack_floatarray,
ct.c_uint,
ct.c_uint,
ct.c_char_p,
)
fastpack.array_to_hexstring_binary.restype = ct.c_bool




def array2hexstring(array, dtype, pad_to_nbits, prefix="0x", reverse=False, use_fastpack=True):
"""
Pack given one-dimensional NumPy array with FINN DataType dtype into a hex
@@ -69,9 +71,9 @@ def array2hexstring(array, dtype, pad_to_nbits, prefix="0x", reverse=False, use_
fixed width. The minimum value for pad_to_nbits is 4, since a single hex
digit is four bits. reverse can be used to reverse the array prior to
packing.
When use_fastpack is set to true, if available the function is outsourced
When use_fastpack is set to True, packing is delegated to a faster C
implementation where one is available (currently only for BINARY data
with the "0x" prefix).

Examples:

array2hexstring([1, 1, 1, 0], DataType["BINARY"], 4) = "0xe"
@@ -98,16 +100,16 @@ def array2hexstring(array, dtype, pad_to_nbits, prefix="0x", reverse=False, use_
if reverse:
array = np.flip(array, -1)


# Check if the fast way can be taken
# TODO: Expand this to cover more cases
if use_fastpack and dtype == DataType["BINARY"] and prefix == "0x":
output_string = ct.create_string_buffer(ceil(pad_to_nbits / 4) + 4)
success = fastpack.array_to_hexstring_binary(np.asarray(array, order='C'), array.size, pad_to_nbits, output_string)
success = fastpack.array_to_hexstring_binary(
np.asarray(array, order="C"), array.size, pad_to_nbits, output_string
)
assert success, f"Could not convert array {array} with datatype {dtype} to hexstring!"
return output_string.value.decode("utf-8")


lineval = BitArray(length=0)
bw = dtype.bitwidth()
# special handling for fixed point: rescale, then pack as integers
@@ -187,7 +189,9 @@ def pack_innermost_dim_as_hex_string(
ndarray = np.asarray(ndarray, dtype=np.float32)

def fun(x):
return array2hexstring(x, dtype, pad_to_nbits, reverse=reverse_inner, prefix=prefix, use_fastpack=use_fastpack)
return array2hexstring(
x, dtype, pad_to_nbits, reverse=reverse_inner, prefix=prefix, use_fastpack=use_fastpack
)

return np.apply_along_axis(fun, ndarray.ndim - 1, ndarray)

6 changes: 3 additions & 3 deletions src/finn/util/fast_pack.c
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@
* Takes a numpy array of floats in BINARY datatype from finn and the number of elements in that array, as well as the number of padded bits required.
* It also takes an out-string buffer to write the results to. This buffer is created by python via ctypes.create_string_buffer() and must be large enough to
* hold the required number of padded bits.
*
*
* The function returns false on an error and true in case of success
*/
bool array_to_hexstring_binary(float* values, unsigned int elements, unsigned int padded_bits, char* out) {
@@ -57,6 +57,6 @@ bool array_to_hexstring_binary(float* values, unsigned int elements, unsigned in
} else {
bit_shift_left++;
}
}
}
return true;
}
}
57 changes: 40 additions & 17 deletions tests/util/test_data_packing.py
Original file line number Diff line number Diff line change
@@ -26,21 +26,22 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import ctypes
import pytest

import numpy as np
import time
import os
import shutil
import subprocess
from finn.util.data_packing import array2hexstring
import time
from qonnx.core.datatype import DataType
from qonnx.util.basic import gen_finn_dt_tensor

from finn.util.basic import make_build_dir
from finn.util.data_packing import npy_to_rtlsim_input, numpy_to_hls_code, pack_innermost_dim_as_hex_string
from finn.util.data_packing import *
from finn.util.data_packing import (
npy_to_rtlsim_input,
numpy_to_hls_code,
pack_innermost_dim_as_hex_string,
)


@pytest.mark.util
@@ -186,42 +187,64 @@ def test_npy_to_rtlsim_input(dtype):
assert np.all(output_fast == output_slow_split), "different behavior of packing modes detected"



@pytest.mark.util
@pytest.mark.parametrize("tensorshape", [
(1, 2, 16384, 64),
(1, 1024, 2048)
])
@pytest.mark.parametrize("tensorshape", [(1, 2, 16384, 64), (1, 1024, 2048)])
def test_pack_innermost_dim_to_hexstring_fast(tensorshape: tuple[int]):
# check that the sped-up function call in pack_innermost_dim_as_hex_string() is valid
tensor_count = 5
assert tensorshape[-1] % 4 == 0, "Smallest tensorshape dimension must be divisible by 4"

# Create random binary tensor by simply rounding a random tensor
tensors = [np.round(np.random.random(tensorshape)).astype(np.float32) for i in range(tensor_count)]
tensors = [
np.round(np.random.random(tensorshape)).astype(np.float32) for i in range(tensor_count)
]
results_python = []
results_c = []

# Test C impl
start_c = time.time()
for count in range(tensor_count):
c_result = pack_innermost_dim_as_hex_string(tensors[count], DataType["BINARY"], tensorshape[-1] * 2, reverse_inner=False, prefix="0x", use_fastpack=True)
c_result = pack_innermost_dim_as_hex_string(
tensors[count],
DataType["BINARY"],
tensorshape[-1] * 2,
reverse_inner=False,
prefix="0x",
use_fastpack=True,
)
results_c.append(c_result)
end_c = time.time()

# Test python impl
start_python = time.time()
for count in range(tensor_count):
python_result = pack_innermost_dim_as_hex_string(tensors[count], DataType["BINARY"], tensorshape[-1] * 2, reverse_inner=False, prefix="0x", use_fastpack=False)
python_result = pack_innermost_dim_as_hex_string(
tensors[count],
DataType["BINARY"],
tensorshape[-1] * 2,
reverse_inner=False,
prefix="0x",
use_fastpack=False,
)
results_python.append(python_result)
end_python = time.time()

assert np.array_equal(np.array(results_python), np.array(results_c))

# Write timing results
with open(os.path.join(os.path.dirname(__file__), f"fastpack_benchmark" + "_".join(map(lambda x: str(x), list(tensorshape))) + ".txt"), 'w+') as f:
with open(
os.path.join(
os.path.dirname(__file__),
"fastpack_benchmark" + "_".join(map(lambda x: str(x), list(tensorshape))) + ".txt",
),
"w+",
) as f:
f.write("Pack_innermost_dim_to_hexstring benchmark test results\n")
f.write("Shape: " + str(tensorshape) + "\n")
f.write(f"Ran {tensor_count} times\n")
f.write(f"Python: {end_python - start_python}s overall | {(end_python - start_python) / tensor_count}s on avg. per sample\n")
f.write(f"C: {end_c - start_c}s overall | {(end_c - start_c) / tensor_count}s on avg. per sample\n")
python_time = end_python - start_python
c_time = end_c - start_c
f.write(
f"Python: {python_time}s overall | {python_time / tensor_count}s on avg. per sample\n"
)
f.write(f"C: {c_time}s overall | {c_time / tensor_count}s on avg. per sample\n")