From 13d65e399ac41dd6dd0b09c2a831b9ffcad1d5cb Mon Sep 17 00:00:00 2001 From: derpda Date: Thu, 16 Sep 2021 21:03:19 +0900 Subject: [PATCH 1/3] Tight packing on hardware for DataPack (#44) * Initial try at WidthCalculator for DataPack More template magic could make this nicer. * Fix: Was using wrong type * Small cleanup * Specializations improved,ap_int & ap_ufixed added * Catch update, test cases added for Xilinx types * Bit width was too small for test value * Prevent segfaults by explicit type usage Function resolution seems to not work as intended for some reason... * Fix: Replace reinterpret_cast for xilinx types * Comment finished * TypeHandler made struct, moved to namespace * Removed C-style type conversions * Typo fixed in DataPack.h Co-authored-by: definelicht * Code review: Pass ap_uint by refernce, not value Co-authored-by: definelicht Co-authored-by: definelicht --- include/hlslib/xilinx/DataPack.h | 98 +++++++++++++++++++++++++++++-- xilinx_test/test/TestDataPack.cpp | 50 ++++++++-------- 2 files changed, 121 insertions(+), 27 deletions(-) diff --git a/include/hlslib/xilinx/DataPack.h b/include/hlslib/xilinx/DataPack.h index 048f4fa..e3ddf11 100644 --- a/include/hlslib/xilinx/DataPack.h +++ b/include/hlslib/xilinx/DataPack.h @@ -5,6 +5,7 @@ #include // ap_int.h will break some compilers if this is not included #include +#include #include namespace hlslib { @@ -16,6 +17,94 @@ class DataPackProxy; // Forward declaration } // End anonymous namespace +namespace detail { + +/// Helper class to allow more efficient packing on the FPGA, where memory +/// access is not restricted to byte-sized chunks. +/// This class should be specialized for types with bit-widths that are not a +/// multiple of a byte. For examples, see below specializations for common +/// Xilinx arbitrary bit-width types. +template +struct TypeHandler { + static constexpr int width = 8 * sizeof(T); + + static T from_range(ap_uint const &range) { + return *reinterpret_cast(&range); + } + + static ap_uint to_range(T const &value) { + return *reinterpret_cast const *>(&value); + } +}; + +template +struct TypeHandler> { + static constexpr int width = _AP_W; + + static ap_int<_AP_W> from_range(ap_uint const &range) { + ap_int<_AP_W> out; + out.range() = range; + return out; + } + + static ap_uint to_range(ap_int<_AP_W> const &value) { return value.range(); } +}; + +template +struct TypeHandler> { + static constexpr int width = _AP_W; + + static ap_uint<_AP_W> from_range(ap_uint const &range) { + ap_uint<_AP_W> out; + out.range() = range; + return out; + } + + static ap_uint to_range(ap_uint<_AP_W> const &value) { return value.range(); } +}; + +template +struct TypeHandler> { + static constexpr int width = _AP_W; + + static ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> from_range( + ap_uint const &range + ) { + ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> out; + out.range() = range; + return out; + } + + static ap_uint to_range( + ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> const &value + ) { + return value.range().to_ap_int_base(); + } +}; + +template +struct TypeHandler> { + static constexpr int width = _AP_W; + + static ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> from_range( + ap_uint const &range + ) { + ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> out; + out.range() = range; + return out; + } + + static ap_uint to_range( + ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> const &value + ) { + return value.range().to_ap_int_base(); + } +}; + +} // End namespace detail + + + /// Class to accommodate SIMD-style vectorization of a data path on FPGA using /// ap_uint to force wide ports. /// @@ -28,7 +117,7 @@ class DataPack { public: - static constexpr int kBits = 8 * sizeof(T); + static constexpr int kBits = detail::TypeHandler::width; static constexpr int kWidth = width; using Pack_t = ap_uint; using Internal_t = ap_uint; @@ -79,7 +168,7 @@ class DataPack { } #endif Pack_t temp = data_.range((i + 1) * kBits - 1, i * kBits); - return *reinterpret_cast(&temp); + return detail::TypeHandler::from_range(temp); } void Set(int i, T value) { @@ -91,8 +180,9 @@ class DataPack { throw std::out_of_range(ss.str()); } #endif - Pack_t temp = *reinterpret_cast(&value); - data_.range((i + 1) * kBits - 1, i * kBits) = temp; + data_.range((i + 1) * kBits - 1, i * kBits) = ( + detail::TypeHandler::to_range(value) + ); } void Fill(T const &value) { diff --git a/xilinx_test/test/TestDataPack.cpp b/xilinx_test/test/TestDataPack.cpp index 4cdaa88..3e5d2b5 100644 --- a/xilinx_test/test/TestDataPack.cpp +++ b/xilinx_test/test/TestDataPack.cpp @@ -4,12 +4,16 @@ #include "hlslib/xilinx/DataPack.h" #include "catch.hpp" -using Test_t = int; +#include "ap_fixed.h" +#include "ap_int.h" + constexpr int kWidth = 4; -constexpr int kFillVal = 5; -using DataPack = hlslib::DataPack; -TEST_CASE("DataPack", "[DataPack]") { +TEMPLATE_TEST_CASE( + "DataPack", "[DataPack][template]", + int, ap_int<5>, ap_uint<33>, (ap_fixed<9, 4>), (ap_ufixed<19, 5>)) { + const TestType kFillVal = 5; + using DataPack = hlslib::DataPack; SECTION("Fill constructor") { const DataPack pack(kFillVal); @@ -35,7 +39,7 @@ TEST_CASE("DataPack", "[DataPack]") { } SECTION("Array constructor") { - Test_t arr[kWidth]; + TestType arr[kWidth]; std::fill(arr, arr + kWidth, kFillVal); const DataPack pack(arr); for (int i = 0; i < kWidth; ++i) { @@ -44,64 +48,64 @@ TEST_CASE("DataPack", "[DataPack]") { } SECTION("Assignment copy operator") { - DataPack lhs(0); + DataPack lhs(TestType(0)); for (int i = 0; i < kWidth; ++i) { - REQUIRE(lhs[i] == 0); + REQUIRE(lhs.Get(i) == 0); } DataPack rhs(kFillVal); lhs = rhs; for (int i = 0; i < kWidth; ++i) { - REQUIRE(lhs[i] == kFillVal); + REQUIRE(lhs.Get(i) == kFillVal); } } SECTION("Assignment move operator") { - DataPack lhs(0); + DataPack lhs(TestType(0)); for (int i = 0; i < kWidth; ++i) { - REQUIRE(lhs[i] == 0); + REQUIRE(lhs.Get(i) == 0); } DataPack rhs(kFillVal); lhs = std::move(rhs); for (int i = 0; i < kWidth; ++i) { - REQUIRE(lhs[i] == kFillVal); + REQUIRE(lhs.Get(i) == kFillVal); } } SECTION("Index-wise assignment") { - DataPack lhs(0); + DataPack lhs(TestType(0)); DataPack rhs(kFillVal); for (int i = 0; i < kWidth; ++i) { - REQUIRE(lhs[i] == 0); + REQUIRE(lhs.Get(i) == 0); } for (int i = 0; i < kWidth; ++i) { lhs[i] = rhs[i]; } for (int i = 0; i < kWidth; ++i) { - REQUIRE(lhs[i] == kFillVal); + REQUIRE(lhs.Get(i) == kFillVal); } } SECTION("Shift operation") { DataPack first(kFillVal); - DataPack second(0); - first.ShiftTo<0, kWidth/2, kWidth/2>(second); + DataPack second(TestType(0)); + first.template ShiftTo<0, kWidth/2, kWidth/2>(second); for (int i = 0; i < kWidth/2; ++i) { - REQUIRE(second[i] == 0); + REQUIRE(second.Get(i) == 0); } for (int i = kWidth/2; i < kWidth; ++i) { - REQUIRE(second[i] == kFillVal); + REQUIRE(second.Get(i) == kFillVal); } } SECTION("Pack and unpack") { - DataPack pack(0); - Test_t arr0[kWidth]; - Test_t arr1[kWidth]; + DataPack pack(TestType(0)); + TestType arr0[kWidth]; + TestType arr1[kWidth]; std::fill(arr0, arr0 + kWidth, kFillVal); std::fill(arr1, arr1 + kWidth, 0); pack.Pack(arr0); for (int i = 0; i < kWidth; ++i) { - REQUIRE(pack[i] == arr0[i]); + REQUIRE(pack.Get(i) == arr0[i]); } pack.Unpack(arr1); for (int i = 0; i < kWidth; ++i) { @@ -111,7 +115,7 @@ TEST_CASE("DataPack", "[DataPack]") { std::fill(arr1, arr1 + kWidth, 0); pack << arr0; for (int i = 0; i < kWidth; ++i) { - REQUIRE(pack[i] == arr0[i]); + REQUIRE(pack.Get(i) == arr0[i]); } pack >> arr1; for (int i = 0; i < kWidth; ++i) { From a20442d25b71fb283f319ab09ea7a970b96e9497 Mon Sep 17 00:00:00 2001 From: Johannes de Fine Licht Date: Thu, 16 Sep 2021 14:16:10 +0200 Subject: [PATCH 2/3] Some static assertions --- xilinx_test/test/TestDataPack.cpp | 42 ++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/xilinx_test/test/TestDataPack.cpp b/xilinx_test/test/TestDataPack.cpp index 3e5d2b5..d4c2129 100644 --- a/xilinx_test/test/TestDataPack.cpp +++ b/xilinx_test/test/TestDataPack.cpp @@ -1,24 +1,39 @@ /// @author Johannes de Fine Licht (definelicht@inf.ethz.ch) -/// @copyright This software is copyrighted under the BSD 3-Clause License. +/// @copyright This software is copyrighted under the BSD 3-Clause License. -#include "hlslib/xilinx/DataPack.h" -#include "catch.hpp" +#include #include "ap_fixed.h" #include "ap_int.h" +#include "catch.hpp" +#include "hlslib/xilinx/DataPack.h" constexpr int kWidth = 4; -TEMPLATE_TEST_CASE( - "DataPack", "[DataPack][template]", - int, ap_int<5>, ap_uint<33>, (ap_fixed<9, 4>), (ap_ufixed<19, 5>)) { +static_assert(hlslib::DataPack, 3>::kBits == 5, "Invalid bit size."); +static_assert(std::is_same, 3>::Internal_t, + ap_uint<15>>::value, + "Invalid internal type."); +static_assert(hlslib::DataPack, 8>::kBits == 9, + "Invalid bit size."); +static_assert(std::is_same, 8>::Internal_t, + ap_uint<72>>::value, + "Invalid internal type."); +static_assert(hlslib::DataPack, 8>::kBits == 23, + "Invalid bit size."); +static_assert(std::is_same, 8>::Internal_t, + ap_uint<184>>::value, + "Invalid internal type."); + +TEMPLATE_TEST_CASE("DataPack", "[DataPack][template]", int, ap_int<5>, + ap_uint<33>, (ap_fixed<9, 4>), (ap_ufixed<19, 5>)) { const TestType kFillVal = 5; using DataPack = hlslib::DataPack; SECTION("Fill constructor") { const DataPack pack(kFillVal); for (int i = 0; i < kWidth; ++i) { - REQUIRE(pack[i] == kFillVal); + REQUIRE(pack[i] == kFillVal); } } @@ -26,7 +41,7 @@ TEMPLATE_TEST_CASE( const DataPack pack(kFillVal); const DataPack copy(pack); for (int i = 0; i < kWidth; ++i) { - REQUIRE(copy[i] == kFillVal); + REQUIRE(copy[i] == kFillVal); } } @@ -34,7 +49,7 @@ TEMPLATE_TEST_CASE( const DataPack pack(kFillVal); const DataPack move(std::move(pack)); for (int i = 0; i < kWidth; ++i) { - REQUIRE(move[i] == kFillVal); + REQUIRE(move[i] == kFillVal); } } @@ -43,7 +58,7 @@ TEMPLATE_TEST_CASE( std::fill(arr, arr + kWidth, kFillVal); const DataPack pack(arr); for (int i = 0; i < kWidth; ++i) { - REQUIRE(pack[i] == kFillVal); + REQUIRE(pack[i] == kFillVal); } } @@ -88,11 +103,11 @@ TEMPLATE_TEST_CASE( SECTION("Shift operation") { DataPack first(kFillVal); DataPack second(TestType(0)); - first.template ShiftTo<0, kWidth/2, kWidth/2>(second); - for (int i = 0; i < kWidth/2; ++i) { + first.template ShiftTo<0, kWidth / 2, kWidth / 2>(second); + for (int i = 0; i < kWidth / 2; ++i) { REQUIRE(second.Get(i) == 0); } - for (int i = kWidth/2; i < kWidth; ++i) { + for (int i = kWidth / 2; i < kWidth; ++i) { REQUIRE(second.Get(i) == kFillVal); } } @@ -130,5 +145,4 @@ TEMPLATE_TEST_CASE( ss << pack; REQUIRE(ss.str() == "{a, b, c, d, e}"); } - } From 7e8241c18b818b761282a0f098d9b02e31047041 Mon Sep 17 00:00:00 2001 From: Johannes de Fine Licht Date: Thu, 16 Sep 2021 14:28:23 +0200 Subject: [PATCH 3/3] Run tight packing test in synthesis --- xilinx_test/CMakeLists.txt | 1 + xilinx_test/kernels/TightPacking.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 xilinx_test/kernels/TightPacking.cpp diff --git a/xilinx_test/CMakeLists.txt b/xilinx_test/CMakeLists.txt index e3ba5ad..c3ccd5f 100644 --- a/xilinx_test/CMakeLists.txt +++ b/xilinx_test/CMakeLists.txt @@ -85,3 +85,4 @@ add_hls_test("StreamAPI") add_hls_test("Subflow") add_hls_test("HBMandBlockCopy" CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/configs/hbmkernel.cfg) add_hls_test("DDRExplicit" PORT_MAPPING "ddr0:DDR[0] ddr1:DDR[1]") +add_hls_test("TightPacking") diff --git a/xilinx_test/kernels/TightPacking.cpp b/xilinx_test/kernels/TightPacking.cpp new file mode 100644 index 0000000..50ab34e --- /dev/null +++ b/xilinx_test/kernels/TightPacking.cpp @@ -0,0 +1,12 @@ +#include + +void TightPacking(hlslib::DataPack, 32> const *mem_in, + hlslib::DataPack, 32> *mem_out, int n) { + for (int i = 0; i < n; ++i) { + #pragma HLS PIPELINE + for (int w = 0; w < 32; ++w) { + #pragma HLS UNROLL + mem_out[i][w] = mem_in[i][w]; + } + } +}