diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 5d078a50..80061856 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -126,19 +126,19 @@ Benchmark::~Benchmark() if (d_chunks != nullptr) { for (int i = 0; i < k; i++) { - quadiron::aligned_deallocate(d_chunks->at(i)); + this->allocator.deallocate(d_chunks->at(i), chunk_size); } delete d_chunks; } if (c_chunks != nullptr) { for (int i = 0; i < n_c; i++) { - quadiron::aligned_deallocate(c_chunks->at(i)); + this->allocator.deallocate(c_chunks->at(i), chunk_size); } delete c_chunks; } if (r_chunks != nullptr) { for (int i = 0; i < k; i++) { - quadiron::aligned_deallocate(r_chunks->at(i)); + this->allocator.deallocate(r_chunks->at(i), chunk_size); } delete r_chunks; } @@ -199,13 +199,13 @@ int Benchmark::init() r_chunks = new std::vector(k); for (i = 0; i < k; i++) { - d_chunks->at(i) = quadiron::aligned_allocate(chunk_size); + d_chunks->at(i) = this->allocator.allocate(chunk_size); } for (i = 0; i < n_c; i++) { - c_chunks->at(i) = quadiron::aligned_allocate(chunk_size); + c_chunks->at(i) = this->allocator.allocate(chunk_size); } for (i = 0; i < k; i++) { - r_chunks->at(i) = quadiron::aligned_allocate(chunk_size); + r_chunks->at(i) = this->allocator.allocate(chunk_size); } // Allocate memory for iostreambufs diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h index afa1a408..d2359e60 100644 --- a/benchmark/benchmark.h +++ b/benchmark/benchmark.h @@ -38,6 +38,7 @@ #include #include "quadiron.h" +#include "simd/simd.h" #include "core.h" #include "iostreambuf.h" @@ -298,6 +299,7 @@ class Benchmark { PRNG* prng = nullptr; quadiron::fec::FecCode* fec = nullptr; Params_t* params = nullptr; + quadiron::simd::AlignedAllocator allocator; bool systematic_ec = false; diff --git a/src/core.h b/src/core.h index 67d2d5dc..5eaf84fe 100644 --- a/src/core.h +++ b/src/core.h @@ -105,45 +105,6 @@ static inline std::mt19937& prng() return PRNG; } -template -inline T* aligned_allocate(size_t size) -{ -#ifdef QUADIRON_USE_SIMD - size_t len = simd::ALIGNMENT + size * sizeof(T); - uint8_t* ptr = new uint8_t[len]; - if (!ptr) - return nullptr; - unsigned offset = (unsigned)((uintptr_t)ptr % simd::ALIGNMENT); - ptr += simd::ALIGNMENT - offset - 1; - // store offset - ptr[0] = (uint8_t)offset; // NOLINT - // increment ptr to the aligned location - ptr++; - T* data = reinterpret_cast(ptr); - return data; -#else - T* data = new T[size]; - if (!data) - return nullptr; - return data; -#endif -} - -template -inline void aligned_deallocate(T* data) -{ - if (!data) - return; -#ifdef QUADIRON_USE_SIMD - uint8_t* ptr = (uint8_t*)data; - unsigned offset = *(ptr - 1); - ptr -= simd::ALIGNMENT - offset; - delete[] ptr; -#else - delete[] data; -#endif -} - } // namespace quadiron #endif diff --git a/src/vec_buffers.h b/src/vec_buffers.h index 534a3d3b..d31c69d3 100644 --- a/src/vec_buffers.h +++ b/src/vec_buffers.h @@ -38,6 +38,7 @@ #include #include "core.h" +#include "simd/simd.h" namespace quadiron { namespace vec { @@ -131,6 +132,7 @@ class Buffers final { int n; private: + simd::AlignedAllocator allocator; BufMemAlloc mem_alloc_case = BufMemAlloc::FULL; T* zeros = nullptr; }; @@ -151,7 +153,7 @@ Buffers::Buffers(int n, size_t size) this->mem_alloc_case = BufMemAlloc::FULL; mem.reserve(n); for (int i = 0; i < n; i++) { - mem.push_back(aligned_allocate(size)); + mem.push_back(this->allocator.allocate(size)); } } @@ -193,7 +195,7 @@ Buffers::Buffers(const Buffers& vec, int n) mem.reserve(n); for (i = 0; i < this->n; i++) { - mem.push_back(aligned_allocate(this->size)); + mem.push_back(this->allocator.allocate(size)); } int copy_len = (this->n <= vec_n) ? this->n : vec_n; @@ -238,7 +240,7 @@ Buffers::Buffers(const Buffers& vec, int begin, int end) } else { // slice and padding zeros this->mem_alloc_case = BufMemAlloc::ZERO_EXTEND; - this->zeros = aligned_allocate(this->size); + this->zeros = this->allocator.allocate(size); std::memset(this->zeros, 0, this->size * sizeof(T)); mem.insert(mem.end(), vec_mem.begin() + begin, vec_mem.end()); @@ -312,7 +314,7 @@ Buffers::Buffers( } else { // output is zero-extended & shuffled from `vec` this->mem_alloc_case = BufMemAlloc::ZERO_EXTEND; - this->zeros = aligned_allocate(this->size); + this->zeros = this->allocator.allocate(size); std::memset(this->zeros, 0, this->size * sizeof(T)); for (unsigned i = 0; i < n; ++i) { @@ -330,10 +332,10 @@ Buffers::~Buffers() if (this->mem_alloc_case != BufMemAlloc::NONE && mem.size() > 0) { if (this->mem_alloc_case == BufMemAlloc::FULL) { for (int i = 0; i < n; i++) { - aligned_deallocate(mem[i]); + this->allocator.deallocate(mem[i], size); } } else if (this->mem_alloc_case == BufMemAlloc::ZERO_EXTEND) { - aligned_deallocate(this->zeros); + this->allocator.deallocate(this->zeros, size); } } } @@ -375,7 +377,7 @@ inline void Buffers::set(int i, T* buf) assert(i >= 0 && i < n); if ((mem_alloc_case == BufMemAlloc::NONE) && (mem[i] != nullptr)) - aligned_deallocate(mem[i]); + this->allocator.deallocate(mem[i], size); mem[i] = buf; } diff --git a/src/vec_vector.h b/src/vec_vector.h index e7cb1e83..4127d777 100644 --- a/src/vec_vector.h +++ b/src/vec_vector.h @@ -40,6 +40,7 @@ #include "core.h" #include "gf_ring.h" +#include "simd/simd.h" #include "vec_cast.h" #include "vec_doubled.h" @@ -108,6 +109,7 @@ class Vector { T* mem; int mem_len; bool new_mem; + simd::AlignedAllocator allocator; }; template @@ -116,7 +118,7 @@ Vector::Vector(const gf::RingModN& rn, int n, T* mem, int mem_len) this->rn = &rn; this->n = n; if (mem == nullptr) { - this->mem = aligned_allocate(n); + this->mem = this->allocator.allocate(n); this->mem_len = n; this->new_mem = true; } else { @@ -139,8 +141,9 @@ Vector::Vector(const gf::RingModN& rn, std::initializer_list values) template Vector::~Vector() { - if (new_mem) - aligned_deallocate(this->mem); + if (new_mem) { + this->allocator.deallocate(this->mem, n); + } } template @@ -203,8 +206,9 @@ inline T* Vector::get_mem() const template inline void Vector::set_mem(T* mem, int mem_len) { - if (new_mem) - aligned_deallocate(this->mem); + if (new_mem) { + this->allocator.deallocate(this->mem, n); + } new_mem = false; this->mem = mem; this->mem_len = mem_len; diff --git a/test/buffers_utest.cpp b/test/buffers_utest.cpp index 301b2a7a..a93d57bf 100644 --- a/test/buffers_utest.cpp +++ b/test/buffers_utest.cpp @@ -32,6 +32,7 @@ #include #include "quadiron.h" +#include "simd/simd.h" namespace vec = quadiron::vec; namespace gf = quadiron::gf; @@ -39,6 +40,8 @@ namespace gf = quadiron::gf; template class BuffersTest : public ::testing::Test { public: + quadiron::simd::AlignedAllocator allocator; + std::unique_ptr> gen_buffers_rand_data(int n, int size, int _max = 0) { @@ -49,7 +52,7 @@ class BuffersTest : public ::testing::Test { auto vec = std::make_unique>(n, size); for (int i = 0; i < n; i++) { - T* buf = quadiron::aligned_allocate(size); + T* buf = this->allocator.allocate(size); for (int j = 0; j < size; j++) { buf[j] = dis(prng); }