From 662d9336e37a1960d2ead84b3644f1491d83ed3a Mon Sep 17 00:00:00 2001 From: mou Date: Mon, 3 Apr 2023 02:37:22 +0800 Subject: [PATCH 1/7] Added C++ allocator to manage rdmalib::Buffer - An error occurred while linking the `RdmaAllocator` library to the `warm_benchmark` program: ``` FAILED: benchmarks/warm_benchmarker : && /bin/clang++-15 -Wall -Wextra -g -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG CMakeFiles/warm_benchmarker.dir/benchmarks/warm_benchmark.cpp.o CMakeFiles/warm_benchmarker.dir/benchmarks/warm_benchmark_opts.cpp.o -o benchmarks/warm_benchmarker _deps/spdlog-build/libspdlogd.a librfaaslib.a libbenchmarks.a librfaaslib.a librdmalib.a _deps/spdlog-build/libspdlogd.a /usr/lib/x86_64-linux-gnu/librdmacm.so /usr/lib/x86_64-linux-gnu/libibverbs.so -ldl && : /bin/ld: /bin/ld: DWARF error: invalid or unhandled FORM value: 0x23 CMakeFiles/warm_benchmarker.dir/benchmarks/warm_benchmark.cpp.o: in function `main': warm_benchmark.cpp:(.text+0x493): undefined reference to `rfaas::RdmaAllocator<rdmalib::Buffer<char> >::allocate(unsigned long const&, int const&, unsigned long)' /bin/ld: warm_benchmark.cpp:(.text+0x4e0): undefined reference to `rfaas::RdmaAllocator<rdmalib::Buffer<char> >::allocate(unsigned long const&, int const&, unsigned long)' clang: error: linker command failed with exit code 1 (use -v to see invocation) ninja: build stopped: subcommand failed. ``` - Checking that the argument types at the call site match the declared parameter types helped resolve the linking error. - Inline functions should be defined in header files so that the compiler can see their definitions and optimize them. 
--- CMakeLists.txt | 3 ++- benchmarks/warm_benchmark.cpp | 27 +++++++++++++++++--------- rfaas/include/rfaas/rdma_allocator.hpp | 27 ++++++++++++++++++++++++++ rfaas/lib/rdma_allocator.cpp | 25 ++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 rfaas/include/rfaas/rdma_allocator.hpp create mode 100644 rfaas/lib/rdma_allocator.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 138a0ca..1b7748e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE) string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -g -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG ") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG ") -string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra ") +string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -std=c++17") ### # Mandatory: devices configuration file. @@ -127,6 +127,7 @@ target_link_libraries(rdmalib PRIVATE cereal) # client library ### file(GLOB rdmalib_files "rfaas/lib/*.cpp") +message(STATUS "rdmalib_files ${rdmalib_files}") add_library(rfaaslib STATIC ${rdmalib_files}) add_dependencies(rfaaslib spdlog) add_dependencies(rfaaslib cereal) diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index d16f36b..68343e3 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -13,6 +13,7 @@ #include #include +#include #include "warm_benchmark.hpp" #include "settings.hpp" @@ -68,19 +69,27 @@ int main(int argc, char ** argv) } // FIXME: move me to a memory allocator - rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); - in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); - out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); - memset(in.data(), 0, opts.input_size); - for(int i = 0; i < opts.input_size; 
++i) { - ((char*)in.data())[i] = 1; + + rfaas::RdmaAllocator > rdmaAllocator(executor); + auto in = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE, + rdmalib::functions::Submission::DATA_HEADER_SIZE); + auto out = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + // rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); + // in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); + // out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + + // TODO: Since the for loop writes a value of 1 to each byte of the in buffer, + // it overwrites all bytes previously set to 0 by the memset() function. + memset(in->data(), 0, opts.input_size); + for (int i = 0; i < opts.input_size; ++i) { + ((char *) in->data())[i] = 1; } rdmalib::Benchmarker<1> benchmarker{settings.benchmark.repetitions}; spdlog::info("Warmups begin"); for(int i = 0; i < settings.benchmark.warmup_repetitions; ++i) { SPDLOG_DEBUG("Submit warm {}", i); - executor.execute(opts.fname, in, out); + executor.execute(opts.fname, *in, *out); } spdlog::info("Warmups completed"); @@ -88,7 +97,7 @@ int main(int argc, char ** argv) for(int i = 0; i < settings.benchmark.repetitions;) { benchmarker.start(); SPDLOG_DEBUG("Submit execution {}", i); - auto ret = executor.execute(opts.fname, in, out); + auto ret = executor.execute(opts.fname, *in, *out); if(std::get<0>(ret)) { SPDLOG_DEBUG("Finished execution {} out of {}", i, settings.benchmark.repetitions); benchmarker.end(0); @@ -108,7 +117,7 @@ int main(int argc, char ** argv) printf("Data: "); for(int i = 0; i < std::min(100, opts.input_size); ++i) - printf("%d ", ((char*)out.data())[i]); + printf("%d ", ((char*)out->data())[i]); printf("\n"); return 0; diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp new file mode 100644 index 0000000..c46029a --- /dev/null +++ 
b/rfaas/include/rfaas/rdma_allocator.hpp @@ -0,0 +1,27 @@ +// +// Created by mou on 4/2/23. +// + +#ifndef __RFAAS_RDMA_ALLOCATOR_HPP__ +#define __RFAAS_RDMA_ALLOCATOR_HPP__ + +#include +#include +#include + +namespace rfaas { + template + class RdmaAllocator { + private: + const executor &_executor; + + public: + inline explicit RdmaAllocator(const executor &executor) noexcept: _executor(executor) {} + + inline T *allocate(const std::size_t &, const int &, int = 0); + + inline void deallocate(T *p, std::size_t n) noexcept; + }; +} + +#endif //__RFAAS_RDMA_ALLOCATOR_HPP__ diff --git a/rfaas/lib/rdma_allocator.cpp b/rfaas/lib/rdma_allocator.cpp new file mode 100644 index 0000000..a9d9d6c --- /dev/null +++ b/rfaas/lib/rdma_allocator.cpp @@ -0,0 +1,25 @@ +// +// Created by mou on 4/2/23. +// +#include + +#include + +namespace rfaas { + + template + inline T *RdmaAllocator::allocate(const std::size_t &size, const int &access, int header) { + if (size > std::size_t(-1) / sizeof(T)) + throw std::bad_alloc(); + + rdmalib::Buffer buffer(size, header); + buffer.register_memory(_executor._state.pd(), access); + + return buffer; + } + + template + inline void RdmaAllocator::deallocate(T *p, std::size_t n) noexcept { + operator delete(p); + } +} \ No newline at end of file From 4105069f7d0e143efc7d953e348324a9176619a3 Mon Sep 17 00:00:00 2001 From: mou Date: Mon, 3 Apr 2023 04:03:35 +0800 Subject: [PATCH 2/7] resolve #18 Add C++ allocator (partial) - Compiled successfully without any errors. - Add allocator implementation in rfaaslib. - Encapsulate the memory registration in rdmalib - Add test demonstrating standard memory allocation. 
--- rfaas/include/rfaas/rdma_allocator.hpp | 15 +++++++++++++-- rfaas/lib/rdma_allocator.cpp | 25 ------------------------- 2 files changed, 13 insertions(+), 27 deletions(-) delete mode 100644 rfaas/lib/rdma_allocator.cpp diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp index c46029a..3b5946d 100644 --- a/rfaas/include/rfaas/rdma_allocator.hpp +++ b/rfaas/include/rfaas/rdma_allocator.hpp @@ -18,9 +18,20 @@ namespace rfaas { public: inline explicit RdmaAllocator(const executor &executor) noexcept: _executor(executor) {} - inline T *allocate(const std::size_t &, const int &, int = 0); + // inline T *allocate(const std::size_t &, const int &, int = 0); + inline T *allocate(const std::size_t &size, const int &access, int header=0) { + if (size > std::size_t(-1) / sizeof(T)) + throw std::bad_alloc(); - inline void deallocate(T *p, std::size_t n) noexcept; + auto buffer = new rdmalib::Buffer(size, header); + buffer->register_memory(_executor._state.pd(), access); + std::cout << "allocate memory by RdmaAllocator" << std::endl; + return buffer; + } + + inline void deallocate(T *p, std::size_t n) noexcept { + operator delete(p); + } }; } diff --git a/rfaas/lib/rdma_allocator.cpp b/rfaas/lib/rdma_allocator.cpp deleted file mode 100644 index a9d9d6c..0000000 --- a/rfaas/lib/rdma_allocator.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Created by mou on 4/2/23. 
-// -#include - -#include - -namespace rfaas { - - template - inline T *RdmaAllocator::allocate(const std::size_t &size, const int &access, int header) { - if (size > std::size_t(-1) / sizeof(T)) - throw std::bad_alloc(); - - rdmalib::Buffer buffer(size, header); - buffer.register_memory(_executor._state.pd(), access); - - return buffer; - } - - template - inline void RdmaAllocator::deallocate(T *p, std::size_t n) noexcept { - operator delete(p); - } -} \ No newline at end of file From 889333e31b08745ef4bbb02842c85668b118693e Mon Sep 17 00:00:00 2001 From: mou Date: Mon, 3 Apr 2023 23:35:41 +0800 Subject: [PATCH 3/7] Modify `RdmaAllocator` to have a structure similar to the ["C++ named requirements example for the Allocator"](https://en.cppreference.com/w/cpp/named_req/Allocator). Signed-off-by: mou --- CMakeLists.txt | 2 +- benchmarks/warm_benchmark.cpp | 4 +-- rfaas/include/rfaas/rdma_allocator.hpp | 46 ++++++++++++++++++-------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b7748e..a312719 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE) string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -g -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG ") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG ") -string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -std=c++17") +string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra") ### # Mandatory: devices configuration file. 
diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index 68343e3..ad45b6d 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -71,9 +71,9 @@ int main(int argc, char ** argv) // FIXME: move me to a memory allocator rfaas::RdmaAllocator > rdmaAllocator(executor); - auto in = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE, + rdmalib::Buffer* in = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE, rdmalib::functions::Submission::DATA_HEADER_SIZE); - auto out = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + rdmalib::Buffer* out = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); // rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); // in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); // out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp index 3b5946d..de1d30b 100644 --- a/rfaas/include/rfaas/rdma_allocator.hpp +++ b/rfaas/include/rfaas/rdma_allocator.hpp @@ -12,27 +12,47 @@ namespace rfaas { template class RdmaAllocator { - private: - const executor &_executor; - public: + typedef T value_type; + inline explicit RdmaAllocator(const executor &executor) noexcept: _executor(executor) {} - // inline T *allocate(const std::size_t &, const int &, int = 0); - inline T *allocate(const std::size_t &size, const int &access, int header=0) { - if (size > std::size_t(-1) / sizeof(T)) - throw std::bad_alloc(); + template + constexpr RdmaAllocator(const RdmaAllocator &) noexcept {} + + [[nodiscard]] inline T *allocate(const std::size_t &size, const int &access, int header = 0) { + if (size > std::numeric_limits::max() / sizeof(T)) + throw std::bad_array_new_length(); + + // Maybe we could directly call the memset 
function here + if (auto buffer = new rdmalib::Buffer(size, header)) { + report(buffer, size); + buffer->register_memory(_executor._state.pd(), access); + return buffer; + } + throw std::bad_alloc(); + } - auto buffer = new rdmalib::Buffer(size, header); - buffer->register_memory(_executor._state.pd(), access); - std::cout << "allocate memory by RdmaAllocator" << std::endl; - return buffer; + inline void deallocate(T *p, std::size_t size) noexcept { + report(p, size, 0); + std::free(p); } - inline void deallocate(T *p, std::size_t n) noexcept { - operator delete(p); + private: + const executor &_executor; + + void report(T *p, std::size_t n, bool alloc = true) const { + std::cout << (alloc ? "Alloc: " : "Dealloc: ") << sizeof(T) * n + << " bytes at " << std::hex << std::showbase + << reinterpret_cast(p) << std::dec << '\n'; } }; + + template + bool operator==(const RdmaAllocator &, const RdmaAllocator &) { return true; } + + template + bool operator!=(const RdmaAllocator &, const RdmaAllocator &) { return false; } } #endif //__RFAAS_RDMA_ALLOCATOR_HPP__ From eb1fe09eb038283a49109063c455d036b1fc2dc3 Mon Sep 17 00:00:00 2001 From: mou Date: Wed, 5 Apr 2023 15:33:40 +0800 Subject: [PATCH 4/7] set memory with mmap --- benchmarks/warm_benchmark.cpp | 10 +++++++--- rfaas/include/rfaas/rdma_allocator.hpp | 22 +++++++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index ad45b6d..b323a18 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -71,13 +71,17 @@ int main(int argc, char ** argv) // FIXME: move me to a memory allocator rfaas::RdmaAllocator > rdmaAllocator(executor); - rdmalib::Buffer* in = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE, - rdmalib::functions::Submission::DATA_HEADER_SIZE); - rdmalib::Buffer* out = rdmaAllocator.allocate(opts.input_size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + rdmalib::Buffer* in = 
rdmaAllocator.allocate(opts.input_size); + rdmaAllocator.construct(in, IBV_ACCESS_LOCAL_WRITE); + rdmalib::Buffer* out = rdmaAllocator.allocate(opts.input_size); + rdmaAllocator.construct(out, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); // rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); // in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); // out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); +// const rfaas::RdmaAllocator > &rdmaAllocator1 = rdmaAllocator; + std::vector, rfaas::RdmaAllocator>> v(8, rdmaAllocator); + // TODO: Since the for loop writes a value of 1 to each byte of the in buffer, // it overwrites all bytes previously set to 0 by the memset() function. memset(in->data(), 0, opts.input_size); diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp index de1d30b..78f8032 100644 --- a/rfaas/include/rfaas/rdma_allocator.hpp +++ b/rfaas/include/rfaas/rdma_allocator.hpp @@ -5,6 +5,7 @@ #ifndef __RFAAS_RDMA_ALLOCATOR_HPP__ #define __RFAAS_RDMA_ALLOCATOR_HPP__ +#include #include #include #include @@ -20,19 +21,34 @@ namespace rfaas { template constexpr RdmaAllocator(const RdmaAllocator &) noexcept {} - [[nodiscard]] inline T *allocate(const std::size_t &size, const int &access, int header = 0) { + [[nodiscard]] inline T *allocate(const size_t &size) { if (size > std::numeric_limits::max() / sizeof(T)) throw std::bad_array_new_length(); // Maybe we could directly call the memset function here - if (auto buffer = new rdmalib::Buffer(size, header)) { + mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (auto buffer = static_cast(std::malloc(size * sizeof(T)))){ report(buffer, size); - buffer->register_memory(_executor._state.pd(), access); return buffer; } throw std::bad_alloc(); } + template + void construct (U* p, arg1 access) + { + std::cout << "constructor" << 
std::endl; + p->register_memory(_executor._state.pd(), access); + } + + template + void construct (U* p, arg1 access, arg2 head) + { + std::cout << "constructor" << std::endl; + p->register_memory(_executor._state.pd(), access, head); + } +// [[nodiscard]] inline T *construct(const std::size_t &size, const int &access, int header = 0) { + inline void deallocate(T *p, std::size_t size) noexcept { report(p, size, 0); std::free(p); From 980aa50935f8a8504e48ee1418283ba2016282b1 Mon Sep 17 00:00:00 2001 From: mou Date: Thu, 6 Apr 2023 20:36:35 +0800 Subject: [PATCH 5/7] Fixed Allocator to CPP standard version --- CMakeLists.txt | 2 +- benchmarks/warm_benchmark.cpp | 22 ++++++++++----- rfaas/include/rfaas/rdma_allocator.hpp | 37 ++++++++++++-------------- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a312719..46901a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE) string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -g -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG ") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_DEBUG ") -string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra") +string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra ") ### # Mandatory: devices configuration file. 
diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index b323a18..d7a4a94 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -70,17 +70,25 @@ int main(int argc, char ** argv) // FIXME: move me to a memory allocator - rfaas::RdmaAllocator > rdmaAllocator(executor); - rdmalib::Buffer* in = rdmaAllocator.allocate(opts.input_size); - rdmaAllocator.construct(in, IBV_ACCESS_LOCAL_WRITE); - rdmalib::Buffer* out = rdmaAllocator.allocate(opts.input_size); - rdmaAllocator.construct(out, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + // Sample: test demonstrating standard memory allocation. // rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); // in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); // out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); -// const rfaas::RdmaAllocator > &rdmaAllocator1 = rdmaAllocator; - std::vector, rfaas::RdmaAllocator>> v(8, rdmaAllocator); + // Sample: test demonstrating allocation with our custom allocator. + rfaas::RdmaInfo info_in(executor,IBV_ACCESS_LOCAL_WRITE,rdmalib::functions::Submission::DATA_HEADER_SIZE); + rfaas::RdmaAllocator> allocator_in{info_in}; + rdmalib::Buffer* in = allocator_in.allocate(opts.input_size); + + rfaas::RdmaInfo info_out(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); + rfaas::RdmaAllocator> allocator_out{info_out}; + rdmalib::Buffer* out = allocator_out.allocate(opts.input_size); + + // Sample: test demonstrating allocation with std::vector. + // rfaas::RdmaInfo info_v(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); + // rfaas::RdmaAllocator> allocator_v{info_v}; + // std::vector, rfaas::RdmaAllocator>> v(allocator_v); + // TODO: Since the for loop writes a value of 1 to each byte of the in buffer, // it overwrites all bytes previously set to 0 by the memset() function. 
diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp index 78f8032..1691b03 100644 --- a/rfaas/include/rfaas/rdma_allocator.hpp +++ b/rfaas/include/rfaas/rdma_allocator.hpp @@ -11,12 +11,24 @@ #include namespace rfaas { + + struct RdmaInfo { + + public: + RdmaInfo(executor& executor, const int access, const int header_size=0) + : executor(executor), access(access), header_size(header_size) {} + const executor& executor; + const int access; + const int header_size = 0; + }; + template class RdmaAllocator { + public: typedef T value_type; - inline explicit RdmaAllocator(const executor &executor) noexcept: _executor(executor) {} + inline explicit RdmaAllocator(RdmaInfo& info) noexcept: _info(info) {} template constexpr RdmaAllocator(const RdmaAllocator &) noexcept {} @@ -26,36 +38,21 @@ namespace rfaas { throw std::bad_array_new_length(); // Maybe we could directly call the memset function here - mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - if (auto buffer = static_cast(std::malloc(size * sizeof(T)))){ + if (auto buffer = new rdmalib::Buffer(size, _info.header_size)) { report(buffer, size); + buffer->register_memory(_info.executor._state.pd(), _info.access); return buffer; } throw std::bad_alloc(); } - template - void construct (U* p, arg1 access) - { - std::cout << "constructor" << std::endl; - p->register_memory(_executor._state.pd(), access); - } - - template - void construct (U* p, arg1 access, arg2 head) - { - std::cout << "constructor" << std::endl; - p->register_memory(_executor._state.pd(), access, head); - } -// [[nodiscard]] inline T *construct(const std::size_t &size, const int &access, int header = 0) { - inline void deallocate(T *p, std::size_t size) noexcept { report(p, size, 0); - std::free(p); + p->~T(); } private: - const executor &_executor; + const RdmaInfo &_info; void report(T *p, std::size_t n, bool alloc = true) const { std::cout << (alloc ? 
"Alloc: " : "Dealloc: ") << sizeof(T) * n From 1bfd93604013a45a3b451e8c10b042415cdb690d Mon Sep 17 00:00:00 2001 From: mou Date: Fri, 7 Apr 2023 03:55:37 +0800 Subject: [PATCH 6/7] Added construct in rdmaAllocator.hpp --- benchmarks/warm_benchmark.cpp | 29 +++++++++++++++++++++----- rfaas/include/rfaas/rdma_allocator.hpp | 28 ++++++++++++++++++------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index d7a4a94..68ed242 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -75,19 +75,34 @@ int main(int argc, char ** argv) // in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); // out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + // Sample: test demonstrating allocation with our custom allocator. rfaas::RdmaInfo info_in(executor,IBV_ACCESS_LOCAL_WRITE,rdmalib::functions::Submission::DATA_HEADER_SIZE); rfaas::RdmaAllocator> allocator_in{info_in}; - rdmalib::Buffer* in = allocator_in.allocate(opts.input_size); + rdmalib::Buffer* in0 = allocator_in.allocate(opts.input_size); + allocator_in.construct(in0, opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE); rfaas::RdmaInfo info_out(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); rfaas::RdmaAllocator> allocator_out{info_out}; - rdmalib::Buffer* out = allocator_out.allocate(opts.input_size); + rdmalib::Buffer* out0 = allocator_out.allocate(opts.input_size); + allocator_out.construct(out0, opts.input_size); // Sample: test demonstrating allocation with std::vector. 
- // rfaas::RdmaInfo info_v(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); - // rfaas::RdmaAllocator> allocator_v{info_v}; - // std::vector, rfaas::RdmaAllocator>> v(allocator_v); + rfaas::RdmaInfo info_v_in(executor,IBV_ACCESS_LOCAL_WRITE, rdmalib::functions::Submission::DATA_HEADER_SIZE); + rfaas::RdmaAllocator> allocator_v_in{info_v_in}; + std::vector, rfaas::RdmaAllocator>> v_in(allocator_v_in); + + rfaas::RdmaInfo info_v_out(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); + rfaas::RdmaAllocator> allocator_v_out{info_v_out}; + std::vector, rfaas::RdmaAllocator>> v_out(allocator_v_out); + // allocator_out.construct(out, opts.input_size); + + v_in.push_back({static_cast(opts.input_size),rdmalib::functions::Submission::DATA_HEADER_SIZE}); + v_out.push_back({static_cast(opts.input_size)}); + + rdmalib::Buffer* in = &v_in[0]; + rdmalib::Buffer* out = &v_out[0]; + // TODO: Since the for loop writes a value of 1 to each byte of the in buffer, @@ -132,5 +147,9 @@ int main(int argc, char ** argv) printf("%d ", ((char*)out->data())[i]); printf("\n"); +// std::free(&v_in); +// std::free(&v_out); +// v_in.get_allocator().deallocate(&v_in[0],opts.input_size); +// v_out.get_allocator().deallocate(&v_out[0],opts.input_size); return 0; } diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp index 1691b03..7d8a6aa 100644 --- a/rfaas/include/rfaas/rdma_allocator.hpp +++ b/rfaas/include/rfaas/rdma_allocator.hpp @@ -31,21 +31,33 @@ namespace rfaas { inline explicit RdmaAllocator(RdmaInfo& info) noexcept: _info(info) {} template - constexpr RdmaAllocator(const RdmaAllocator &) noexcept {} + constexpr explicit RdmaAllocator(const RdmaAllocator &) noexcept {} [[nodiscard]] inline T *allocate(const size_t &size) { if (size > std::numeric_limits::max() / sizeof(T)) throw std::bad_array_new_length(); - // Maybe we could directly call the memset function here - if (auto buffer = new rdmalib::Buffer(size, 
_info.header_size)) { - report(buffer, size); - buffer->register_memory(_info.executor._state.pd(), _info.access); - return buffer; + if (auto p = static_cast(std::malloc(size * sizeof(T) + _info.header_size))) + { + report(p, size * sizeof(T) + _info.header_size); + return p; } +// // Maybe we could directly call the memset function here +// if (auto buffer = new rdmalib::Buffer(size, _info.header_size)) { +// report(buffer, size); +// buffer->register_memory(_info.executor._state.pd(), _info.access); +// return buffer; +// } throw std::bad_alloc(); } + template + void construct(U* p, Args&&... args) { + ::new(p) U(std::forward(args)...); +// ::new(static_cast(p)) U(std::forward(args)...); + p->register_memory(_info.executor._state.pd(), _info.access); + } + inline void deallocate(T *p, std::size_t size) noexcept { report(p, size, 0); p->~T(); @@ -54,8 +66,8 @@ namespace rfaas { private: const RdmaInfo &_info; - void report(T *p, std::size_t n, bool alloc = true) const { - std::cout << (alloc ? "Alloc: " : "Dealloc: ") << sizeof(T) * n + void report(T *p, std::size_t size, bool alloc = true) const { + std::cout << (alloc ? "Alloc: " : "Dealloc: ") << size << " bytes at " << std::hex << std::showbase << reinterpret_cast(p) << std::dec << '\n'; } From 50a235194646d2e70bf3e0c8fe6ed3fef33ec6cc Mon Sep 17 00:00:00 2001 From: mou Date: Fri, 7 Apr 2023 05:34:32 +0800 Subject: [PATCH 7/7] Enable allocation with std::vector - Add a `construct` method in `rdmaAllocator.hpp` to enable allocation with std::vector. - Test demonstrating standard memory allocation practices. - Test demonstrating allocation with our custom allocator. - Test demonstrating allocation with std::vector. - Improve coding style and adhere to clang-tidy standards. 
Signed-off-by: mou --- CMakeLists.txt | 1 - benchmarks/warm_benchmark.cpp | 34 ++++++++++---------- rfaas/include/rfaas/rdma_allocator.hpp | 44 ++++++++++++-------------- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 46901a7..138a0ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,6 @@ target_link_libraries(rdmalib PRIVATE cereal) # client library ### file(GLOB rdmalib_files "rfaas/lib/*.cpp") -message(STATUS "rdmalib_files ${rdmalib_files}") add_library(rfaaslib STATIC ${rdmalib_files}) add_dependencies(rfaaslib spdlog) add_dependencies(rfaaslib cereal) diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index 68ed242..427cef2 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -71,38 +71,40 @@ int main(int argc, char ** argv) // FIXME: move me to a memory allocator // Sample: test demonstrating standard memory allocation. + // rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); // in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); // out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); // Sample: test demonstrating allocation with our custom allocator. 
- rfaas::RdmaInfo info_in(executor,IBV_ACCESS_LOCAL_WRITE,rdmalib::functions::Submission::DATA_HEADER_SIZE); - rfaas::RdmaAllocator> allocator_in{info_in}; - rdmalib::Buffer* in0 = allocator_in.allocate(opts.input_size); - allocator_in.construct(in0, opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE); - rfaas::RdmaInfo info_out(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); - rfaas::RdmaAllocator> allocator_out{info_out}; - rdmalib::Buffer* out0 = allocator_out.allocate(opts.input_size); - allocator_out.construct(out0, opts.input_size); + // rfaas::RdmaInfo info_in(executor,IBV_ACCESS_LOCAL_WRITE,rdmalib::functions::Submission::DATA_HEADER_SIZE); + // rfaas::RdmaAllocator> allocator_in{info_in}; + // rdmalib::Buffer* in0 = allocator_in.allocate(opts.input_size); + // allocator_in.construct(in0, opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE); + // + // rfaas::RdmaInfo info_out(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); + // rfaas::RdmaAllocator> allocator_out{info_out}; + // rdmalib::Buffer* out0 = allocator_out.allocate(opts.input_size); + // allocator_out.construct(out0, opts.input_size); + // Sample: test demonstrating allocation with std::vector. 
- rfaas::RdmaInfo info_v_in(executor,IBV_ACCESS_LOCAL_WRITE, rdmalib::functions::Submission::DATA_HEADER_SIZE); + + rfaas::RdmaInfo info_v_in(executor, IBV_ACCESS_LOCAL_WRITE, rdmalib::functions::Submission::DATA_HEADER_SIZE); rfaas::RdmaAllocator> allocator_v_in{info_v_in}; std::vector, rfaas::RdmaAllocator>> v_in(allocator_v_in); - rfaas::RdmaInfo info_v_out(executor,(IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE)); + rfaas::RdmaInfo info_v_out(executor, (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); rfaas::RdmaAllocator> allocator_v_out{info_v_out}; std::vector, rfaas::RdmaAllocator>> v_out(allocator_v_out); - // allocator_out.construct(out, opts.input_size); - - v_in.push_back({static_cast(opts.input_size),rdmalib::functions::Submission::DATA_HEADER_SIZE}); - v_out.push_back({static_cast(opts.input_size)}); - rdmalib::Buffer* in = &v_in[0]; - rdmalib::Buffer* out = &v_out[0]; + v_in.emplace_back(static_cast(opts.input_size), rdmalib::functions::Submission::DATA_HEADER_SIZE); + v_out.emplace_back(static_cast(opts.input_size)); + rdmalib::Buffer *in = &v_in[0]; + rdmalib::Buffer *out = &v_out[0]; // TODO: Since the for loop writes a value of 1 to each byte of the in buffer, diff --git a/rfaas/include/rfaas/rdma_allocator.hpp b/rfaas/include/rfaas/rdma_allocator.hpp index 7d8a6aa..731db11 100644 --- a/rfaas/include/rfaas/rdma_allocator.hpp +++ b/rfaas/include/rfaas/rdma_allocator.hpp @@ -15,11 +15,12 @@ namespace rfaas { struct RdmaInfo { public: - RdmaInfo(executor& executor, const int access, const int header_size=0) + RdmaInfo(executor &executor, const int &access, const int &header_size = 0) : executor(executor), access(access), header_size(header_size) {} - const executor& executor; - const int access; - const int header_size = 0; + + const executor &executor; + const int &access; + const int &header_size = 0; }; template @@ -28,45 +29,42 @@ namespace rfaas { public: typedef T value_type; - inline explicit RdmaAllocator(RdmaInfo& info) noexcept: 
_info(info) {} + inline constexpr explicit RdmaAllocator(RdmaInfo &info) noexcept: _info(info) {} template - constexpr explicit RdmaAllocator(const RdmaAllocator &) noexcept {} + inline constexpr explicit RdmaAllocator(const RdmaAllocator &) noexcept {} [[nodiscard]] inline T *allocate(const size_t &size) { if (size > std::numeric_limits::max() / sizeof(T)) throw std::bad_array_new_length(); - if (auto p = static_cast(std::malloc(size * sizeof(T) + _info.header_size))) - { - report(p, size * sizeof(T) + _info.header_size); + if (auto p = static_cast(std::malloc(size * sizeof(T) + _info.header_size))) { + report(p, size * sizeof(T) + _info.header_size); return p; } -// // Maybe we could directly call the memset function here -// if (auto buffer = new rdmalib::Buffer(size, _info.header_size)) { -// report(buffer, size); -// buffer->register_memory(_info.executor._state.pd(), _info.access); -// return buffer; -// } throw std::bad_alloc(); } - template - void construct(U* p, Args&&... args) { + template + inline void construct(U *p, Args &&... args) { ::new(p) U(std::forward(args)...); -// ::new(static_cast(p)) U(std::forward(args)...); p->register_memory(_info.executor._state.pd(), _info.access); } inline void deallocate(T *p, std::size_t size) noexcept { report(p, size, 0); - p->~T(); + std::free(p); } + template + struct rebind { + using other = RdmaAllocator; + }; + private: const RdmaInfo &_info; - void report(T *p, std::size_t size, bool alloc = true) const { + inline void report(T *p, std::size_t size, bool alloc = true) const { std::cout << (alloc ? 
"Alloc: " : "Dealloc: ") << size << " bytes at " << std::hex << std::showbase << reinterpret_cast(p) << std::dec << '\n'; @@ -74,10 +72,10 @@ namespace rfaas { }; template - bool operator==(const RdmaAllocator &, const RdmaAllocator &) { return true; } + inline bool operator==(const RdmaAllocator &, const RdmaAllocator &) { return true; } template - bool operator!=(const RdmaAllocator &, const RdmaAllocator &) { return false; } + inline bool operator!=(const RdmaAllocator &, const RdmaAllocator &) { return false; } } -#endif //__RFAAS_RDMA_ALLOCATOR_HPP__ +#endif //__RFAAS_RDMA_ALLOCATOR_HPP__ \ No newline at end of file