From 70967764de236a28f1c8ab1a1c5d83aaff745c49 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 16:02:40 +0900 Subject: [PATCH] [SWAP] Modify cache for inference mode This patch adds inference-mode support for the swap device. It re-enables the mmap feature, but write-back timing is controlled manually to suit inference-mode handling. Signed-off-by: Jiho Chu --- nntrainer/tensor/cache_elem.cpp | 16 +++++++++++++--- nntrainer/tensor/cache_elem.h | 10 ++++++++-- nntrainer/tensor/cache_pool.cpp | 3 +++ nntrainer/tensor/swap_device.cpp | 19 +++++++++++++++++-- nntrainer/tensor/swap_device.h | 6 +++--- 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/nntrainer/tensor/cache_elem.cpp b/nntrainer/tensor/cache_elem.cpp index 9a4f53c19..91b70c0d1 100644 --- a/nntrainer/tensor/cache_elem.cpp +++ b/nntrainer/tensor/cache_elem.cpp @@ -26,7 +26,9 @@ std::map policyToStr = { {WRITE_BACK, "WRITE_BACK"}, {NO_WRITE_BACK, "NO_WRITE_BACK"}, {READ_CONSIST, "READ_CONSIST"}, {NO_READ_CONSIST, "NO_READ_CONSIST"}, {ALWAYS_SYNCED, "ALWAYS_SYNCED"}, {TEMPORAL, "TEMPORAL"}, - {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}}; + {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}, + {SYNC_ONCE, "SYNC_ONCE"} }; inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || @@ -37,7 +39,9 @@ inline bool checkDeallocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || ((opt & CacheElem::Options::LAST_ACCESS) && - (policy & CachePolicy::FIRST_LAST_SKIP))); + (policy & CachePolicy::FIRST_LAST_SKIP)) || + ((policy & FRIST_WRITE_CONSIST) && + !(opt & CacheElem::Options::FIRST_WRITE))); } } // namespace @@ -49,7 +53,8 @@ void CacheElem::swapIn(Options opt) { bool alloc_only = checkAllocOnly(policy, opt); void *buf = device->getBuffer(offset, length, alloc_only); 
- initial_opt = Options::NONE; + initial_opt = + static_cast(initial_opt & ~Options::FIRST_ACCESS); mem_data->setAddr((void *)buf); mem_data->setValid(true); active = true; @@ -63,8 +68,13 @@ void CacheElem::swapIn(Options opt) { void CacheElem::swapOut(Options opt) { std::lock_guard lock(device_mutex); + + opt = static_cast(opt | initial_opt); bool dealloc_only = checkDeallocOnly(policy, opt); void *buf = (void *)mem_data->getAddr(); + + initial_opt = + static_cast(initial_opt & ~Options::FIRST_WRITE); device->putBuffer(buf, dealloc_only); mem_data->setAddr(nullptr); mem_data->setValid(false); diff --git a/nntrainer/tensor/cache_elem.h b/nntrainer/tensor/cache_elem.h index de891d407..a4c6e88c0 100644 --- a/nntrainer/tensor/cache_elem.h +++ b/nntrainer/tensor/cache_elem.h @@ -33,9 +33,12 @@ enum CachePolicy { NO_WRITE_BACK), /**< Will not be synchronized with device */ FIRST_LAST_SKIP = 0b10000, /**< Will skip first read and last write */ + FRIST_WRITE_CONSIST = 0b100000, /**< First invalidate will write to device */ ITERATION_CONSIST = (FIRST_LAST_SKIP | ALWAYS_SYNCED), /**< Will skip first read and last write. 
other behaviors will be same as ALWAYS_SYNCED */ + SYNC_ONCE = (FRIST_WRITE_CONSIST | READ_CONSIST | NO_WRITE_BACK), + /**< Will sync at first from the device, and the value will always consist */ }; /** @@ -48,6 +51,9 @@ class CacheElem { NONE = 0b0000, /**< No option */ FIRST_ACCESS = 0x0001, /**< First Access */ LAST_ACCESS = 0x0010, /**< Last Access */ + FIRST_WRITE = 0x0100, /**< First Write */ + FIRST_ACCESS_WRITE = FIRST_ACCESS | FIRST_WRITE, + /**< First access & write */ }; /** @@ -58,7 +64,7 @@ class CacheElem { size_t off, size_t len, std::shared_ptr data, CachePolicy pol = CachePolicy::ALWAYS_SYNCED) : - initial_opt(Options::FIRST_ACCESS), + initial_opt(Options::FIRST_ACCESS_WRITE), device(dev), active(false), id(mem_id), @@ -112,7 +118,7 @@ class CacheElem { * @brief reset access count * */ - void reset() { initial_opt = Options::FIRST_ACCESS; } + void reset() { initial_opt = Options::FIRST_ACCESS_WRITE; } private: Options initial_opt; /**< accessed */ diff --git a/nntrainer/tensor/cache_pool.cpp b/nntrainer/tensor/cache_pool.cpp index 2d555ceda..8f2d4e1f2 100644 --- a/nntrainer/tensor/cache_pool.cpp +++ b/nntrainer/tensor/cache_pool.cpp @@ -46,6 +46,9 @@ convertTensorLifespanToCachePolicy(const TensorLifespan lifespan) { case TensorLifespan::FORWARD_FUNC_LIFESPAN: policy = CachePolicy::TEMPORAL; break; + case TensorLifespan::FORWARD_INFER_LIFESPAN: + policy = CachePolicy::SYNC_ONCE; + break; case TensorLifespan::CALC_DERIV_LIFESPAN: policy = CachePolicy::TEMPORAL; break; diff --git a/nntrainer/tensor/swap_device.cpp b/nntrainer/tensor/swap_device.cpp index 0c215df36..c33eeaad1 100644 --- a/nntrainer/tensor/swap_device.cpp +++ b/nntrainer/tensor/swap_device.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -70,7 +71,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << std::string(strerror_r(errno, error_buf, error_buflen)); void *buf = static_cast(ptr + diff); - mapped[buf] = 
std::make_pair(ptr, len); + mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size); return buf; #else @@ -88,7 +89,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << "SwapDevice: seek file: " << dev_path; len = read(fd, ptr, size); - NNTR_THROW_IF(len != (ssize_t)size, std::runtime_error) + NNTR_THROW_IF(len != (size_t)size, std::runtime_error) << "SwapDevice: read file: " << dev_path; } @@ -107,7 +108,21 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) { NNTR_THROW_IF(mapped.find(ptr) == mapped.end(), std::runtime_error) << "Couldn't find buffer"; + off_t off; + ssize_t len; + auto info = mapped[ptr]; + if (!dealloc_only) { + off = lseek(fd, std::get<2>(info), SEEK_SET); + NNTR_THROW_IF(off < 0, std::runtime_error) + << "SwapDevice: seek file: " << dev_path; + + ssize_t size = std::get<3>(info); + len = write(fd, ptr, size); + NNTR_THROW_IF(len != size, std::runtime_error) + << "SwapDevice: write file: " << len << "::"<< std::to_string(size) << dev_path; + } + ret = munmap(std::get(info), std::get(info)); const size_t error_buflen = 100; char error_buf[error_buflen]; diff --git a/nntrainer/tensor/swap_device.h b/nntrainer/tensor/swap_device.h index 11d5dcd27..720c8d04b 100644 --- a/nntrainer/tensor/swap_device.h +++ b/nntrainer/tensor/swap_device.h @@ -26,7 +26,7 @@ #include /* Uncomment this to use mmap for swap data */ -//#define USE_MMAP +#define USE_MMAP namespace nntrainer { @@ -119,8 +119,8 @@ class SwapDevice { int fd; /**< device file description */ #ifdef USE_MMAP - std::map> - mapped; /**< > */ + std::map> + mapped; /**< > */ #else std::map> allocated; /**< > */