From acdae13dc1eafde8a8f71b038e332910f80bcd9d Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 14:53:03 +0900 Subject: [PATCH 1/3] [SWAP] Add swap mode property Signed-off-by: Jiho Chu --- nntrainer.ini.in | 3 ++ nntrainer/graph/network_graph.h | 9 +++--- nntrainer/models/model_common_properties.cpp | 2 ++ nntrainer/models/model_common_properties.h | 18 +++++++++++ nntrainer/models/neuralnet.cpp | 32 ++++++++++++-------- nntrainer/models/neuralnet.h | 11 +++---- nntrainer/tensor/manager.cpp | 4 ++- nntrainer/tensor/manager.h | 5 ++- 8 files changed, 59 insertions(+), 25 deletions(-) diff --git a/nntrainer.ini.in b/nntrainer.ini.in index 9bee8cd49..dc373dd65 100644 --- a/nntrainer.ini.in +++ b/nntrainer.ini.in @@ -16,3 +16,6 @@ memory_swap_path = @MEMORY_SWAP_PATH@ # look ahead window size memory_swap_lookahead = @MEMORY_SWAP_LOOKAHEAD@ + +# swap mode ("inference" or "train") +memory_swap_mode = "train" diff --git a/nntrainer/graph/network_graph.h b/nntrainer/graph/network_graph.h index 5c9adf036..49f71760a 100644 --- a/nntrainer/graph/network_graph.h +++ b/nntrainer/graph/network_graph.h @@ -58,12 +58,13 @@ class NetworkGraph { * @param[in] enable_swap enable memory swap for tensor * @param[in] swap_path memory swap file path when the swap is enabled */ - NetworkGraph(bool enable_swap, const std::string &swap_path = "", - unsigned int lookahead = 0, + NetworkGraph(bool enable_swap, const std::string &swap_mode = "train", + const std::string &swap_path = "", unsigned int lookahead = 0, const std::string &tensor_format_ = "NCHW", const std::string &tensor_dtype_ = "FP32-FP32") : - tensor_manager(std::make_shared(enable_swap, swap_path, lookahead, - tensor_format_, tensor_dtype_)), + tensor_manager(std::make_shared(enable_swap, swap_mode, swap_path, + lookahead, tensor_format_, + tensor_dtype_)), graph(), compiled(false), batch_size(0), diff --git a/nntrainer/models/model_common_properties.cpp b/nntrainer/models/model_common_properties.cpp index 
984cad662..aeb0b60f8 100644 --- a/nntrainer/models/model_common_properties.cpp +++ b/nntrainer/models/model_common_properties.cpp @@ -33,6 +33,8 @@ MemorySwap::MemorySwap(bool value) { set(value); } MemorySwapPath::MemorySwapPath(const std::string &value) { set(value); } +MemorySwapMode::MemorySwapMode(const std::string &value) { set(value); } + MemorySwapLookahead::MemorySwapLookahead(const unsigned int &value) { set(value); } diff --git a/nntrainer/models/model_common_properties.h b/nntrainer/models/model_common_properties.h index 3776afefc..333a78fea 100644 --- a/nntrainer/models/model_common_properties.h +++ b/nntrainer/models/model_common_properties.h @@ -179,6 +179,24 @@ class MemorySwapLookahead : public Property { MemorySwapLookahead(const unsigned int &value = 0); }; +/** + * @brief memory swap mode property + * + */ +class MemorySwapMode : public Property { +public: + static constexpr const char *key = + "memory_swap_mode"; /**< unique key to access */ + using prop_tag = str_prop_tag; /**< property type */ + + /** + * @brief Constructor + * + * @param value value to set, defaults to "train" + */ + MemorySwapMode(const std::string &value = "train"); +}; + /** * @brief Enumeration of Data Type for model & layer */ diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index d0e542825..692fe9909 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -67,11 +67,12 @@ namespace nntrainer { NeuralNetwork::NeuralNetwork() : model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(), props::LossScale()), - model_flex_props( props::Epochs(), props::TrainingBatchSize(), props::SavePath(), props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(), props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(), props::TensorFormat(), props::ModelTensorDataType()), + model_flex_props(props::Epochs(), props::TrainingBatchSize(), + props::SavePath(), 
props::ContinueTrain(), + props::SaveBestPath(), props::MemoryOptimization(), + props::MemorySwap(), props::MemorySwapPath(), + props::MemorySwapLookahead(), props::TensorFormat(), + props::ModelTensorDataType(), props::MemorySwapMode()), load_path(std::string()), epoch_idx(0), iter(0), @@ -86,11 +87,12 @@ NeuralNetwork::NeuralNetwork() : NeuralNetwork::NeuralNetwork(AppContext app_context_) : model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(), props::LossScale()), - model_flex_props( - props::Epochs(), props::TrainingBatchSize(), props::SavePath(), - props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(), - props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(), - props::TensorFormat(), props::ModelTensorDataType()), + model_flex_props(props::Epochs(), props::TrainingBatchSize(), + props::SavePath(), props::ContinueTrain(), + props::SaveBestPath(), props::MemoryOptimization(), + props::MemorySwap(), props::MemorySwapPath(), + props::MemorySwapLookahead(), props::TensorFormat(), + props::ModelTensorDataType(), props::MemorySwapMode()), load_path(std::string()), epoch_idx(0), iter(0), @@ -172,6 +174,8 @@ int NeuralNetwork::compile() { bool memory_swap = std::get(model_flex_props); const std::string memory_swap_path = std::get(model_flex_props); + const std::string memory_swap_mode = + std::get(model_flex_props); unsigned int lookahead = std::get(model_flex_props); @@ -181,8 +185,8 @@ int NeuralNetwork::compile() { const std::string tensor_type = to_string(std::get(model_flex_props)); - model_graph = NetworkGraph(memory_swap, memory_swap_path, lookahead, - tensor_format, tensor_type); + model_graph = NetworkGraph(memory_swap, memory_swap_mode, memory_swap_path, + lookahead, tensor_format, tensor_type); model_graph.setMemoryOptimizations( std::get(model_flex_props)); @@ -260,7 +264,9 @@ int NeuralNetwork::initialize(ExecutionMode mode) { } // Allocate weights - model_graph.allocateWeights(); + const std::string 
memory_swap_mode = + std::get(model_flex_props); + model_graph.allocateWeights(memory_swap_mode.compare("inference") != 0); initialized = true; diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index 30d2288fd..0e10d266b 100644 --- a/nntrainer/models/neuralnet.h +++ b/nntrainer/models/neuralnet.h @@ -624,12 +624,11 @@ s * @retval shared_ptr const std::string file_path) override; private: - using FlexiblePropTypes = - std::tuple; + using FlexiblePropTypes = std::tuple< + props::Epochs, props::TrainingBatchSize, props::SavePath, + props::ContinueTrain, props::SaveBestPath, props::MemoryOptimization, + props::MemorySwap, props::MemorySwapPath, props::MemorySwapLookahead, + props::TensorFormat, props::ModelTensorDataType, props::MemorySwapMode>; using RigidPropTypes = std::tuple, std::vector, props::ClipGradByGlobalNorm, diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index 8b4723579..bc1a37338 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -376,7 +376,9 @@ std::vector Manager::requestWeights( * and therefore, if we remove the calcDerivative order, then tests fails. */ - TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN; + TensorLifespan var_ls = swap_mode == "inference" + ? 
TensorLifespan::FORWARD_INFER_LIFESPAN + : TensorLifespan::MAX_LIFESPAN; TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN; std::vector ret; diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index 9ccde7711..4b7881425 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -141,13 +141,14 @@ class Manager { /** * @brief Constructor of Manager */ - Manager(bool enable_swap, const std::string &swap_path = "", + Manager(bool enable_swap, const std::string &swap_mode = "train", const std::string &swap_path = "", unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW", const std::string tensor_dtype_ = "FP32-FP32") : weight_pool(enable_swap, swap_path, "weight_pool"), tensor_pool(enable_swap, swap_path, "tensor_pool"), enable_optimizations(true), swap_lookahead(lookahead), + swap_mode(swap_mode), tensor_format(tensor_format_), tensor_dtype(split(tensor_dtype_, getRegex("\\-"))), exec_mode(ExecutionMode::TRAIN) {} @@ -523,6 +524,8 @@ class Manager { unsigned int swap_lookahead; /** lookahead for memory swap */ + std::string swap_mode; /** swap mode */ + std::string tensor_format; std::vector tensor_dtype; From 02c1474d5c560380b61015118d548a4448ac2e14 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 16:06:09 +0900 Subject: [PATCH 2/3] [SWAP] Add inference mode Signed-off-by: Jiho Chu --- nntrainer/graph/network_graph.h | 4 ++-- nntrainer/tensor/manager.cpp | 4 ++-- nntrainer/tensor/manager.h | 10 ++++++---- nntrainer/tensor/tensor_pool.cpp | 4 ++-- nntrainer/tensor/tensor_pool.h | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/nntrainer/graph/network_graph.h b/nntrainer/graph/network_graph.h index 49f71760a..7a1073d07 100644 --- a/nntrainer/graph/network_graph.h +++ b/nntrainer/graph/network_graph.h @@ -356,9 +356,9 @@ class NetworkGraph { /** * @brief Allocate memory for all the managed weights */ - void allocateWeights() { + void allocateWeights(bool init = true) { 
tensor_manager->allocateWeights( - std::get<3>(backward_iter_end->getExecutionOrder())); + std::get<3>(backward_iter_end->getExecutionOrder()), init); } /** diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index bc1a37338..1d0e0b610 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -144,10 +144,10 @@ void Manager::reinitialize() { tensor_pool.reinitialize(); } -void Manager::allocateWeights(unsigned int max_exec_order_) { +void Manager::allocateWeights(unsigned int max_exec_order_, bool init) { if (!weight_pool.isAllocated()) { finalizeTensorPool(weight_pool, 0, max_exec_order_); - weight_pool.allocate(); + weight_pool.allocate(init); } } diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index 4b7881425..53aa8f299 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -141,11 +141,13 @@ class Manager { /** * @brief Constructor of Manager */ - Manager(bool enable_swap, const std::string &swap_mode = "train", const std::string &swap_path = "", - unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW", + Manager(bool enable_swap, const std::string &swap_mode = "train", + const std::string &swap_path = "", unsigned int lookahead = 0, + const std::string tensor_format_ = "NCHW", const std::string tensor_dtype_ = "FP32-FP32") : weight_pool(enable_swap, swap_path, "weight_pool"), - tensor_pool(enable_swap, swap_path, "tensor_pool"), + tensor_pool(enable_swap && (swap_mode.compare("train") == 0), swap_path, + "tensor_pool"), enable_optimizations(true), swap_lookahead(lookahead), swap_mode(swap_mode), @@ -382,7 +384,7 @@ class Manager { * @note this will make requests to the tensor pool and allocate the * corresponding weights */ - void allocateWeights(unsigned int max_exec_order_); + void allocateWeights(unsigned int max_exec_order_, bool init = true); /** * @brief Deallocate memory for all the weights diff --git a/nntrainer/tensor/tensor_pool.cpp 
b/nntrainer/tensor/tensor_pool.cpp index 0a69f1dce..27f22d8a0 100644 --- a/nntrainer/tensor/tensor_pool.cpp +++ b/nntrainer/tensor/tensor_pool.cpp @@ -217,7 +217,7 @@ void TensorPool::setBatchSize(const std::string &name, unsigned int batch) { /** * @brief Allocate memory for all the managed tensors */ -void TensorPool::allocate() { +void TensorPool::allocate(bool init) { if (minMemoryRequirement() == 0) return; mem_pool->allocate(); @@ -229,7 +229,7 @@ void TensorPool::allocate() { if (!details || details->token == 0) { continue; } - spec.tensor->setData(mem_pool->getMemory(details->token), 0, true); + spec.tensor->setData(mem_pool->getMemory(details->token), 0, init); syncDependents(spec); } diff --git a/nntrainer/tensor/tensor_pool.h b/nntrainer/tensor/tensor_pool.h index fd17db7cd..1d2addb52 100644 --- a/nntrainer/tensor/tensor_pool.h +++ b/nntrainer/tensor/tensor_pool.h @@ -92,7 +92,7 @@ class TensorPool { /** * @brief Allocate memory for all the managed tensors */ - void allocate(); + void allocate(bool init = true); /** * @brief Deallocate memory for all the managed tensors From 42c1e8b872587d4e5048fe15ced08b987364d398 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 16:02:40 +0900 Subject: [PATCH 3/3] [SWAP] Modify cache for inference mode This patch is for inference mode for swap device. It re-enable mmap feature, but writing time is controlled manually, due to the inference mode handling. 
Signed-off-by: Jiho Chu --- nntrainer/tensor/cache_elem.cpp | 23 +++++++++++++++++------ nntrainer/tensor/cache_elem.h | 10 ++++++++-- nntrainer/tensor/cache_pool.cpp | 3 +++ nntrainer/tensor/swap_device.cpp | 20 ++++++++++++++++++-- nntrainer/tensor/swap_device.h | 6 +++--- 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/nntrainer/tensor/cache_elem.cpp b/nntrainer/tensor/cache_elem.cpp index 9a4f53c19..c7849d70a 100644 --- a/nntrainer/tensor/cache_elem.cpp +++ b/nntrainer/tensor/cache_elem.cpp @@ -23,10 +23,15 @@ namespace nntrainer { namespace { std::map policyToStr = { - {WRITE_BACK, "WRITE_BACK"}, {NO_WRITE_BACK, "NO_WRITE_BACK"}, - {READ_CONSIST, "READ_CONSIST"}, {NO_READ_CONSIST, "NO_READ_CONSIST"}, - {ALWAYS_SYNCED, "ALWAYS_SYNCED"}, {TEMPORAL, "TEMPORAL"}, - {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}}; + {WRITE_BACK, "WRITE_BACK"}, + {NO_WRITE_BACK, "NO_WRITE_BACK"}, + {READ_CONSIST, "READ_CONSIST"}, + {NO_READ_CONSIST, "NO_READ_CONSIST"}, + {ALWAYS_SYNCED, "ALWAYS_SYNCED"}, + {TEMPORAL, "TEMPORAL"}, + {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, + {ITERATION_CONSIST, "ITER_CONSIST"}, + {SYNC_ONCE, "SYNC_ONCE"}}; inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || @@ -37,7 +42,9 @@ inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) { inline bool checkDeallocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || ((opt & CacheElem::Options::LAST_ACCESS) && - (policy & CachePolicy::FIRST_LAST_SKIP))); + (policy & CachePolicy::FIRST_LAST_SKIP)) || + ((policy & FRIST_WRITE_CONSIST) && + !(opt & CacheElem::Options::FIRST_WRITE))); } } // namespace @@ -49,7 +56,7 @@ void CacheElem::swapIn(Options opt) { bool alloc_only = checkAllocOnly(policy, opt); void *buf = device->getBuffer(offset, length, alloc_only); - initial_opt = Options::NONE; + initial_opt = static_cast(initial_opt & 
~Options::FIRST_ACCESS); mem_data->setAddr((void *)buf); mem_data->setValid(true); active = true; @@ -63,8 +70,12 @@ void CacheElem::swapIn(Options opt) { void CacheElem::swapOut(Options opt) { std::lock_guard lock(device_mutex); + + opt = static_cast(opt | initial_opt); bool dealloc_only = checkDeallocOnly(policy, opt); void *buf = (void *)mem_data->getAddr(); + + initial_opt = static_cast(initial_opt & ~Options::FIRST_WRITE); device->putBuffer(buf, dealloc_only); mem_data->setAddr(nullptr); mem_data->setValid(false); diff --git a/nntrainer/tensor/cache_elem.h b/nntrainer/tensor/cache_elem.h index 9c024de49..0330df470 100644 --- a/nntrainer/tensor/cache_elem.h +++ b/nntrainer/tensor/cache_elem.h @@ -33,9 +33,12 @@ enum CachePolicy { NO_WRITE_BACK), /**< Will not be synchronized with device */ FIRST_LAST_SKIP = 0b10000, /**< Will skip first read and last write */ + FRIST_WRITE_CONSIST = 0b100000, /**< First invalidate will write to device */ ITERATION_CONSIST = (FIRST_LAST_SKIP | ALWAYS_SYNCED), /**< Will skip first read and last write. 
other behaviors will be same as ALWAYS_SYNCED */ + SYNC_ONCE = (FRIST_WRITE_CONSIST | READ_CONSIST | NO_WRITE_BACK), + /**< Will sync at first from the device, and the value will always consist */ }; /** @@ -48,6 +51,9 @@ class CacheElem { NONE = 0b0000, /**< No option */ FIRST_ACCESS = 0x0001, /**< First Access */ LAST_ACCESS = 0x0010, /**< Last Access */ + FIRST_WRITE = 0x0100, /**< First Write */ + FIRST_ACCESS_WRITE = FIRST_ACCESS | FIRST_WRITE, + /**< First access & write */ }; /** @@ -57,7 +63,7 @@ class CacheElem { explicit CacheElem(std::shared_ptr dev, unsigned int mem_id, size_t off, size_t len, std::shared_ptr data, CachePolicy pol = CachePolicy::ALWAYS_SYNCED) : - initial_opt(Options::FIRST_ACCESS), + initial_opt(Options::FIRST_ACCESS_WRITE), device(dev), active(false), id(mem_id), @@ -114,7 +120,7 @@ class CacheElem { * @brief reset access count * */ - void reset() { initial_opt = Options::FIRST_ACCESS; } + void reset() { initial_opt = Options::FIRST_ACCESS_WRITE; } private: Options initial_opt; /**< accessed */ diff --git a/nntrainer/tensor/cache_pool.cpp b/nntrainer/tensor/cache_pool.cpp index 2d555ceda..8f2d4e1f2 100644 --- a/nntrainer/tensor/cache_pool.cpp +++ b/nntrainer/tensor/cache_pool.cpp @@ -46,6 +46,9 @@ convertTensorLifespanToCachePolicy(const TensorLifespan lifespan) { case TensorLifespan::FORWARD_FUNC_LIFESPAN: policy = CachePolicy::TEMPORAL; break; + case TensorLifespan::FORWARD_INFER_LIFESPAN: + policy = CachePolicy::SYNC_ONCE; + break; case TensorLifespan::CALC_DERIV_LIFESPAN: policy = CachePolicy::TEMPORAL; break; diff --git a/nntrainer/tensor/swap_device.cpp b/nntrainer/tensor/swap_device.cpp index 0c215df36..c5f4c2a7b 100644 --- a/nntrainer/tensor/swap_device.cpp +++ b/nntrainer/tensor/swap_device.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -70,7 +71,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << std::string(strerror_r(errno, error_buf, error_buflen)); void *buf 
= static_cast(ptr + diff); - mapped[buf] = std::make_pair(ptr, len); + mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size); return buf; #else @@ -88,7 +89,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << "SwapDevice: seek file: " << dev_path; len = read(fd, ptr, size); - NNTR_THROW_IF(len != (ssize_t)size, std::runtime_error) + NNTR_THROW_IF(len != (size_t)size, std::runtime_error) << "SwapDevice: read file: " << dev_path; } @@ -107,7 +108,22 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) { NNTR_THROW_IF(mapped.find(ptr) == mapped.end(), std::runtime_error) << "Couldn't find buffer"; + off_t off; + ssize_t len; + auto info = mapped[ptr]; + if (!dealloc_only) { + off = lseek(fd, std::get<2>(info), SEEK_SET); + NNTR_THROW_IF(off < 0, std::runtime_error) + << "SwapDevice: seek file: " << dev_path; + + ssize_t size = std::get<3>(info); + len = write(fd, ptr, size); + NNTR_THROW_IF(len != size, std::runtime_error) + << "SwapDevice: write file: " << len << "::" << std::to_string(size) + << dev_path; + } + ret = munmap(std::get(info), std::get(info)); const size_t error_buflen = 100; char error_buf[error_buflen]; diff --git a/nntrainer/tensor/swap_device.h b/nntrainer/tensor/swap_device.h index 11d5dcd27..720c8d04b 100644 --- a/nntrainer/tensor/swap_device.h +++ b/nntrainer/tensor/swap_device.h @@ -26,7 +26,7 @@ #include /* Uncomment this to use mmap for swap data */ -//#define USE_MMAP +#define USE_MMAP namespace nntrainer { @@ -119,8 +119,8 @@ class SwapDevice { int fd; /**< device file description */ #ifdef USE_MMAP - std::map> - mapped; /**< > */ + std::map> + mapped; /**< > */ #else std::map> allocated; /**< > */