From acdae13dc1eafde8a8f71b038e332910f80bcd9d Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 14:53:03 +0900 Subject: [PATCH 1/3] [SWAP] Add swap mode property Signed-off-by: Jiho Chu --- nntrainer.ini.in | 3 ++ nntrainer/graph/network_graph.h | 9 +++--- nntrainer/models/model_common_properties.cpp | 2 ++ nntrainer/models/model_common_properties.h | 18 +++++++++++ nntrainer/models/neuralnet.cpp | 32 ++++++++++++-------- nntrainer/models/neuralnet.h | 11 +++---- nntrainer/tensor/manager.cpp | 4 ++- nntrainer/tensor/manager.h | 5 ++- 8 files changed, 59 insertions(+), 25 deletions(-) diff --git a/nntrainer.ini.in b/nntrainer.ini.in index 9bee8cd49..dc373dd65 100644 --- a/nntrainer.ini.in +++ b/nntrainer.ini.in @@ -16,3 +16,6 @@ memory_swap_path = @MEMORY_SWAP_PATH@ # look ahead window size memory_swap_lookahead = @MEMORY_SWAP_LOOKAHEAD@ + +# swap mode ("inference" or "train") +memory_swap_mode = "train" diff --git a/nntrainer/graph/network_graph.h b/nntrainer/graph/network_graph.h index 5c9adf036..49f71760a 100644 --- a/nntrainer/graph/network_graph.h +++ b/nntrainer/graph/network_graph.h @@ -58,12 +58,13 @@ class NetworkGraph { * @param[in] enable_swap enable memory swap for tensor * @param[in] swap_path memory swap file path when the swap is enabled */ - NetworkGraph(bool enable_swap, const std::string &swap_path = "", - unsigned int lookahead = 0, + NetworkGraph(bool enable_swap, const std::string &swap_mode = "train", + const std::string &swap_path = "", unsigned int lookahead = 0, const std::string &tensor_format_ = "NCHW", const std::string &tensor_dtype_ = "FP32-FP32") : - tensor_manager(std::make_shared(enable_swap, swap_path, lookahead, - tensor_format_, tensor_dtype_)), + tensor_manager(std::make_shared(enable_swap, swap_mode, swap_path, + lookahead, tensor_format_, + tensor_dtype_)), graph(), compiled(false), batch_size(0), diff --git a/nntrainer/models/model_common_properties.cpp b/nntrainer/models/model_common_properties.cpp index 
984cad662..aeb0b60f8 100644 --- a/nntrainer/models/model_common_properties.cpp +++ b/nntrainer/models/model_common_properties.cpp @@ -33,6 +33,8 @@ MemorySwap::MemorySwap(bool value) { set(value); } MemorySwapPath::MemorySwapPath(const std::string &value) { set(value); } +MemorySwapMode::MemorySwapMode(const std::string &value) { set(value); } + MemorySwapLookahead::MemorySwapLookahead(const unsigned int &value) { set(value); } diff --git a/nntrainer/models/model_common_properties.h b/nntrainer/models/model_common_properties.h index 3776afefc..333a78fea 100644 --- a/nntrainer/models/model_common_properties.h +++ b/nntrainer/models/model_common_properties.h @@ -179,6 +179,24 @@ class MemorySwapLookahead : public Property { MemorySwapLookahead(const unsigned int &value = 0); }; +/** + * @brief memory swap mode property + * + */ +class MemorySwapMode : public Property { +public: + static constexpr const char *key = + "memory_swap_mode"; /**< unique key to access */ + using prop_tag = str_prop_tag; /**< property type */ + + /** + * @brief Constructor + * + * @param value value to set, defaults to "train" + */ + MemorySwapMode(const std::string &value = "train"); +}; + /** * @brief Enumeration of Data Type for model & layer */ diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index d0e542825..692fe9909 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -67,11 +67,12 @@ namespace nntrainer { NeuralNetwork::NeuralNetwork() : model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(), props::LossScale()), - model_flex_props( props::Epochs(), props::TrainingBatchSize(), props::SavePath(), props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(), props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(), props::TensorFormat(), props::ModelTensorDataType()), + model_flex_props(props::Epochs(), props::TrainingBatchSize(), + props::SavePath(), 
props::ContinueTrain(), + props::SaveBestPath(), props::MemoryOptimization(), + props::MemorySwap(), props::MemorySwapPath(), + props::MemorySwapLookahead(), props::TensorFormat(), + props::ModelTensorDataType(), props::MemorySwapMode()), load_path(std::string()), epoch_idx(0), iter(0), @@ -86,11 +87,12 @@ NeuralNetwork::NeuralNetwork() : NeuralNetwork::NeuralNetwork(AppContext app_context_) : model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(), props::LossScale()), - model_flex_props( - props::Epochs(), props::TrainingBatchSize(), props::SavePath(), - props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(), - props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(), - props::TensorFormat(), props::ModelTensorDataType()), + model_flex_props(props::Epochs(), props::TrainingBatchSize(), + props::SavePath(), props::ContinueTrain(), + props::SaveBestPath(), props::MemoryOptimization(), + props::MemorySwap(), props::MemorySwapPath(), + props::MemorySwapLookahead(), props::TensorFormat(), + props::ModelTensorDataType(), props::MemorySwapMode()), load_path(std::string()), epoch_idx(0), iter(0), @@ -172,6 +174,8 @@ int NeuralNetwork::compile() { bool memory_swap = std::get(model_flex_props); const std::string memory_swap_path = std::get(model_flex_props); + const std::string memory_swap_mode = + std::get(model_flex_props); unsigned int lookahead = std::get(model_flex_props); @@ -181,8 +185,8 @@ int NeuralNetwork::compile() { const std::string tensor_type = to_string(std::get(model_flex_props)); - model_graph = NetworkGraph(memory_swap, memory_swap_path, lookahead, - tensor_format, tensor_type); + model_graph = NetworkGraph(memory_swap, memory_swap_mode, memory_swap_path, + lookahead, tensor_format, tensor_type); model_graph.setMemoryOptimizations( std::get(model_flex_props)); @@ -260,7 +264,9 @@ int NeuralNetwork::initialize(ExecutionMode mode) { } // Allocate weights - model_graph.allocateWeights(); + const std::string 
memory_swap_mode = + std::get(model_flex_props); + model_graph.allocateWeights(memory_swap_mode.compare("inference") != 0); initialized = true; diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index 30d2288fd..0e10d266b 100644 --- a/nntrainer/models/neuralnet.h +++ b/nntrainer/models/neuralnet.h @@ -624,12 +624,11 @@ s * @retval shared_ptr const std::string file_path) override; private: - using FlexiblePropTypes = - std::tuple; + using FlexiblePropTypes = std::tuple< + props::Epochs, props::TrainingBatchSize, props::SavePath, + props::ContinueTrain, props::SaveBestPath, props::MemoryOptimization, + props::MemorySwap, props::MemorySwapPath, props::MemorySwapLookahead, + props::TensorFormat, props::ModelTensorDataType, props::MemorySwapMode>; using RigidPropTypes = std::tuple, std::vector, props::ClipGradByGlobalNorm, diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index 8b4723579..bc1a37338 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -376,7 +376,9 @@ std::vector Manager::requestWeights( * and therefore, if we remove the calcDerivative order, then tests fails. */ - TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN; + TensorLifespan var_ls = swap_mode == "inference" + ? 
TensorLifespan::FORWARD_INFER_LIFESPAN + : TensorLifespan::MAX_LIFESPAN; TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN; std::vector ret; diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index 9ccde7711..4b7881425 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -141,13 +141,14 @@ class Manager { /** * @brief Constructor of Manager */ - Manager(bool enable_swap, const std::string &swap_path = "", + Manager(bool enable_swap, const std::string &swap_mode = "train", const std::string &swap_path = "", unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW", const std::string tensor_dtype_ = "FP32-FP32") : weight_pool(enable_swap, swap_path, "weight_pool"), tensor_pool(enable_swap, swap_path, "tensor_pool"), enable_optimizations(true), swap_lookahead(lookahead), + swap_mode(swap_mode), tensor_format(tensor_format_), tensor_dtype(split(tensor_dtype_, getRegex("\\-"))), exec_mode(ExecutionMode::TRAIN) {} @@ -523,6 +524,8 @@ class Manager { unsigned int swap_lookahead; /** lookahead for memory swap */ + std::string swap_mode; /** swap mode */ + std::string tensor_format; std::vector tensor_dtype; From 02c1474d5c560380b61015118d548a4448ac2e14 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 16:06:09 +0900 Subject: [PATCH 2/3] [SWAP] Add inference mode Signed-off-by: Jiho Chu --- nntrainer/graph/network_graph.h | 4 ++-- nntrainer/tensor/manager.cpp | 4 ++-- nntrainer/tensor/manager.h | 10 ++++++---- nntrainer/tensor/tensor_pool.cpp | 4 ++-- nntrainer/tensor/tensor_pool.h | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/nntrainer/graph/network_graph.h b/nntrainer/graph/network_graph.h index 49f71760a..7a1073d07 100644 --- a/nntrainer/graph/network_graph.h +++ b/nntrainer/graph/network_graph.h @@ -356,9 +356,9 @@ class NetworkGraph { /** * @brief Allocate memory for all the managed weights */ - void allocateWeights() { + void allocateWeights(bool init = true) { 
tensor_manager->allocateWeights( - std::get<3>(backward_iter_end->getExecutionOrder())); + std::get<3>(backward_iter_end->getExecutionOrder()), init); } /** diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index bc1a37338..1d0e0b610 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -144,10 +144,10 @@ void Manager::reinitialize() { tensor_pool.reinitialize(); } -void Manager::allocateWeights(unsigned int max_exec_order_) { +void Manager::allocateWeights(unsigned int max_exec_order_, bool init) { if (!weight_pool.isAllocated()) { finalizeTensorPool(weight_pool, 0, max_exec_order_); - weight_pool.allocate(); + weight_pool.allocate(init); } } diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index 4b7881425..53aa8f299 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -141,11 +141,13 @@ class Manager { /** * @brief Constructor of Manager */ - Manager(bool enable_swap, const std::string &swap_mode = "train", const std::string &swap_path = "", - unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW", + Manager(bool enable_swap, const std::string &swap_mode = "train", + const std::string &swap_path = "", unsigned int lookahead = 0, + const std::string tensor_format_ = "NCHW", const std::string tensor_dtype_ = "FP32-FP32") : weight_pool(enable_swap, swap_path, "weight_pool"), - tensor_pool(enable_swap, swap_path, "tensor_pool"), + tensor_pool(enable_swap && (swap_mode.compare("train") == 0), swap_path, + "tensor_pool"), enable_optimizations(true), swap_lookahead(lookahead), swap_mode(swap_mode), @@ -382,7 +384,7 @@ class Manager { * @note this will make requests to the tensor pool and allocate the * corresponding weights */ - void allocateWeights(unsigned int max_exec_order_); + void allocateWeights(unsigned int max_exec_order_, bool init = true); /** * @brief Deallocate memory for all the weights diff --git a/nntrainer/tensor/tensor_pool.cpp 
b/nntrainer/tensor/tensor_pool.cpp index 0a69f1dce..27f22d8a0 100644 --- a/nntrainer/tensor/tensor_pool.cpp +++ b/nntrainer/tensor/tensor_pool.cpp @@ -217,7 +217,7 @@ void TensorPool::setBatchSize(const std::string &name, unsigned int batch) { /** * @brief Allocate memory for all the managed tensors */ -void TensorPool::allocate() { +void TensorPool::allocate(bool init) { if (minMemoryRequirement() == 0) return; mem_pool->allocate(); @@ -229,7 +229,7 @@ void TensorPool::allocate() { if (!details || details->token == 0) { continue; } - spec.tensor->setData(mem_pool->getMemory(details->token), 0, true); + spec.tensor->setData(mem_pool->getMemory(details->token), 0, init); syncDependents(spec); } diff --git a/nntrainer/tensor/tensor_pool.h b/nntrainer/tensor/tensor_pool.h index fd17db7cd..1d2addb52 100644 --- a/nntrainer/tensor/tensor_pool.h +++ b/nntrainer/tensor/tensor_pool.h @@ -92,7 +92,7 @@ class TensorPool { /** * @brief Allocate memory for all the managed tensors */ - void allocate(); + void allocate(bool init = true); /** * @brief Deallocate memory for all the managed tensors From 42c1e8b872587d4e5048fe15ced08b987364d398 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 16:02:40 +0900 Subject: [PATCH 3/3] [SWAP] Modify cache for inference mode This patch is for inference mode for swap device. It re-enable mmap feature, but writing time is controlled manually, due to the inference mode handling. 
Signed-off-by: Jiho Chu --- nntrainer/tensor/cache_elem.cpp | 23 +++++++++++++++++------ nntrainer/tensor/cache_elem.h | 10 ++++++++-- nntrainer/tensor/cache_pool.cpp | 3 +++ nntrainer/tensor/swap_device.cpp | 20 ++++++++++++++++++-- nntrainer/tensor/swap_device.h | 6 +++--- 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/nntrainer/tensor/cache_elem.cpp b/nntrainer/tensor/cache_elem.cpp index 9a4f53c19..c7849d70a 100644 --- a/nntrainer/tensor/cache_elem.cpp +++ b/nntrainer/tensor/cache_elem.cpp @@ -23,10 +23,15 @@ namespace nntrainer { namespace { std::map policyToStr = { - {WRITE_BACK, "WRITE_BACK"}, {NO_WRITE_BACK, "NO_WRITE_BACK"}, - {READ_CONSIST, "READ_CONSIST"}, {NO_READ_CONSIST, "NO_READ_CONSIST"}, - {ALWAYS_SYNCED, "ALWAYS_SYNCED"}, {TEMPORAL, "TEMPORAL"}, - {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}}; + {WRITE_BACK, "WRITE_BACK"}, + {NO_WRITE_BACK, "NO_WRITE_BACK"}, + {READ_CONSIST, "READ_CONSIST"}, + {NO_READ_CONSIST, "NO_READ_CONSIST"}, + {ALWAYS_SYNCED, "ALWAYS_SYNCED"}, + {TEMPORAL, "TEMPORAL"}, + {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, + {ITERATION_CONSIST, "ITER_CONSIST"}, + {SYNC_ONCE, "SYNC_ONCE"}}; inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || @@ -37,7 +42,9 @@ inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) { inline bool checkDeallocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || ((opt & CacheElem::Options::LAST_ACCESS) && - (policy & CachePolicy::FIRST_LAST_SKIP))); + (policy & CachePolicy::FIRST_LAST_SKIP)) || + ((policy & FRIST_WRITE_CONSIST) && + !(opt & CacheElem::Options::FIRST_WRITE))); } } // namespace @@ -49,7 +56,7 @@ void CacheElem::swapIn(Options opt) { bool alloc_only = checkAllocOnly(policy, opt); void *buf = device->getBuffer(offset, length, alloc_only); - initial_opt = Options::NONE; + initial_opt = static_cast(initial_opt & 
~Options::FIRST_ACCESS); mem_data->setAddr((void *)buf); mem_data->setValid(true); active = true; @@ -63,8 +70,12 @@ void CacheElem::swapIn(Options opt) { void CacheElem::swapOut(Options opt) { std::lock_guard lock(device_mutex); + + opt = static_cast(opt | initial_opt); bool dealloc_only = checkDeallocOnly(policy, opt); void *buf = (void *)mem_data->getAddr(); + + initial_opt = static_cast(initial_opt & ~Options::FIRST_WRITE); device->putBuffer(buf, dealloc_only); mem_data->setAddr(nullptr); mem_data->setValid(false); diff --git a/nntrainer/tensor/cache_elem.h b/nntrainer/tensor/cache_elem.h index 9c024de49..0330df470 100644 --- a/nntrainer/tensor/cache_elem.h +++ b/nntrainer/tensor/cache_elem.h @@ -33,9 +33,12 @@ enum CachePolicy { NO_WRITE_BACK), /**< Will not be synchronized with device */ FIRST_LAST_SKIP = 0b10000, /**< Will skip first read and last write */ + FRIST_WRITE_CONSIST = 0b100000, /**< First invalidate will write to device */ ITERATION_CONSIST = (FIRST_LAST_SKIP | ALWAYS_SYNCED), /**< Will skip first read and last write. 
other behaviors will be same as ALWAYS_SYNCED */ + SYNC_ONCE = (FRIST_WRITE_CONSIST | READ_CONSIST | NO_WRITE_BACK), + /**< Will sync at first from the device, and the value will always consist */ }; /** @@ -48,6 +51,9 @@ class CacheElem { NONE = 0b0000, /**< No option */ FIRST_ACCESS = 0x0001, /**< First Access */ LAST_ACCESS = 0x0010, /**< Last Access */ + FIRST_WRITE = 0x0100, /**< First Write */ + FIRST_ACCESS_WRITE = FIRST_ACCESS | FIRST_WRITE, + /**< First access & write */ }; /** @@ -57,7 +63,7 @@ class CacheElem { explicit CacheElem(std::shared_ptr dev, unsigned int mem_id, size_t off, size_t len, std::shared_ptr data, CachePolicy pol = CachePolicy::ALWAYS_SYNCED) : - initial_opt(Options::FIRST_ACCESS), + initial_opt(Options::FIRST_ACCESS_WRITE), device(dev), active(false), id(mem_id), @@ -114,7 +120,7 @@ class CacheElem { * @brief reset access count * */ - void reset() { initial_opt = Options::FIRST_ACCESS; } + void reset() { initial_opt = Options::FIRST_ACCESS_WRITE; } private: Options initial_opt; /**< accessed */ diff --git a/nntrainer/tensor/cache_pool.cpp b/nntrainer/tensor/cache_pool.cpp index 2d555ceda..8f2d4e1f2 100644 --- a/nntrainer/tensor/cache_pool.cpp +++ b/nntrainer/tensor/cache_pool.cpp @@ -46,6 +46,9 @@ convertTensorLifespanToCachePolicy(const TensorLifespan lifespan) { case TensorLifespan::FORWARD_FUNC_LIFESPAN: policy = CachePolicy::TEMPORAL; break; + case TensorLifespan::FORWARD_INFER_LIFESPAN: + policy = CachePolicy::SYNC_ONCE; + break; case TensorLifespan::CALC_DERIV_LIFESPAN: policy = CachePolicy::TEMPORAL; break; diff --git a/nntrainer/tensor/swap_device.cpp b/nntrainer/tensor/swap_device.cpp index 0c215df36..c5f4c2a7b 100644 --- a/nntrainer/tensor/swap_device.cpp +++ b/nntrainer/tensor/swap_device.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -70,7 +71,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << std::string(strerror_r(errno, error_buf, error_buflen)); void *buf 
= static_cast(ptr + diff); - mapped[buf] = std::make_pair(ptr, len); + mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size); return buf; #else @@ -88,7 +89,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << "SwapDevice: seek file: " << dev_path; len = read(fd, ptr, size); - NNTR_THROW_IF(len != (ssize_t)size, std::runtime_error) + NNTR_THROW_IF(len != (size_t)size, std::runtime_error) << "SwapDevice: read file: " << dev_path; } @@ -107,7 +108,22 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) { NNTR_THROW_IF(mapped.find(ptr) == mapped.end(), std::runtime_error) << "Couldn't find buffer"; + off_t off; + ssize_t len; + auto info = mapped[ptr]; + if (!dealloc_only) { + off = lseek(fd, std::get<2>(info), SEEK_SET); + NNTR_THROW_IF(off < 0, std::runtime_error) + << "SwapDevice: seek file: " << dev_path; + + ssize_t size = std::get<3>(info); + len = write(fd, ptr, size); + NNTR_THROW_IF(len != size, std::runtime_error) + << "SwapDevice: write file: " << len << "::" << std::to_string(size) + << dev_path; + } + ret = munmap(std::get(info), std::get(info)); const size_t error_buflen = 100; char error_buf[error_buflen]; diff --git a/nntrainer/tensor/swap_device.h b/nntrainer/tensor/swap_device.h index 11d5dcd27..720c8d04b 100644 --- a/nntrainer/tensor/swap_device.h +++ b/nntrainer/tensor/swap_device.h @@ -26,7 +26,7 @@ #include /* Uncomment this to use mmap for swap data */ -//#define USE_MMAP +#define USE_MMAP namespace nntrainer { @@ -119,8 +119,8 @@ class SwapDevice { int fd; /**< device file description */ #ifdef USE_MMAP - std::map> - mapped; /**< > */ + std::map> + mapped; /**< > */ #else std::map> allocated; /**< > */