From 70967764de236a28f1c8ab1a1c5d83aaff745c49 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Thu, 31 Aug 2023 16:02:40 +0900 Subject: [PATCH] [SWAP] Modify cache for inference mode This patch adds inference-mode support for the swap device. It re-enables the mmap feature, but write-back timing is controlled manually to suit inference-mode handling. Signed-off-by: Jiho Chu --- nntrainer/tensor/cache_elem.cpp | 16 +++++++++++++--- nntrainer/tensor/cache_elem.h | 10 ++++++++-- nntrainer/tensor/cache_pool.cpp | 3 +++ nntrainer/tensor/swap_device.cpp | 19 +++++++++++++++++-- nntrainer/tensor/swap_device.h | 6 +++--- 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/nntrainer/tensor/cache_elem.cpp b/nntrainer/tensor/cache_elem.cpp index 9a4f53c19..91b70c0d1 100644 --- a/nntrainer/tensor/cache_elem.cpp +++ b/nntrainer/tensor/cache_elem.cpp @@ -26,7 +26,9 @@ std::map policyToStr = { {WRITE_BACK, "WRITE_BACK"}, {NO_WRITE_BACK, "NO_WRITE_BACK"}, {READ_CONSIST, "READ_CONSIST"}, {NO_READ_CONSIST, "NO_READ_CONSIST"}, {ALWAYS_SYNCED, "ALWAYS_SYNCED"}, {TEMPORAL, "TEMPORAL"}, - {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}}; + {FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}, + {SYNC_ONCE, "SYNC_ONCE"} }; inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || @@ -37,7 +39,9 @@ inline bool checkDeallocOnly(CachePolicy policy, CacheElem::Options opt) { return ((policy & CachePolicy::NO_READ_CONSIST) || ((opt & CacheElem::Options::LAST_ACCESS) && - (policy & CachePolicy::FIRST_LAST_SKIP))); + (policy & CachePolicy::FIRST_LAST_SKIP)) || + ((policy & FRIST_WRITE_CONSIST) && + !(opt & CacheElem::Options::FIRST_WRITE))); } } // namespace @@ -49,7 +53,8 @@ void CacheElem::swapIn(Options opt) { bool alloc_only = checkAllocOnly(policy, opt); void *buf = device->getBuffer(offset, length, alloc_only); 
- initial_opt = Options::NONE; + initial_opt = + static_cast(initial_opt & ~Options::FIRST_ACCESS); mem_data->setAddr((void *)buf); mem_data->setValid(true); active = true; @@ -63,8 +68,13 @@ void CacheElem::swapIn(Options opt) { void CacheElem::swapOut(Options opt) { std::lock_guard lock(device_mutex); + + opt = static_cast(opt | initial_opt); bool dealloc_only = checkDeallocOnly(policy, opt); void *buf = (void *)mem_data->getAddr(); + + initial_opt = + static_cast(initial_opt & ~Options::FIRST_WRITE); device->putBuffer(buf, dealloc_only); mem_data->setAddr(nullptr); mem_data->setValid(false); diff --git a/nntrainer/tensor/cache_elem.h b/nntrainer/tensor/cache_elem.h index de891d407..a4c6e88c0 100644 --- a/nntrainer/tensor/cache_elem.h +++ b/nntrainer/tensor/cache_elem.h @@ -33,9 +33,12 @@ enum CachePolicy { NO_WRITE_BACK), /**< Will not be synchronized with device */ FIRST_LAST_SKIP = 0b10000, /**< Will skip first read and last write */ + FRIST_WRITE_CONSIST = 0b100000, /**< First invalidate will write to device */ ITERATION_CONSIST = (FIRST_LAST_SKIP | ALWAYS_SYNCED), /**< Will skip first read and last write. 
other behaviors will be same as ALWAYS_SYNCED */ + SYNC_ONCE = (FRIST_WRITE_CONSIST | READ_CONSIST | NO_WRITE_BACK), + /**< Will sync at first from the device, and the value will always consist */ }; /** @@ -48,6 +51,9 @@ class CacheElem { NONE = 0b0000, /**< No option */ FIRST_ACCESS = 0x0001, /**< First Access */ LAST_ACCESS = 0x0010, /**< Last Access */ + FIRST_WRITE = 0x0100, /**< First Write */ + FIRST_ACCESS_WRITE = FIRST_ACCESS | FIRST_WRITE, + /**< First access & write */ }; /** @@ -58,7 +64,7 @@ class CacheElem { size_t off, size_t len, std::shared_ptr data, CachePolicy pol = CachePolicy::ALWAYS_SYNCED) : - initial_opt(Options::FIRST_ACCESS), + initial_opt(Options::FIRST_ACCESS_WRITE), device(dev), active(false), id(mem_id), @@ -112,7 +118,7 @@ class CacheElem { * @brief reset access count * */ - void reset() { initial_opt = Options::FIRST_ACCESS; } + void reset() { initial_opt = Options::FIRST_ACCESS_WRITE; } private: Options initial_opt; /**< accessed */ diff --git a/nntrainer/tensor/cache_pool.cpp b/nntrainer/tensor/cache_pool.cpp index 2d555ceda..8f2d4e1f2 100644 --- a/nntrainer/tensor/cache_pool.cpp +++ b/nntrainer/tensor/cache_pool.cpp @@ -46,6 +46,9 @@ convertTensorLifespanToCachePolicy(const TensorLifespan lifespan) { case TensorLifespan::FORWARD_FUNC_LIFESPAN: policy = CachePolicy::TEMPORAL; break; + case TensorLifespan::FORWARD_INFER_LIFESPAN: + policy = CachePolicy::SYNC_ONCE; + break; case TensorLifespan::CALC_DERIV_LIFESPAN: policy = CachePolicy::TEMPORAL; break; diff --git a/nntrainer/tensor/swap_device.cpp b/nntrainer/tensor/swap_device.cpp index 0c215df36..c33eeaad1 100644 --- a/nntrainer/tensor/swap_device.cpp +++ b/nntrainer/tensor/swap_device.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -70,7 +71,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << std::string(strerror_r(errno, error_buf, error_buflen)); void *buf = static_cast(ptr + diff); - mapped[buf] = 
std::make_pair(ptr, len); + mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size); return buf; #else @@ -88,7 +89,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) { << "SwapDevice: seek file: " << dev_path; len = read(fd, ptr, size); - NNTR_THROW_IF(len != (ssize_t)size, std::runtime_error) + NNTR_THROW_IF(len != (size_t)size, std::runtime_error) << "SwapDevice: read file: " << dev_path; } @@ -107,7 +108,21 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) { NNTR_THROW_IF(mapped.find(ptr) == mapped.end(), std::runtime_error) << "Couldn't find buffer"; + off_t off; + ssize_t len; + auto info = mapped[ptr]; + if (!dealloc_only) { + off = lseek(fd, std::get<2>(info), SEEK_SET); + NNTR_THROW_IF(off < 0, std::runtime_error) + << "SwapDevice: seek file: " << dev_path; + + ssize_t size = std::get<3>(info); + len = write(fd, ptr, size); + NNTR_THROW_IF(len != size, std::runtime_error) + << "SwapDevice: write file: " << len << "::"<< std::to_string(size) << dev_path; + } + ret = munmap(std::get(info), std::get(info)); const size_t error_buflen = 100; char error_buf[error_buflen]; diff --git a/nntrainer/tensor/swap_device.h b/nntrainer/tensor/swap_device.h index 11d5dcd27..720c8d04b 100644 --- a/nntrainer/tensor/swap_device.h +++ b/nntrainer/tensor/swap_device.h @@ -26,7 +26,7 @@ #include /* Uncomment this to use mmap for swap data */ -//#define USE_MMAP +#define USE_MMAP namespace nntrainer { @@ -119,8 +119,8 @@ class SwapDevice { int fd; /**< device file description */ #ifdef USE_MMAP - std::map> - mapped; /**< > */ + std::map> + mapped; /**< > */ #else std::map> allocated; /**< > */