[SWAP] Implement inference mode #2696

Open · wants to merge 3 commits into base: main
3 changes: 3 additions & 0 deletions nntrainer.ini.in
@@ -16,3 +16,6 @@ memory_swap_path = @MEMORY_SWAP_PATH@

# look ahead window size
memory_swap_lookahead = @MEMORY_SWAP_LOOKAHEAD@

# swap mode ("inference" or "train")
memory_swap_mode = "train"
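
The generated ini only sets the default; at the API level the same switch is exposed through the new props::MemorySwapMode key below. A minimal usage sketch — the createModel/setProperty calls follow nntrainer's ccapi, but the exact sequence here is illustrative, not taken from this PR:

// Sketch: enabling inference-mode swap via model properties (assumed usage).
#include <model.h> // ml::train::createModel, from nntrainer's ccapi

int main() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
  // Keys mirror the properties added in this PR; values are illustrative.
  model->setProperty({"memory_swap=true",
                      "memory_swap_path=/tmp",
                      "memory_swap_mode=inference"}); // defaults to "train"
  // ... addLayer(...), compile(), initialize(), then run inference ...
  return 0;
}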
13 changes: 7 additions & 6 deletions nntrainer/graph/network_graph.h
@@ -58,12 +58,13 @@ class NetworkGraph {
* @param[in] enable_swap enable memory swap for tensor
* @param[in] swap_path memory swap file path when the swap is enabled
*/
NetworkGraph(bool enable_swap, const std::string &swap_path = "",
unsigned int lookahead = 0,
NetworkGraph(bool enable_swap, const std::string &swap_mode = "train",
const std::string &swap_path = "", unsigned int lookahead = 0,
const std::string &tensor_format_ = "NCHW",
const std::string &tensor_dtype_ = "FP32-FP32") :
tensor_manager(std::make_shared<Manager>(enable_swap, swap_path, lookahead,
tensor_format_, tensor_dtype_)),
tensor_manager(std::make_shared<Manager>(enable_swap, swap_mode, swap_path,
lookahead, tensor_format_,
tensor_dtype_)),
graph(),
compiled(false),
batch_size(0),
@@ -355,9 +356,9 @@ class NetworkGraph {
/**
* @brief Allocate memory for all the managed weights
*/
void allocateWeights() {
void allocateWeights(bool init = true) {
tensor_manager->allocateWeights(
std::get<3>(backward_iter_end->getExecutionOrder()));
std::get<3>(backward_iter_end->getExecutionOrder()), init);
}

/**
2 changes: 2 additions & 0 deletions nntrainer/models/model_common_properties.cpp
@@ -33,6 +33,8 @@ MemorySwap::MemorySwap(bool value) { set(value); }

MemorySwapPath::MemorySwapPath(const std::string &value) { set(value); }

MemorySwapMode::MemorySwapMode(const std::string &value) { set(value); }

MemorySwapLookahead::MemorySwapLookahead(const unsigned int &value) {
set(value);
}
18 changes: 18 additions & 0 deletions nntrainer/models/model_common_properties.h
@@ -179,6 +179,24 @@ class MemorySwapLookahead : public Property<unsigned int> {
MemorySwapLookahead(const unsigned int &value = 0);
};

/**
* @brief memory swap mode property
*
*/
class MemorySwapMode : public Property<std::string> {
public:
static constexpr const char *key =
"memory_swap_mode"; /**< unique key to access */
using prop_tag = str_prop_tag; /**< property type */

/**
* @brief Constructor
*
* @param value value to set, defaults to current directory
Review comment (Contributor) — suggested change:
-  * @param value value to set, defaults to current directory
+  * @param value value to set, defaults to "train" mode

*/
MemorySwapMode(const std::string &value = "train");
};

/**
* @brief Enumeration of Data Type for model & layer
*/
32 changes: 19 additions & 13 deletions nntrainer/models/neuralnet.cpp
@@ -67,11 +67,12 @@ namespace nntrainer {
NeuralNetwork::NeuralNetwork() :
model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(),
props::LossScale()),
model_flex_props(
props::Epochs(), props::TrainingBatchSize(), props::SavePath(),
props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(),
props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(),
props::TensorFormat(), props::ModelTensorDataType()),
model_flex_props(props::Epochs(), props::TrainingBatchSize(),
props::SavePath(), props::ContinueTrain(),
props::SaveBestPath(), props::MemoryOptimization(),
props::MemorySwap(), props::MemorySwapPath(),
props::MemorySwapLookahead(), props::TensorFormat(),
props::ModelTensorDataType(), props::MemorySwapMode()),
load_path(std::string()),
epoch_idx(0),
iter(0),
@@ -86,11 +87,12 @@ NeuralNetwork::NeuralNetwork() :
NeuralNetwork::NeuralNetwork(AppContext app_context_) :
model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(),
props::LossScale()),
model_flex_props(
props::Epochs(), props::TrainingBatchSize(), props::SavePath(),
props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(),
props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(),
props::TensorFormat(), props::ModelTensorDataType()),
model_flex_props(props::Epochs(), props::TrainingBatchSize(),
props::SavePath(), props::ContinueTrain(),
props::SaveBestPath(), props::MemoryOptimization(),
props::MemorySwap(), props::MemorySwapPath(),
props::MemorySwapLookahead(), props::TensorFormat(),
props::ModelTensorDataType(), props::MemorySwapMode()),
load_path(std::string()),
epoch_idx(0),
iter(0),
@@ -172,6 +174,8 @@ int NeuralNetwork::compile() {
bool memory_swap = std::get<props::MemorySwap>(model_flex_props);
const std::string memory_swap_path =
std::get<props::MemorySwapPath>(model_flex_props);
const std::string memory_swap_mode =
std::get<props::MemorySwapMode>(model_flex_props);
unsigned int lookahead =
std::get<props::MemorySwapLookahead>(model_flex_props);

@@ -181,8 +185,8 @@
const std::string tensor_type =
to_string(std::get<props::ModelTensorDataType>(model_flex_props));

model_graph = NetworkGraph(memory_swap, memory_swap_path, lookahead,
tensor_format, tensor_type);
model_graph = NetworkGraph(memory_swap, memory_swap_mode, memory_swap_path,
lookahead, tensor_format, tensor_type);

model_graph.setMemoryOptimizations(
std::get<props::MemoryOptimization>(model_flex_props));
@@ -260,7 +264,9 @@ int NeuralNetwork::initialize(ExecutionMode mode) {
}

// Allocate weights
model_graph.allocateWeights();
const std::string memory_swap_mode =
std::get<props::MemorySwapMode>(model_flex_props);
model_graph.allocateWeights(memory_swap_mode.compare("inference") != 0);

initialized = true;

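
A note on the boolean threaded through allocateWeights() above: compare() returns 0 on equality, so weights are allocated with init = false exactly when the swap mode is "inference" — presumably because the subsequent weight load repopulates them anyway (rationale assumed, not stated in the PR). A sketch of the predicate, as a hypothetical helper:

// Hypothetical helper mirroring the call above: skip weight initialization
// only in inference swap mode, where load() overwrites the tensors anyway.
#include <string>

bool shouldInitWeights(const std::string &memory_swap_mode) {
  return memory_swap_mode.compare("inference") != 0; // 0 means "equal"
}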
11 changes: 5 additions & 6 deletions nntrainer/models/neuralnet.h
@@ -624,12 +624,11 @@ * @retval shared_ptr<const Tensor>
const std::string file_path) override;

private:
using FlexiblePropTypes =
std::tuple<props::Epochs, props::TrainingBatchSize, props::SavePath,
props::ContinueTrain, props::SaveBestPath,
props::MemoryOptimization, props::MemorySwap,
props::MemorySwapPath, props::MemorySwapLookahead,
props::TensorFormat, props::ModelTensorDataType>;
using FlexiblePropTypes = std::tuple<
props::Epochs, props::TrainingBatchSize, props::SavePath,
props::ContinueTrain, props::SaveBestPath, props::MemoryOptimization,
props::MemorySwap, props::MemorySwapPath, props::MemorySwapLookahead,
props::TensorFormat, props::ModelTensorDataType, props::MemorySwapMode>;
using RigidPropTypes =
std::tuple<props::LossType, std::vector<props::InputConnection>,
std::vector<props::LabelLayer>, props::ClipGradByGlobalNorm,
23 changes: 17 additions & 6 deletions nntrainer/tensor/cache_elem.cpp
@@ -23,10 +23,15 @@ namespace nntrainer {
namespace {

std::map<CachePolicy, std::string> policyToStr = {
{WRITE_BACK, "WRITE_BACK"}, {NO_WRITE_BACK, "NO_WRITE_BACK"},
{READ_CONSIST, "READ_CONSIST"}, {NO_READ_CONSIST, "NO_READ_CONSIST"},
{ALWAYS_SYNCED, "ALWAYS_SYNCED"}, {TEMPORAL, "TEMPORAL"},
{FIRST_LAST_SKIP, "FIRST_LAST_SKIP"}, {ITERATION_CONSIST, "ITER_CONSIST"}};
{WRITE_BACK, "WRITE_BACK"},
{NO_WRITE_BACK, "NO_WRITE_BACK"},
{READ_CONSIST, "READ_CONSIST"},
{NO_READ_CONSIST, "NO_READ_CONSIST"},
{ALWAYS_SYNCED, "ALWAYS_SYNCED"},
{TEMPORAL, "TEMPORAL"},
{FIRST_LAST_SKIP, "FIRST_LAST_SKIP"},
{ITERATION_CONSIST, "ITER_CONSIST"},
{SYNC_ONCE, "SYNC_ONCE"}};

inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) {
return ((policy & CachePolicy::NO_READ_CONSIST) ||
@@ -37,7 +42,9 @@ inline bool checkAllocOnly(CachePolicy policy, CacheElem::Options opt) {
inline bool checkDeallocOnly(CachePolicy policy, CacheElem::Options opt) {
return ((policy & CachePolicy::NO_READ_CONSIST) ||
((opt & CacheElem::Options::LAST_ACCESS) &&
(policy & CachePolicy::FIRST_LAST_SKIP)));
(policy & CachePolicy::FIRST_LAST_SKIP)) ||
(policy & FIRST_WRITE_CONSIST) &&
!(opt & CacheElem::Options::FIRST_WRITE)));
}

} // namespace
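
To make the new clause concrete: with the SYNC_ONCE policy added in cache_elem.h below, checkDeallocOnly() now skips the device write on every swap-out except the first, which still carries the FIRST_WRITE bit. A self-contained sketch — bit values for the flags not visible in this hunk are assumed:

// Sketch of the extended dealloc-only decision (assumed flag values).
#include <cassert>

enum CachePolicy {
  WRITE_BACK = 0b000001,
  NO_WRITE_BACK = 0b000010,
  READ_CONSIST = 0b000100,
  NO_READ_CONSIST = 0b001000,
  FIRST_LAST_SKIP = 0b010000,
  FIRST_WRITE_CONSIST = 0b100000,
  SYNC_ONCE = FIRST_WRITE_CONSIST | READ_CONSIST | NO_WRITE_BACK,
};

enum Options {
  NONE = 0x0000,
  LAST_ACCESS = 0x0010,
  FIRST_WRITE = 0x0100,
};

// Mirrors checkDeallocOnly(): deallocate without writing back unless this
// is the element's very first swap-out (FIRST_WRITE still pending).
bool deallocOnly(CachePolicy policy, Options opt) {
  return (policy & NO_READ_CONSIST) ||
         ((opt & LAST_ACCESS) && (policy & FIRST_LAST_SKIP)) ||
         ((policy & FIRST_WRITE_CONSIST) && !(opt & FIRST_WRITE));
}

int main() {
  assert(!deallocOnly(SYNC_ONCE, FIRST_WRITE)); // first swap-out: write back
  assert(deallocOnly(SYNC_ONCE, NONE));         // later swap-outs: drop only
  return 0;
}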
@@ -49,7 +56,7 @@ void CacheElem::swapIn(Options opt) {
bool alloc_only = checkAllocOnly(policy, opt);
void *buf = device->getBuffer(offset, length, alloc_only);

initial_opt = Options::NONE;
initial_opt = static_cast<Options>(initial_opt & ~Options::FIRST_ACCESS);
mem_data->setAddr((void *)buf);
mem_data->setValid(true);
active = true;
@@ -63,8 +70,12 @@

void CacheElem::swapOut(Options opt) {
std::lock_guard<std::mutex> lock(device_mutex);

opt = static_cast<Options>(opt | initial_opt);
bool dealloc_only = checkDeallocOnly(policy, opt);
void *buf = (void *)mem_data->getAddr();

initial_opt = static_cast<Options>(initial_opt & ~Options::FIRST_WRITE);
device->putBuffer(buf, dealloc_only);
mem_data->setAddr(nullptr);
mem_data->setValid(false);
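
Taken together, the two hunks above split the old FIRST_ACCESS-only bookkeeping in two: swapIn() now consumes only the first-access bit, and swapOut() ORs any still-pending initial bits into the caller's opt before consuming the first-write bit, so the first swap-out always observes FIRST_WRITE. A compressed sketch of that state machine, using plain ints with the values from the Options enum in cache_elem.h below:

// Sketch: initial_opt over the first swap cycle (mirrors swapIn()/swapOut()).
#include <cassert>

int main() {
  const int FIRST_ACCESS = 0x0001, FIRST_WRITE = 0x0100, NONE = 0;

  int initial_opt = FIRST_ACCESS | FIRST_WRITE; // constructor / reset()

  // swapIn(): only the first-access bit is consumed.
  initial_opt &= ~FIRST_ACCESS;

  // swapOut(): pending initial bits are folded into the caller's opt,
  // so the first swap-out still sees FIRST_WRITE ...
  int opt = NONE | initial_opt;
  assert(opt & FIRST_WRITE);

  // ... and the first-write bit is consumed afterwards.
  initial_opt &= ~FIRST_WRITE;
  assert(initial_opt == NONE);
  return 0;
}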
10 changes: 8 additions & 2 deletions nntrainer/tensor/cache_elem.h
@@ -33,9 +33,12 @@ enum CachePolicy {
NO_WRITE_BACK), /**< Will not be synchronized with device */
FIRST_LAST_SKIP = 0b10000,
/**< Will skip first read and last write */
FIRST_WRITE_CONSIST = 0b100000, /**< First invalidate will write to device */
ITERATION_CONSIST = (FIRST_LAST_SKIP | ALWAYS_SYNCED),
/**< Will skip first read and last write. other behaviors will be same as
ALWAYS_SYNCED */
SYNC_ONCE = (FIRST_WRITE_CONSIST | READ_CONSIST | NO_WRITE_BACK),
/**< Syncs once from the device; the value then stays consistent */
};

/**
@@ -48,6 +51,9 @@ class CacheElem {
NONE = 0b0000, /**< No option */
FIRST_ACCESS = 0x0001, /**< First Access */
LAST_ACCESS = 0x0010, /**< Last Access */
FIRST_WRITE = 0x0100, /**< First Write */
FIRST_ACCESS_WRITE = FIRST_ACCESS | FIRST_WRITE,
/**< First access & write */
};

/**
@@ -57,7 +63,7 @@
explicit CacheElem(std::shared_ptr<SwapDevice> dev, unsigned int mem_id,
size_t off, size_t len, std::shared_ptr<MemoryData> data,
CachePolicy pol = CachePolicy::ALWAYS_SYNCED) :
initial_opt(Options::FIRST_ACCESS),
initial_opt(Options::FIRST_ACCESS_WRITE),
device(dev),
active(false),
id(mem_id),
@@ -114,7 +120,7 @@ class CacheElem {
* @brief reset access count
*
*/
void reset() { initial_opt = Options::FIRST_ACCESS; }
void reset() { initial_opt = Options::FIRST_ACCESS_WRITE; }

private:
Options initial_opt; /**< accessed */
3 changes: 3 additions & 0 deletions nntrainer/tensor/cache_pool.cpp
@@ -46,6 +46,9 @@ convertTensorLifespanToCachePolicy(const TensorLifespan lifespan) {
case TensorLifespan::FORWARD_FUNC_LIFESPAN:
policy = CachePolicy::TEMPORAL;
break;
case TensorLifespan::FORWARD_INFER_LIFESPAN:
policy = CachePolicy::SYNC_ONCE;
break;
case TensorLifespan::CALC_DERIV_LIFESPAN:
policy = CachePolicy::TEMPORAL;
break;
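
This new case is the glue between the model-level switch and the cache policy: in inference mode, requestWeights() in manager.cpp below assigns FORWARD_INFER_LIFESPAN to weight variables, and the cache pool maps that lifespan to SYNC_ONCE, i.e. the weight is read from the swap file once and never written back. A condensed sketch of the two mappings, with the enums simplified to the values involved:

// Sketch: swap mode -> tensor lifespan -> cache policy (simplified enums).
#include <string>

enum class TensorLifespan { FORWARD_INFER_LIFESPAN, MAX_LIFESPAN };
enum CachePolicy { SYNC_ONCE, ALWAYS_SYNCED };

// manager.cpp: inference-mode weights live only through forward().
TensorLifespan weightLifespan(const std::string &swap_mode) {
  return swap_mode == "inference" ? TensorLifespan::FORWARD_INFER_LIFESPAN
                                  : TensorLifespan::MAX_LIFESPAN;
}

// cache_pool.cpp: that lifespan selects the read-once, no-write-back policy.
CachePolicy policyFor(TensorLifespan ls) {
  return ls == TensorLifespan::FORWARD_INFER_LIFESPAN
           ? CachePolicy::SYNC_ONCE
           : CachePolicy::ALWAYS_SYNCED;
}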
8 changes: 5 additions & 3 deletions nntrainer/tensor/manager.cpp
@@ -144,10 +144,10 @@ void Manager::reinitialize() {
tensor_pool.reinitialize();
}

void Manager::allocateWeights(unsigned int max_exec_order_) {
void Manager::allocateWeights(unsigned int max_exec_order_, bool init) {
if (!weight_pool.isAllocated()) {
finalizeTensorPool(weight_pool, 0, max_exec_order_);
weight_pool.allocate();
weight_pool.allocate(init);
}
}

@@ -376,7 +376,9 @@ std::vector<Weight *> Manager::requestWeights(
* and therefore, if we remove the calcDerivative order, then tests fails.
*/

TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN;
TensorLifespan var_ls = swap_mode == "inference"
? TensorLifespan::FORWARD_INFER_LIFESPAN
: TensorLifespan::MAX_LIFESPAN;
TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN;
Review comment (Contributor):
This is a pure question (it may be irrelevant to this PR, though). Do we need grad_ls for the inference swap mode? Is it related to the loss thing?
std::vector<Weight *> ret;
13 changes: 9 additions & 4 deletions nntrainer/tensor/manager.h
@@ -141,13 +141,16 @@ class Manager {
/**
* @brief Constructor of Manager
*/
Manager(bool enable_swap, const std::string &swap_path = "",
unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW",
Manager(bool enable_swap, const std::string &swap_mode = "train",
const std::string &swap_path = "", unsigned int lookahead = 0,
const std::string tensor_format_ = "NCHW",
const std::string tensor_dtype_ = "FP32-FP32") :
weight_pool(enable_swap, swap_path, "weight_pool"),
tensor_pool(enable_swap, swap_path, "tensor_pool"),
tensor_pool(enable_swap && (swap_mode.compare("train") == 0), swap_path,
"tensor_pool"),
enable_optimizations(true),
swap_lookahead(lookahead),
swap_mode(swap_mode),
tensor_format(tensor_format_),
tensor_dtype(split(tensor_dtype_, getRegex("\\-"))),
exec_mode(ExecutionMode::TRAIN) {}
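
Note the asymmetry this constructor introduces: the weight pool keeps swapping in either mode, while the tensor (activation) pool only swaps when the mode is "train". A sketch of that decision, condensed into a hypothetical struct for illustration:

// Hypothetical condensation of the two pool constructor arguments above.
#include <string>

struct PoolSwapConfig {
  bool weight_pool_swap;
  bool tensor_pool_swap;
};

PoolSwapConfig poolSwap(bool enable_swap, const std::string &swap_mode) {
  // Activations are only swapped while training; inference swaps weights only.
  return {enable_swap, enable_swap && swap_mode.compare("train") == 0};
}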
@@ -381,7 +384,7 @@
* @note this will make requests to the tensor pool and allocate the
* corresponding weights
*/
void allocateWeights(unsigned int max_exec_order_);
void allocateWeights(unsigned int max_exec_order_, bool init = true);

/**
* @brief Deallocate memory for all the weights
@@ -523,6 +526,8 @@

unsigned int swap_lookahead; /** lookahead for memory swap */

std::string swap_mode; /** swap mode */

std::string tensor_format;

std::vector<std::string> tensor_dtype;
20 changes: 18 additions & 2 deletions nntrainer/tensor/swap_device.cpp
@@ -16,6 +16,7 @@
#include <profiler.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

@@ -70,7 +71,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) {
<< std::string(strerror_r(errno, error_buf, error_buflen));

void *buf = static_cast<void *>(ptr + diff);
mapped[buf] = std::make_pair(ptr, len);
mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size);

return buf;
#else
@@ -88,7 +89,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) {
<< "SwapDevice: seek file: " << dev_path;

len = read(fd, ptr, size);
NNTR_THROW_IF(len != (ssize_t)size, std::runtime_error)
NNTR_THROW_IF(len != (size_t)size, std::runtime_error)
Review comment (Contributor):
Do we need to cast 'ssize_t' to 'size_t'? It looks like read() returns 'ssize_t'.
<< "SwapDevice: read file: " << dev_path;
}

@@ -107,7 +108,22 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) {
NNTR_THROW_IF(mapped.find(ptr) == mapped.end(), std::runtime_error)
<< "Couldn't find buffer";

off_t off;
ssize_t len;

auto info = mapped[ptr];
if (!dealloc_only) {
off = lseek(fd, std::get<2>(info), SEEK_SET);
NNTR_THROW_IF(off < 0, std::runtime_error)
<< "SwapDevice: seek file: " << dev_path;

ssize_t size = std::get<3>(info);
len = write(fd, ptr, size);
NNTR_THROW_IF(len != size, std::runtime_error)
<< "SwapDevice: write file: " << len << "::" << std::to_string(size)
<< dev_path;
}

ret = munmap(std::get<void *>(info), std::get<size_t>(info));
const size_t error_buflen = 100;
char error_buf[error_buflen];
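
This hunk turns putBuffer() from unmap-only into write-back-then-unmap: unless dealloc_only is set, it seeks to the element's file offset recorded in the mapping table and writes the buffer contents through the plain file descriptor before munmap(). A standalone sketch of that path, assuming a private (MAP_PRIVATE-style) mapping whose dirty pages are not visible through the fd; error handling is condensed:

// Sketch of the write-back performed above (fd/offset/size would come
// from the mapping bookkeeping in the real code).
#include <stdexcept>
#include <sys/mman.h>
#include <unistd.h>

void writeBackAndUnmap(int fd, void *buf, off_t offset, ssize_t size,
                       void *map_ptr, size_t map_len, bool dealloc_only) {
  if (!dealloc_only) {
    if (lseek(fd, offset, SEEK_SET) < 0)
      throw std::runtime_error("seek failed");
    if (write(fd, buf, size) != size)
      throw std::runtime_error("short write");
  }
  if (munmap(map_ptr, map_len) != 0)
    throw std::runtime_error("munmap failed");
}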
6 changes: 3 additions & 3 deletions nntrainer/tensor/swap_device.h
@@ -26,7 +26,7 @@
#include <utility>

/* Uncomment this to use mmap for swap data */
//#define USE_MMAP
#define USE_MMAP

namespace nntrainer {

@@ -119,8 +119,8 @@ class SwapDevice {
int fd; /**< device file description */

#ifdef USE_MMAP
std::map<void *, std::pair<void *, size_t>>
mapped; /**< <pointer, <orig_pointer, size>> */
std::map<void *, std::tuple<void *, size_t, off_t, ssize_t>>
mapped; /**< <pointer, <orig_pointer, size, offset, original size>> */
#else
std::map<void *, std::pair<off_t, ssize_t>>
allocated; /**< <pointer, <offset, size>> */