[Wait for #2567] [ Test ] Mixed Precision Test Case #2568

Closed · wants to merge 5 commits
2 changes: 1 addition & 1 deletion Applications/KNN/jni/meson.build
@@ -15,4 +15,4 @@ e = executable('knn_sample',
   install_dir: application_install_dir
 )

-test('app_knn', e, args: [nntr_app_resdir / 'KNN'])
+test('app_knn', e, args: [nntr_app_resdir / 'KNN/'])
8 changes: 5 additions & 3 deletions nntrainer/graph/network_graph.cpp
@@ -768,9 +768,10 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
    * node is going to be used with in-place optimizations.
    */
   auto out_specs = init_context.getOutSpecs();

+  /// @note try move inplace control to finalize
   bool shared_var = false, shared_grad = false;
-  if (lnode->executeInPlace() != InPlace::NONE) {
+  if (lnode->executeInPlace() != InPlace::NONE && lnode->supportInPlace()) {
     setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
     for (unsigned int i = 0; i < out_specs.size(); ++i) {
       auto &s = out_specs.at(i);
@@ -1556,8 +1557,9 @@ void NetworkGraph::requestOptimizerVariable(
     const TensorDim &dim = w->getDim();
     std::vector<TensorDim> dims = cb(dim);
     w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
-      dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
-      w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS));
+      dims, w->getName(), ":opt", TensorLifespan::MAX_LIFESPAN,
+      w->isGradientClipByGlobalNorm(), w->isMixedPrecision(),
+      Tensor::Initializer::ZEROS));
   }
 }
19 changes: 16 additions & 3 deletions nntrainer/layers/input_layer.cpp
@@ -33,8 +33,7 @@ namespace nntrainer {
 static constexpr size_t SINGLE_INOUT_IDX = 0;

 InputLayer::InputLayer() :
-  Layer(),
-  input_props(props::Normalization(), props::Standardization()) {}
+  Layer(), input_props(props::Normalization(), props::Standardization()) {}

void InputLayer::setProperty(const std::vector<std::string> &values) {
auto remain_props = loadProperties(values, input_props);
@@ -47,7 +46,7 @@ void InputLayer::forwarding(RunLayerContext &context, bool training) {
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
   if (!context.executeInPlace()) {
     Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
-    hidden_.copy(input_);
+    hidden_.copyData(input_);
   }

   if (std::get<props::Normalization>(input_props))
@@ -70,7 +69,21 @@ void InputLayer::finalize(InitLayerContext &context) {

   std::vector<TensorDim> output_dims = context.getInputDimensions();

+  for (auto &d : output_dims) {
+    d.setDataType(context.getActivationDataType());
+  }
+
   context.setOutputDimensions(output_dims);

+  is_inplace = true;
+
+  /**
+   * @note Input Layer assuems that the FP32 IN Tensor always. Therefore, if the
+   * activation data type is not fp32, then it does not support in-place
+   * operation.
+   */
+  if (context.getActivationDataType() != ml::train::TensorDim::DataType::FP32)
+    is_inplace = false;
 }

 } /* namespace nntrainer */

Review comment (Member), on the @note above: typo assume ?
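Taken together with the network_graph.cpp hunk above, the in-place decision is now a handshake: the graph keeps a node in-place only if the layer still reports supportInPlace() after finalize, and InputLayer retracts that support whenever the activation dtype is not FP32, since aliasing the buffer would skip the required cast. A minimal sketch of the interplay, condensed from the hunks rather than quoted verbatim:

```cpp
// 1) At finalize time the layer decides whether aliasing the FP32 input
//    buffer is legal for the current activation dtype.
is_inplace =
  context.getActivationDataType() == ml::train::TensorDim::DataType::FP32;

// 2) The graph honors that decision before sharing any tensor specs.
if (lnode->executeInPlace() != InPlace::NONE && lnode->supportInPlace()) {
  setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
} else {
  // Not in-place: forwarding() performs the casting copy instead:
  //   hidden_.copyData(input_);
}
```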
3 changes: 2 additions & 1 deletion nntrainer/layers/input_layer.h
@@ -82,7 +82,7 @@ class InputLayer : public Layer {
   /**
    * @copydoc Layer::supportInPlace()
    */
-  bool supportInPlace() const override { return true; }
+  bool supportInPlace() const override { return is_inplace; }

/**
* @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
@@ -105,6 +105,7 @@

 private:
   std::tuple<props::Normalization, props::Standardization> input_props;
+  bool is_inplace;
 };
} // namespace nntrainer

13 changes: 13 additions & 0 deletions nntrainer/layers/layer_context.cpp
@@ -169,6 +169,19 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
   return weights[idx]->getGradientRef();
 }

+/**
+ * @brief Get the FP32 Weight tensor object (the full-precision master copy)
+ *
+ * @param idx Identifier of the weight
+ * @return Tensor& Reference to the FP32 weight tensor
+ */
+Tensor &RunLayerContext::getWeightFP32(unsigned int idx) const {
+  if (!weights[idx]->hasGradient())
+    throw std::invalid_argument(
+      "Requesting gradient for a non-trainable weight.");
+  return weights[idx]->getVariableFP32Ref();
+}

/**
* @brief Get the Weight Optimizer Variable tensor object
*
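getVariableFP32Ref() exposes the full-precision master copy of a weight, which is what mixed-precision updates are expected to write to. A hedged sketch of the cycle this getter enables (idx, lr, and the surrounding loop are illustrative; clone() and copyData() are the casting calls used elsewhere in this PR):

```cpp
// One SGD-style step on a mixed-precision weight, as a sketch only.
const float lr = 1.0e-3f;
Tensor &w16 = context.getWeight(idx);     // compute-precision weight (e.g. FP16)
Tensor &g16 = context.getWeightGrad(idx); // gradient in compute precision
Tensor &w32 = context.getWeightFP32(idx); // FP32 master copy

Tensor g32 = g16.clone(ml::train::TensorDim::DataType::FP32); // cast up
w32.add_i(g32, -lr); // take the step in full precision, no rounding loss
w16.copyData(w32);   // cast the result back for the next forward pass
```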
9 changes: 9 additions & 0 deletions nntrainer/layers/layer_context.h
@@ -463,6 +463,15 @@ class RunLayerContext {
   Tensor &getWeightGrad(unsigned int idx) const;

+  /**
+   * @brief Get the FP32 Weight tensor object (the full-precision master copy)
+   *
+   * @param idx Identifier of the weight
+   * @return Tensor& Reference to the FP32 weight tensor
+   */
+  Tensor &getWeightFP32(unsigned int idx) const;
+
   /**
    * @brief Get the Weight Optimizer Variable tensor object
    *
    * @param idx Identifier of the weight
9 changes: 5 additions & 4 deletions nntrainer/layers/layer_node.h
@@ -487,6 +487,7 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
   const std::vector<TensorDim> getOutputDimensions() const;
   /**
    * @brief Get the Weight object
+   * currently, only unittest uses this func.
    *
    * @param idx Identifier of the weight
    * @return Weight& Reference to the weight
@@ -495,11 +496,11 @@
     NNTR_THROW_IF(!run_context, std::runtime_error)
       << __func__ << " layer needs to be finalized first!";
     if (run_context->weightHasGradient(idx)) {
-      return Weight(run_context->getWeight(idx),
-                    run_context->getWeightGrad(idx),
-                    run_context->getWeightName(idx));
+      return Weight(
+        run_context->getWeight(idx), run_context->getWeightGrad(idx),
+        run_context->getWeightFP32(idx), run_context->getWeightName(idx));
     } else {
-      return Weight(run_context->getWeight(idx), Tensor(),
+      return Weight(run_context->getWeight(idx), Tensor(), Tensor(),
                     run_context->getWeightName(idx));
     }
   }
11 changes: 10 additions & 1 deletion nntrainer/layers/loss/mse_loss_layer.cpp
@@ -20,7 +20,16 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;

 void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
-  Tensor &y = context.getInput(SINGLE_INOUT_IDX);
+
+  Tensor empty_tensor;
+  Tensor &y = context.getInput(SINGLE_INOUT_IDX).getDataType() ==
+                  ml::train::TensorDim::DataType::FP32
+                ? context.getInput(SINGLE_INOUT_IDX)
+                : empty_tensor;
+
+  if (y.empty())
+    y = context.getInput(SINGLE_INOUT_IDX)
+          .clone(ml::train::TensorDim::DataType::FP32);

   // hidden_ <- y2 - y;
   if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
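Both this hunk and the matching one in adam.cpp below use the same borrow-or-clone idiom: bind a reference to the tensor when it already has the right dtype, otherwise rebind to a local FP32 copy. The subtlety is that `y` references `empty_tensor` on the slow path, so the assignment under `if (y.empty())` fills the local scratch tensor rather than mutating the layer's input. A distilled version, with a hypothetical helper name:

```cpp
// Borrow-or-clone, as a sketch only. "scratch" must outlive the returned
// reference, which is why the caller owns it.
static Tensor &asFP32(Tensor &t, Tensor &scratch) {
  if (t.getDataType() == ml::train::TensorDim::DataType::FP32)
    return t;                                              // borrow: no copy
  scratch = t.clone(ml::train::TensorDim::DataType::FP32); // clone: cast up
  return scratch;
}
```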
27 changes: 22 additions & 5 deletions nntrainer/optimizers/adam.cpp
@@ -36,7 +36,15 @@ Adam::~Adam() {}
 enum AdamParams { wm, wv };

 std::vector<TensorDim> Adam::getOptimizerVariableDim(const TensorDim &dim) {
+  /**
+   * @note We assume the optimizer parameters should be full precision to
+   * maintain the accuracy even in mixed precision training.
+   */
-  return {dim, dim};
+  TensorDim wm_dim(dim);
+  TensorDim wv_dim(dim);
+  wm_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+  wv_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+  return {wm_dim, wv_dim};
 }
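Why the moments must stay FP32: with a narrow type, an update of the form m ← β·m + (1−β)·g rounds to nothing once m dwarfs the increment, because half precision carries only about 10 mantissa bits. A self-contained demo of the effect, using float vs. double as stand-ins for fp16 vs. fp32 (standard C++ has no portable half type):

```cpp
#include <cstdio>

// Small increments vanish in a narrow accumulator but survive in a wide one.
// What matters is the increment-to-accumulator ratio, not the absolute types.
int main() {
  float narrow = 1.0f; // plays the role of an fp16 optimizer moment
  double wide = 1.0;   // plays the role of the fp32 moment this PR requests
  for (int i = 0; i < 10000; ++i) {
    narrow += 1e-8f; // 1.0f + 1e-8f rounds back to 1.0f: the update is lost
    wide += 1e-8;    // accumulates normally: ends near 1.0001
  }
  std::printf("narrow: %.8f  wide: %.8f\n", narrow, wide);
  return 0;
}
```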

void Adam::exportTo(Exporter &exporter,
Expand Down Expand Up @@ -64,7 +72,15 @@ double Adam::getUpdatedLearningRate(unsigned int iteration, double ll) const {
}

void Adam::applyGradient(RunOptimizerContext &context) {
Tensor &x_grad = context.getGradient();
Tensor empty_tensor;

Tensor &x_grad =
context.getGradient().getDataType() == ml::train::TensorDim::DataType::FP32
? context.getGradient()
: empty_tensor;

if (x_grad.empty())
x_grad = context.getGradient().clone(ml::train::TensorDim::DataType::FP32);

auto &beta1 = std::get<PropsB1>(adam_props).get();
auto &beta2 = std::get<PropsB2>(adam_props).get();
@@ -91,7 +107,7 @@ void Adam::applyGradient(RunOptimizerContext &context) {
     denom.add_i(epsilon);
     wm.divide(denom, x_grad);

-    context.applyGradient(context.getLearningRate() / biasCorrection1);
+    context.applyGradient(context.getLearningRate() / biasCorrection1, x_grad);

   } else {
     std::function<double(double)> sqrtEps = [epsilon](double f) {
@@ -100,8 +116,9 @@

     x_grad = wv.apply<float>(sqrtEps, x_grad);
     x_grad.multiply_i(wm);
-    context.applyGradient(getUpdatedLearningRate(context.getIteration(),
-                                                 context.getLearningRate()));
+    context.applyGradient(
+      getUpdatedLearningRate(context.getIteration(), context.getLearningRate()),
+      x_grad);
   }
 }

7 changes: 7 additions & 0 deletions nntrainer/optimizers/optimizer_context.cpp
@@ -42,4 +42,11 @@ Tensor &RunOptimizerContext::getOptimizerVariable(unsigned int idx) const {
 void RunOptimizerContext::applyGradient(double lr) const {
   weight->applyGradient(lr);
 }
+
+/**
+ * @brief Apply the gradient with the given learning rate and gradient
+ */
+void RunOptimizerContext::applyGradient(double lr, Tensor &updated_grad) const {
+  weight->applyGradient(lr, updated_grad);
+}
} // namespace nntrainer
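This overload closes the loop for mixed precision: Adam computes its step in FP32 (the x_grad clone above) and hands that tensor back, so the weight can be updated at full precision. The Weight side is not part of this diff; the following is only a sketch of what it plausibly does, with every call an assumption rather than quoted code:

```cpp
// Hypothetical Weight::applyGradient(lr, updated_grad), shown to make the
// data flow concrete; the real implementation lives outside this PR page.
void Weight::applyGradient(double lr, Tensor &updated_grad) {
  // step the FP32 master variable with the FP32 gradient from the optimizer
  getVariableFP32Ref().add_i(updated_grad, -lr);
  // refresh the compute-precision weight from the master copy
  getVariableRef().copyData(getVariableFP32Ref());
}
```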
14 changes: 11 additions & 3 deletions nntrainer/optimizers/optimizer_context.h
@@ -35,9 +35,7 @@ class RunOptimizerContext {
    *
    */
   RunOptimizerContext(Weight *w = nullptr, size_t iter = 0, double lr = 0.0) :
-    weight(w),
-    iteration(iter),
-    learning_rate(lr) {}
+    weight(w), iteration(iter), learning_rate(lr) {}

/**
* @brief Get the Weight tensor object
@@ -75,6 +73,16 @@
    */
   void applyGradient(double lr) const;

+  /**
+   * @brief Apply the gradient with the given learning rate and updated
+   * gradient
+   *
+   * @param lr learning rate
+   * @param updated_grad gradient tensor which is updated. (usually it could be
+   * fp32)
+   */
+  void applyGradient(double lr, Tensor &updated_grad) const;
+
/**
* @brief Get the current iteration value
*
33 changes: 27 additions & 6 deletions nntrainer/tensor/manager.cpp
@@ -414,7 +414,7 @@ std::vector<Weight *> Manager::requestWeights(
     // var_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
   }

-  Tensor *var = nullptr, *grad = nullptr;
+  Tensor *var = nullptr, *grad = nullptr, *var32 = nullptr;
bool is_dependent = !shared_names.empty();
if (is_dependent) {
/// shared_name is used and the orignal name is discarded
@@ -431,6 +431,17 @@
       grad = tensor_pool.requestOrExtend(shared_name + Var_Grad::grad_suffix,
                                          dim_g, grad_exec_order, grad_ls,
                                          Tensor::Initializer::ZEROS);
+
+      if (var->getDataType() != ml::train::TensorDim::DataType::FP32) {
+        TensorDim var32_dim(dim_v);
+        var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+        std::vector<unsigned int> var32_exec_order;
+        var32_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
+
+        var32 = weight_pool.requestOrExtend(shared_name + ":var32", var32_dim,
+                                            var32_exec_order, var_ls,
+                                            Tensor::Initializer::ZEROS);
+      }
}
} else {
/** case requesting fresh weights */
@@ -448,11 +459,21 @@
       grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim_g,
                                  grad_exec_order, grad_ls,
                                  Tensor::Initializer::ZEROS, is_wgrad);
+      if (var->getDataType() != ml::train::TensorDim::DataType::FP32) {
+        TensorDim var32_dim(dim_v);
+        var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
+        std::vector<unsigned int> var32_exec_order;
+        var32_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
+        var32 =
+          weight_pool.request(name + ":var32", var32_dim, var32_exec_order,
+                              var_ls, Tensor::Initializer::ZEROS);
+      }
}
}

weights_v2.emplace_back(std::make_unique<Weight>(
-      var, grad, w_reg, w_reg_const, decay, is_dependent, clip_by_global_norm));
+      var, grad, var32, w_reg, w_reg_const, decay, is_dependent,
+      clip_by_global_norm, axis, loss_scale));
}

std::transform(weights_v2.begin() + current_size, weights_v2.end(),
@@ -668,15 +689,15 @@ bool Manager::isSecondLastAccess(const std::string &name,
  */
 std::vector<Tensor *> Manager::requestWeightOptimizerVariables(
   const std::vector<TensorDim> &dims, const std::string &name,
-  const TensorLifespan &lifespan, bool is_grad_clip,
-  Tensor::Initializer initializer) {
+  const std::string &suffix, const TensorLifespan &lifespan, bool is_grad_clip,
+  bool is_mixed_precision, Tensor::Initializer initializer) {

std::vector<Tensor *> ret;
ret.reserve(dims.size());

std::vector<unsigned int> exec;
exec.reserve(1);
-  if (is_grad_clip) {
+  if (is_grad_clip || is_mixed_precision) {
exec.emplace_back(TensorPool::PERSIST_END_ORDER);
} else {
exec.emplace_back(getMinMaxTensorExecutionOrder(name, true).second);
@@ -685,7 +706,7 @@
   /// @note this is assuming weight optimizer variables is treated as weight, if
   /// not, there is room to optimize below behavior
   for (unsigned int idx = 0; idx < dims.size(); idx++)
-    ret.push_back(weight_pool.request(name + ":opt" + std::to_string(idx),
+    ret.push_back(weight_pool.request(name + suffix + std::to_string(idx),
                                       dims[idx], exec, lifespan, initializer));

return ret;
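Net effect of the manager changes: every reduced-precision weight gets a sibling FP32 master tensor in the weight pool, keyed `<name>:var32` and pinned to PERSIST_END_ORDER so it survives the whole run, and optimizer variables get the same persistent lifespan once mixed precision is on. An illustration of the expected pool layout (the layer and weight names are hypothetical):

```cpp
// For an FP16 weight "fc0:weight" trained with Adam, the pools would roughly
// hold (dtypes per this PR; exact names depend on the layer):
//   weight_pool: "fc0:weight"         FP16  var
//                "fc0:weight:var32"   FP32  master copy (PERSIST_END_ORDER)
//                "fc0:weight:opt0"    FP32  Adam m (via the new ":opt" suffix)
//                "fc0:weight:opt1"    FP32  Adam v
//   tensor_pool: "fc0:weight" + Var_Grad::grad_suffix   FP16  gradient
```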
3 changes: 2 additions & 1 deletion nntrainer/tensor/manager.h
@@ -224,7 +224,8 @@ class Manager {
    */
   std::vector<Tensor *> requestWeightOptimizerVariables(
     const std::vector<TensorDim> &dims, const std::string &name,
-    const TensorLifespan &lifespan, bool is_grad_clip,
+    const std::string &suffix, const TensorLifespan &lifespan,
+    bool is_grad_clip, bool is_mixed_type,
     Tensor::Initializer initializer = Tensor::Initializer::NONE);

/**
12 changes: 12 additions & 0 deletions nntrainer/tensor/tensor.cpp
@@ -3065,6 +3065,18 @@ Tensor Tensor::clone() const {
   return t;
 }

+Tensor Tensor::clone(ml::train::TensorDim::DataType type) const {
+  if (getDataType() == type)
+    return clone();
+
+  TensorDim dim = getDim();
+  dim.setDataType(type);
+  Tensor t(dim, true);
+  t.copyData(*this);
+  t.name = name;
+  return t;
+}

void Tensor::reshape(const TensorDim &d) {

NNTR_THROW_IF(!contiguous, std::invalid_argument)
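A quick usage sketch of the new overload: when the requested dtype already matches, it degenerates to the plain clone(); otherwise it allocates a buffer of the target dtype and casts the data through copyData, keeping the name. Here `t` is assumed to be an existing FP16 tensor:

```cpp
Tensor t32 = t.clone(ml::train::TensorDim::DataType::FP32);
// t32: same shape and name as t, fresh FP32 storage, values cast upward.
Tensor same = t.clone(t.getDataType()); // equivalent to t.clone()
```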
7 changes: 7 additions & 0 deletions nntrainer/tensor/tensor.h
@@ -1680,6 +1680,13 @@ class Tensor {
    */
   Tensor clone() const;

+  /**
+   * @brief Convient wrapper for inplace copy of @a this.
+   * @param[in] type output tensor data type
+   * @retval Copied version of this
+   */
+  Tensor clone(ml::train::TensorDim::DataType type) const;
+
   /**
    * @brief Save the Tensor into file
    * @param[in] file output file stream

Review comment (Contributor): Is it typo? Do you mean convenient ?

Suggested change:
-   * @brief Convient wrapper for inplace copy of @a this.
+   * @brief Convenient wrapper for inplace copy of @a this.