Merge pull request #1454 from alibaba/feature/sync
Feature/sync
jxt1234 authored Apr 16, 2021
2 parents 0837255 + 3c4ba7c commit 3dada34
Showing 74 changed files with 1,657 additions and 540 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -368,7 +368,6 @@ list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Math
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/NeuralNetWorkOp.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Optimizer.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Executor.hpp")
-list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/NN.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Module.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/NeuralNetWorkOp.hpp")

66 changes: 66 additions & 0 deletions express/NeuralNetWorkOp.cpp
@@ -1559,6 +1559,72 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
return (Variable::create(Expr::create(convOp.get(), {x})));
}

VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, std::vector<float>&& weightScale,
VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu,
float scaleIn, float scaleOut,
int8_t inputZeroPoint, int8_t outputZeroPoint,
int8_t minValue, int8_t maxValue, float weightClampValue, bool accumulateToInt16) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_ConvInt8;
if (channel[0] == channel[1] && channel[0] == group) {
convOp->type = OpType_DepthwiseConvInt8;
}
convOp->main.type = OpParameter_Convolution2D;
convOp->main.value = new Convolution2DT;
auto conv2D = convOp->main.AsConvolution2D();
conv2D->common.reset(new Convolution2DCommonT);
conv2D->common->padMode = _convertPadMode(pad);
conv2D->common->padX = pads[0];
conv2D->common->padY = pads[1];
conv2D->common->strideX = stride[0];
conv2D->common->strideY = stride[1];
conv2D->common->group = group;
conv2D->common->outputCount = channel[1];
conv2D->common->inputCount = channel[0];
conv2D->common->dilateX = dilate[0];
conv2D->common->dilateY = dilate[1];
conv2D->common->kernelX = kernelSize[0];
conv2D->common->kernelY = kernelSize[1];
conv2D->common->relu = relu;
MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
conv2D->symmetricQuan.reset(new QuantizedFloatParamT);
if (bias.size() == 0) {
bias.resize(channel[1]);
std::fill(bias.begin(), bias.end(), 0);
}

conv2D->bias = bias;

conv2D->symmetricQuan->weight = std::move(weight);
conv2D->symmetricQuan->zeroPoint = std::move(inputZeroPoint);
conv2D->symmetricQuan->outputZeroPoint = std::move(outputZeroPoint);
MNN_ASSERT(maxValue > minValue);
conv2D->symmetricQuan->clampMin = minValue;
conv2D->symmetricQuan->clampMax = maxValue;

// const int kn = conv2D->common->outputCount;
// const int ks = weight.size() / kn;
// std::vector<float> scales(kn, 1.0f);
// std::vector<float> weightFloat;
// for (int i = 0; i < weight.size(); i++) {
// weightFloat.emplace_back(weight[i] * weightScale[i / ks]);
// }
// conv2D->quanParameter = IDSTEncoder::encode(weightFloat, weightScale, ks, kn, false, weight.data(), -int(weightClampValue));

conv2D->quanParameter.reset(new IDSTQuanT);
conv2D->quanParameter->alpha = std::move(weightScale);
conv2D->quanParameter->scaleIn = scaleIn;
conv2D->quanParameter->scaleOut = scaleOut;
conv2D->quanParameter->aMin = -int(weightClampValue);

if (accumulateToInt16) {
conv2D->symmetricQuan->method = MNN::QuantizeAlgo::QuantizeAlgo_OVERFLOW_AWARE;
}

return (Variable::create(Expr::create(convOp.get(), {x})));
}

VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
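For context, a minimal sketch of how the new overload might be called. All shapes, scale values, and zero points below are illustrative assumptions, not values taken from this commit:

```cpp
#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

// Hypothetical 3x3 int8 convolution, 8 -> 16 channels; every numeric value
// here is a placeholder chosen for illustration only.
VARP buildInt8Conv(VARP x) {
    const int ic = 8, oc = 16, kh = 3, kw = 3;
    std::vector<int8_t> weight(oc * ic * kh * kw, 1); // quantized weights
    std::vector<float> bias(oc, 0.0f);                // float bias (new in this overload)
    std::vector<float> weightScale(oc, 0.02f);        // per-output-channel dequant scales
    return _Conv(std::move(weight), std::move(bias), std::move(weightScale),
                 x, /*channel*/ {ic, oc}, /*kernelSize*/ {kw, kh},
                 SAME, /*stride*/ {1, 1}, /*dilate*/ {1, 1}, /*group*/ 1,
                 /*pads*/ {0, 0}, /*relu*/ false,
                 /*scaleIn*/ 0.05f, /*scaleOut*/ 0.1f,
                 /*inputZeroPoint*/ 0, /*outputZeroPoint*/ 0,
                 /*minValue*/ -127, /*maxValue*/ 127,
                 /*weightClampValue*/ 127.0f, /*accumulateToInt16*/ false);
}
```

Note that the weight vector size must satisfy the assertion in the body above: channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1].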
2 changes: 1 addition & 1 deletion express/module/IfModule.hpp
@@ -18,7 +18,7 @@ class IfModule : public Module {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
-static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
+MNN_PUBLIC static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

private:
IfModule(){}
8 changes: 4 additions & 4 deletions express/module/Module.cpp
@@ -141,10 +141,6 @@ Module* Module::load(const std::vector<std::string>& inputs, const std::vector<s
return PipelineModule::load(inputs, outputs, buffer, length, config);
}

-Module* Module::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
-return PipelineModule::extract(inputs, outputs, fortrain, subGraph);
-}

EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
auto it = mExprMap.find(expr.get());
if (it == mExprMap.end()) {
@@ -192,5 +188,9 @@ Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
return module;
}

+Module* Module::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
+return new PipelineModule(inputs, outputs);
+}

} // namespace Express
} // namespace MNN
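A minimal sketch of what the relocated definition implies for callers, assuming the Express API of this era (`_Input` arguments are illustrative): `Module::extract` now simply wraps the expression graph in a plain `PipelineModule`, so the `fortrain` flag and `subGraph` map no longer influence the result.

```cpp
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

// Build a toy graph and capture it as a Module. After this commit the call
// below behaves like `new PipelineModule({x}, {y})` regardless of fortrain.
void extractExample() {
    auto x = _Input({1, 3, 224, 224}, NC4HW4);
    auto y = _Relu(x); // stand-in for a real network
    std::unique_ptr<Module> m(Module::extract({x}, {y}, /*fortrain*/ false));
    auto outputs = m->onForward({x});
}
```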
175 changes: 0 additions & 175 deletions express/module/PipelineModule.cpp
@@ -20,7 +20,6 @@ using namespace MNN::Express;
namespace MNN {
namespace Express {
//#define DYNAMIC
-#define PIPELINE_MODULE "_pipeline_module__"
class ExprModule : public Module {
public:
ExprModule(EXPRP expr) {
@@ -90,43 +89,6 @@ class ExprModule : public Module {
std::vector<int> mInputIndexes;
};

Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
if (fortrain) {
transformFunction =
[&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
auto convExtracted = NN::Utils::ExtractConvolution(source);
if (convExtracted.weight == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> module(NN::Conv(convExtracted));
module->setName(source->name());
return std::make_pair(std::vector<int>{0}, module);
};
} else {
transformFunction = [&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
};
}
return new PipelineModule(inputs, outputs, transformFunction);
}

PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
setType(PIPELINE_MODULE);
std::vector<EXPRP> executeOrder;
@@ -223,14 +185,6 @@ PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outpu
mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
}
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
if (nullptr == module || module->type() != PIPELINE_MODULE) {
MNN_ERROR("Invalide module for quantized\n");
return false;
}
((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
return true;
}

std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
MNN_ASSERT(outputIndices.size() > 0);
@@ -251,125 +205,9 @@ std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputInd
}
}
}

return countResult;
}

void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
NN::ScaleUpdateMethod scaleUpdateMethod) {
std::vector<int> needEraseIndices;

for (int i = 0; i < mSubModules.size(); i++) {
auto& m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto moduleType = theModule->type();
//auto& inputIndices = std::get<1>(m);
auto& outputIndices = std::get<2>(m);

if (moduleType == "Conv" && i < mSubModules.size() - 1) {
auto& p1 = mSubModules[i+1];
auto p1Module = std::get<0>(p1);
auto& p1ModuleType = p1Module->type();
auto& p1InputIndices = std::get<1>(p1);
auto& p1OutputIndices = std::get<2>(p1);

auto convOutputCount = countOutputReference(outputIndices);
bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));

// only conv
if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
(p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// conv + bn + ?
if (p1ModuleType == "BatchNorm") {
bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convBnConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}

// last conv + bn
if (i == mSubModules.size() - 2) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
// maybe there is a relu or relu6 after conv + bn
auto& p2 = mSubModules[i+2];
auto& p2Module = std::get<0>(p2);
auto p2ModuleType = p2Module->type();
auto& p2InputIndices = std::get<1>(p2);
auto& p2OutputIndices = std::get<2>(p2);

auto bnOutputCount = countOutputReference(p1OutputIndices);
bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));

// only conv + bn
if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
} else { // conv + bn + relu or conv + bn + relu6
bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
if (!convBnReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}

theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p2OutputIndices;
needEraseIndices.emplace_back(i + 1);
needEraseIndices.emplace_back(i + 2);
continue;
}
}
// conv + relu or conv + relu6
if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}

theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
}

if (i == mSubModules.size() - 1 && moduleType == "Conv") {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
}
}

// erase useless submodules
const int eraseSize = needEraseIndices.size();
int alreadyErasedCount = 0;
for (int i = 0; i < eraseSize; i++) {
auto position = needEraseIndices[i] - alreadyErasedCount;
auto type = std::get<0>(mSubModules[position])->type();
MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
mSubModules.erase(mSubModules.begin() + position);
alreadyErasedCount++;
}
}

std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> mStack(mStackSize);
for (int i = 0; i < mInputIndexes.size(); ++i) {
@@ -676,19 +514,6 @@ Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::
return new StaticModule(buffer, length, inputs, outputs, *config, false);
}
}
if (config->dynamic) {
// For dynamic mode
auto varMaps = Variable::loadMap(buffer, length);
std::vector<VARP> inputVars(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputVars[i] = varMaps[inputs[i]];
}
std::vector<VARP> outputVars(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputVars[i] = varMaps[outputs[i]];
}
return extract(inputVars, outputVars, false, subGraphMap);
}
std::set<int> inputIndexes;
std::set<int> outputIndexes;
std::map<std::string, int> inputsMap;
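With the `config->dynamic` branch deleted above, a hedged sketch of the remaining load path ("input"/"output" are placeholder tensor names, and `buffer`/`length` are assumed to hold a serialized .mnn model already read into memory; the signature matches the `MNN_PUBLIC static Module* load(...)` declaration kept in PipelineModule.hpp):

```cpp
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

// Load a serialized model through the Module API. After this commit the
// former dynamic-mode branch is gone, so loading goes through the static
// graph path.
std::unique_ptr<Module> loadExample(const uint8_t* buffer, size_t length) {
    Module::Config config; // default configuration
    return std::unique_ptr<Module>(
        Module::load({"input"}, {"output"}, buffer, length, &config));
}
```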
14 changes: 5 additions & 9 deletions express/module/PipelineModule.hpp
@@ -9,7 +9,6 @@
#ifndef PipelineModule_hpp
#define PipelineModule_hpp
#include <MNN/expr/Module.hpp>
-#include <MNN/expr/NN.hpp>
#include <MNN/expr/ExprCreator.hpp>

namespace MNN {
@@ -18,33 +17,30 @@ struct Net;

namespace MNN {
namespace Express {

+#define PIPELINE_MODULE "_pipeline_module__"
class PipelineModule : public Module {
public:
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
MNN_PUBLIC static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, const Module::Config* config = nullptr);
-MNN_PUBLIC static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
-MNN_PUBLIC static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
-MNN_PUBLIC void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
-NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
virtual void onClearCache() override;
-std::vector<int> countOutputReference(std::vector<int> outputIndices);
+MNN_PUBLIC std::vector<int> countOutputReference(std::vector<int> outputIndices);

+MNN_PUBLIC PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
+const Transformer& transformFunction = {});
private:
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, const Module::Config* config, std::map<std::string, SubGraph>& subGraphMap, bool inRecurce = false);
static void _createSubGraph(const MNN::Net* net, const Module::Config* config, std::map<std::string, SubGraph>& subGraphMap);

PipelineModule(){}
-PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
-const Transformer& transformFunction = {});

Module* clone(CloneContext* ctx) const override;

std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
-friend class NN;
};
} // namespace Express
} // namespace MNN
2 changes: 1 addition & 1 deletion express/module/WhileModule.hpp
@@ -16,7 +16,7 @@ class WhileModule : public Module {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
-static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
+MNN_PUBLIC static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

struct Info {
int mCondInputNumber;
6 changes: 6 additions & 0 deletions include/MNN/expr/NeuralNetWorkOp.hpp
@@ -135,6 +135,12 @@ MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu,
int8_t inputZeroPoint, int8_t outputZeroPoint,
int8_t minValue, int8_t maxValue, bool accumulateToInt16);
+MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, std::vector<float>&& weightScale,
+VARP x, INTS channel, INTS kernelSize,
+PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu,
+float scaleIn, float scaleOut,
+int8_t inputZeroPoint, int8_t outputZeroPoint,
+int8_t minValue, int8_t maxValue, float weightClampValue, bool accumulateToInt16);
MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);

enum GridSamplePaddingMode {GRID_SAMPLE_PADDING_ZEROS, GRID_SAMPLE_PADDING_BORDER, GRID_SAMPLE_PADDING_REFLECTION};
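The added overload differs from the one above it by taking a float bias, per-channel weight scales, and explicit scaleIn/scaleOut. Assuming the usual affine-quantization convention (an assumption; this diff does not spell it out), those parameters relate quantized and real values roughly as follows:

```cpp
// Assumed convention, for orientation only (not confirmed by this commit):
//   realInput  ~= scaleIn  * (qInput  - inputZeroPoint)
//   realOutput ~= scaleOut * (qOutput - outputZeroPoint)
//   realWeight ~= weightScale[oc] * qWeight   (per output channel oc)
inline float dequant(int8_t q, float scale, int8_t zeroPoint) {
    return scale * (static_cast<float>(q) - static_cast<float>(zeroPoint));
}
```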
(Diff truncated: the remaining 66 changed files are not shown.)
