Merge pull request #1454 from alibaba/feature/sync
Feature/sync
jxt1234 authored Apr 16, 2021
2 parents 0837255 + 3c4ba7c commit 3dada34
Showing 74 changed files with 1,657 additions and 540 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -368,7 +368,6 @@ list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Math
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/NeuralNetWorkOp.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Optimizer.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Executor.hpp")
-list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/NN.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Module.hpp")
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/NeuralNetWorkOp.hpp")

66 changes: 66 additions & 0 deletions express/NeuralNetWorkOp.cpp
@@ -1559,6 +1559,72 @@ VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std::vector<fl
return (Variable::create(Expr::create(convOp.get(), {x})));
}

VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, std::vector<float>&& weightScale,
VARP x, INTS channel, INTS kernelSize,
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu,
float scaleIn, float scaleOut,
int8_t inputZeroPoint, int8_t outputZeroPoint,
int8_t minValue, int8_t maxValue, float weightClampValue, bool accumulateToInt16) {
std::unique_ptr<OpT> convOp(new OpT);
convOp->type = OpType_ConvInt8;
if (channel[0] == channel[1] && channel[0] == group) {
convOp->type = OpType_DepthwiseConvInt8;
}
convOp->main.type = OpParameter_Convolution2D;
convOp->main.value = new Convolution2DT;
auto conv2D = convOp->main.AsConvolution2D();
conv2D->common.reset(new Convolution2DCommonT);
conv2D->common->padMode = _convertPadMode(pad);
conv2D->common->padX = pads[0];
conv2D->common->padY = pads[1];
conv2D->common->strideX = stride[0];
conv2D->common->strideY = stride[1];
conv2D->common->group = group;
conv2D->common->outputCount = channel[1];
conv2D->common->inputCount = channel[0];
conv2D->common->dilateX = dilate[0];
conv2D->common->dilateY = dilate[1];
conv2D->common->kernelX = kernelSize[0];
conv2D->common->kernelY = kernelSize[1];
conv2D->common->relu = relu;
MNN_ASSERT(weight.size() == channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1]);
conv2D->symmetricQuan.reset(new QuantizedFloatParamT);
if (bias.size() == 0) {
bias.resize(channel[1]);
std::fill(bias.begin(), bias.end(), 0);
}

conv2D->bias = bias;

conv2D->symmetricQuan->weight = std::move(weight);
conv2D->symmetricQuan->zeroPoint = std::move(inputZeroPoint);
conv2D->symmetricQuan->outputZeroPoint = std::move(outputZeroPoint);
MNN_ASSERT(maxValue > minValue);
conv2D->symmetricQuan->clampMin = minValue;
conv2D->symmetricQuan->clampMax = maxValue;

// const int kn = conv2D->common->outputCount;
// const int ks = weight.size() / kn;
// std::vector<float> scales(kn, 1.0f);
// std::vector<float> weightFloat;
// for (int i = 0; i < weight.size(); i++) {
// weightFloat.emplace_back(weight[i] * weightScale[i / ks]);
// }
// conv2D->quanParameter = IDSTEncoder::encode(weightFloat, weightScale, ks, kn, false, weight.data(), -int(weightClampValue));

conv2D->quanParameter.reset(new IDSTQuanT);
conv2D->quanParameter->alpha = std::move(weightScale);
conv2D->quanParameter->scaleIn = scaleIn;
conv2D->quanParameter->scaleOut = scaleOut;
conv2D->quanParameter->aMin = -int(weightClampValue);

if (accumulateToInt16) {
conv2D->symmetricQuan->method = MNN::QuantizeAlgo::QuantizeAlgo_OVERFLOW_AWARE;
}

return (Variable::create(Expr::create(convOp.get(), {x})));
}

VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim) {
std::unique_ptr<MNN::OpT> cosineSimilarityOp(new MNN::OpT);
cosineSimilarityOp->type = MNN::OpType_CosineSimilarity;
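For context, a minimal sketch of how the new overload might be called. All shapes, scale values, and zero points below are illustrative assumptions, not values taken from this commit:

```cpp
#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

// Hypothetical 3x3 int8 convolution, 8 -> 16 channels; every numeric value
// here is a placeholder chosen for illustration only.
VARP buildInt8Conv(VARP x) {
    const int ic = 8, oc = 16, kh = 3, kw = 3;
    std::vector<int8_t> weight(oc * ic * kh * kw, 1); // quantized weights
    std::vector<float> bias(oc, 0.0f);                // float bias (new in this overload)
    std::vector<float> weightScale(oc, 0.02f);        // per-output-channel dequant scales
    return _Conv(std::move(weight), std::move(bias), std::move(weightScale),
                 x, /*channel*/ {ic, oc}, /*kernelSize*/ {kw, kh},
                 SAME, /*stride*/ {1, 1}, /*dilate*/ {1, 1}, /*group*/ 1,
                 /*pads*/ {0, 0}, /*relu*/ false,
                 /*scaleIn*/ 0.05f, /*scaleOut*/ 0.1f,
                 /*inputZeroPoint*/ 0, /*outputZeroPoint*/ 0,
                 /*minValue*/ -127, /*maxValue*/ 127,
                 /*weightClampValue*/ 127.0f, /*accumulateToInt16*/ false);
}
```

Note that the weight vector size must satisfy the assertion in the body above: channel[1] * (channel[0] / group) * kernelSize[0] * kernelSize[1].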
2 changes: 1 addition & 1 deletion express/module/IfModule.hpp
@@ -18,7 +18,7 @@ class IfModule : public Module {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
-static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
+MNN_PUBLIC static IfModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

private:
IfModule(){}
8 changes: 4 additions & 4 deletions express/module/Module.cpp
@@ -141,10 +141,6 @@ Module* Module::load(const std::vector<std::string>& inputs, const std::vector<s
return PipelineModule::load(inputs, outputs, buffer, length, config);
}

-Module* Module::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
-return PipelineModule::extract(inputs, outputs, fortrain, subGraph);
-}

EXPRP Module::CloneContext::getOrClone(EXPRP expr) {
auto it = mExprMap.find(expr.get());
if (it == mExprMap.end()) {
@@ -192,5 +188,9 @@ Module* Module::cloneBaseTo(CloneContext* ctx, Module* module) const {
return module;
}

+Module* Module::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
+return new PipelineModule(inputs, outputs);
+}

} // namespace Express
} // namespace MNN
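A minimal sketch of what the relocated definition implies for callers, assuming the Express API of this era (`_Input` arguments are illustrative): `Module::extract` now simply wraps the expression graph in a plain `PipelineModule`, so the `fortrain` flag and `subGraph` map no longer influence the result.

```cpp
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

// Build a toy graph and capture it as a Module. After this commit the call
// below behaves like `new PipelineModule({x}, {y})` regardless of fortrain.
void extractExample() {
    auto x = _Input({1, 3, 224, 224}, NC4HW4);
    auto y = _Relu(x); // stand-in for a real network
    std::unique_ptr<Module> m(Module::extract({x}, {y}, /*fortrain*/ false));
    auto outputs = m->onForward({x});
}
```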
175 changes: 0 additions & 175 deletions express/module/PipelineModule.cpp
@@ -20,7 +20,6 @@ using namespace MNN::Express;
namespace MNN {
namespace Express {
//#define DYNAMIC
-#define PIPELINE_MODULE "_pipeline_module__"
class ExprModule : public Module {
public:
ExprModule(EXPRP expr) {
@@ -90,43 +89,6 @@ class ExprModule : public Module {
std::vector<int> mInputIndexes;
};

Module* PipelineModule::extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph) {
std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(EXPRP)> transformFunction;
if (fortrain) {
transformFunction =
[&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
auto convExtracted = NN::Utils::ExtractConvolution(source);
if (convExtracted.weight == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> module(NN::Conv(convExtracted));
module->setName(source->name());
return std::make_pair(std::vector<int>{0}, module);
};
} else {
transformFunction = [&subGraph](EXPRP source) {
if (source->get() == nullptr) {
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
}
std::shared_ptr<Module> m(NN::Utils::ExtractNotRunableOp(source, subGraph));
if (nullptr != m) {
m->setName(source->name());
return std::make_pair(std::vector<int>{}, m);
}
return std::make_pair(std::vector<int>{}, std::shared_ptr<Module>(nullptr));
};
}
return new PipelineModule(inputs, outputs, transformFunction);
}

PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outputs, const Transformer& transformFunction) {
setType(PIPELINE_MODULE);
std::vector<EXPRP> executeOrder;
@@ -223,14 +185,6 @@ PipelineModule::PipelineModule(std::vector<VARP> inputs, std::vector<VARP> outpu
mOutputIndexes.emplace_back(indexes[outputExpr.first] + outputExpr.second);
}
}
bool PipelineModule::turnQuantize(Module* module, const int bit, NN::FeatureScaleStatMethod featureScaleStatMethod, NN::ScaleUpdateMethod scaleUpdateMethod) {
if (nullptr == module || module->type() != PIPELINE_MODULE) {
MNN_ERROR("Invalide module for quantized\n");
return false;
}
((PipelineModule*)module)->toTrainQuant(bit, featureScaleStatMethod, scaleUpdateMethod);
return true;
}

std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputIndices) {
MNN_ASSERT(outputIndices.size() > 0);
@@ -251,125 +205,9 @@ std::vector<int> PipelineModule::countOutputReference(std::vector<int> outputInd
}
}
}

return countResult;
}

void PipelineModule::toTrainQuant(const int bits, NN::FeatureScaleStatMethod featureScaleStatMethod,
NN::ScaleUpdateMethod scaleUpdateMethod) {
std::vector<int> needEraseIndices;

for (int i = 0; i < mSubModules.size(); i++) {
auto& m = mSubModules[i];
auto& theModule = std::get<0>(m);
auto moduleType = theModule->type();
//auto& inputIndices = std::get<1>(m);
auto& outputIndices = std::get<2>(m);

if (moduleType == "Conv" && i < mSubModules.size() - 1) {
auto& p1 = mSubModules[i+1];
auto p1Module = std::get<0>(p1);
auto& p1ModuleType = p1Module->type();
auto& p1InputIndices = std::get<1>(p1);
auto& p1OutputIndices = std::get<2>(p1);

auto convOutputCount = countOutputReference(outputIndices);
bool convSingleOutputReference = ((outputIndices.size() == 1) && (convOutputCount[0] == 1));

// only conv
if ((!convSingleOutputReference) || (p1ModuleType == "Conv") ||
(p1ModuleType != "BatchNorm" && p1ModuleType != "ReLU" && p1ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}
// conv + bn + ?
if (p1ModuleType == "BatchNorm") {
bool convBnConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convBnConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}

// last conv + bn
if (i == mSubModules.size() - 2) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
// maybe there is a relu or relu6 after conv + bn
auto& p2 = mSubModules[i+2];
auto& p2Module = std::get<0>(p2);
auto p2ModuleType = p2Module->type();
auto& p2InputIndices = std::get<1>(p2);
auto& p2OutputIndices = std::get<2>(p2);

auto bnOutputCount = countOutputReference(p1OutputIndices);
bool bnSingleOutputReference = ((p1OutputIndices.size() == 1) && (bnOutputCount[0] == 1));

// only conv + bn
if ((!bnSingleOutputReference) || (p2ModuleType != "ReLU" && p2ModuleType != "ReLU6")) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
} else { // conv + bn + relu or conv + bn + relu6
bool convBnReluConnected = ((bnSingleOutputReference) && (p2InputIndices.size() == 1) && (p2InputIndices[0] == p1OutputIndices[0]));
if (!convBnReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}

theModule.reset(NN::ConvBNReluFused({theModule, p1Module, p2Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p2OutputIndices;
needEraseIndices.emplace_back(i + 1);
needEraseIndices.emplace_back(i + 2);
continue;
}
}
// conv + relu or conv + relu6
if (p1ModuleType == "ReLU" || p1ModuleType == "ReLU6") {
bool convReluConnected = ((convSingleOutputReference) && (p1InputIndices.size() == 1) && (p1InputIndices[0] == outputIndices[0]));
if (!convReluConnected) {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
continue;
}

theModule.reset(NN::ConvBNReluFused({theModule, p1Module}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
outputIndices = p1OutputIndices;
needEraseIndices.emplace_back(i + 1);
continue;
}
}

if (i == mSubModules.size() - 1 && moduleType == "Conv") {
theModule.reset(NN::ConvBNReluFused({theModule}, featureScaleStatMethod, scaleUpdateMethod, bits));
registerModel({theModule});
}
}

// erase useless submodules
const int eraseSize = needEraseIndices.size();
int alreadyErasedCount = 0;
for (int i = 0; i < eraseSize; i++) {
auto position = needEraseIndices[i] - alreadyErasedCount;
auto type = std::get<0>(mSubModules[position])->type();
MNN_ASSERT(type == "BatchNorm" || type == "ReLU" || type == "ReLU6");
mSubModules.erase(mSubModules.begin() + position);
alreadyErasedCount++;
}
}

std::vector<VARP> PipelineModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> mStack(mStackSize);
for (int i = 0; i < mInputIndexes.size(); ++i) {
@@ -676,19 +514,6 @@ Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::
return new StaticModule(buffer, length, inputs, outputs, *config, false);
}
}
if (config->dynamic) {
// For dynamic mode
auto varMaps = Variable::loadMap(buffer, length);
std::vector<VARP> inputVars(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputVars[i] = varMaps[inputs[i]];
}
std::vector<VARP> outputVars(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputVars[i] = varMaps[outputs[i]];
}
return extract(inputVars, outputVars, false, subGraphMap);
}
std::set<int> inputIndexes;
std::set<int> outputIndexes;
std::map<std::string, int> inputsMap;
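With the `config->dynamic` branch deleted above, a hedged sketch of the remaining load path ("input"/"output" are placeholder tensor names, and `buffer`/`length` are assumed to hold a serialized .mnn model already read into memory; the signature matches the `MNN_PUBLIC static Module* load(...)` declaration kept in PipelineModule.hpp):

```cpp
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

// Load a serialized model through the Module API. After this commit the
// former dynamic-mode branch is gone, so loading goes through the static
// graph path.
std::unique_ptr<Module> loadExample(const uint8_t* buffer, size_t length) {
    Module::Config config; // default configuration
    return std::unique_ptr<Module>(
        Module::load({"input"}, {"output"}, buffer, length, &config));
}
```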
14 changes: 5 additions & 9 deletions express/module/PipelineModule.hpp
@@ -9,7 +9,6 @@
#ifndef PipelineModule_hpp
#define PipelineModule_hpp
#include <MNN/expr/Module.hpp>
-#include <MNN/expr/NN.hpp>
#include <MNN/expr/ExprCreator.hpp>

namespace MNN {
@@ -18,33 +17,30 @@ struct Net;

namespace MNN {
namespace Express {

+#define PIPELINE_MODULE "_pipeline_module__"
class PipelineModule : public Module {
public:
typedef std::function<std::pair<std::vector<int>, std::shared_ptr<Module>>(Express::EXPRP)> Transformer;
MNN_PUBLIC static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, const Module::Config* config = nullptr);
-MNN_PUBLIC static Module* extract(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs, bool fortrain, const std::map<std::string, SubGraph>& subGraph = {});
-MNN_PUBLIC static bool turnQuantize(Module* module, const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor, NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
-MNN_PUBLIC void toTrainQuant(const int bits = 8, NN::FeatureScaleStatMethod featureScaleStatMethod = NN::PerTensor,
-NN::ScaleUpdateMethod scaleUpdateMethod = NN::MovingAverage);
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
virtual void onClearCache() override;
-std::vector<int> countOutputReference(std::vector<int> outputIndices);
+MNN_PUBLIC std::vector<int> countOutputReference(std::vector<int> outputIndices);

+MNN_PUBLIC PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
+const Transformer& transformFunction = {});
private:
static Module* load(const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, const uint8_t* buffer, size_t length, const Module::Config* config, std::map<std::string, SubGraph>& subGraphMap, bool inRecurce = false);
static void _createSubGraph(const MNN::Net* net, const Module::Config* config, std::map<std::string, SubGraph>& subGraphMap);

PipelineModule(){}
-PipelineModule(std::vector<Express::VARP> inputs, std::vector<Express::VARP> outputs,
-const Transformer& transformFunction = {});

Module* clone(CloneContext* ctx) const override;

std::vector<std::tuple<std::shared_ptr<Module>, std::vector<int>, std::vector<int>>> mSubModules;
std::vector<int> mInputIndexes;
std::vector<int> mOutputIndexes;
int mStackSize = 0;
-friend class NN;
};
} // namespace Express
} // namespace MNN
2 changes: 1 addition & 1 deletion express/module/WhileModule.hpp
@@ -16,7 +16,7 @@ class WhileModule : public Module {
// Do nothing
}
virtual std::vector<Express::VARP> onForward(const std::vector<Express::VARP>& inputs) override;
-static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);
+MNN_PUBLIC static WhileModule* create(const Op* op, const std::map<std::string, SubGraph>& subGraph);

struct Info {
int mCondInputNumber;
6 changes: 6 additions & 0 deletions include/MNN/expr/NeuralNetWorkOp.hpp
@@ -135,6 +135,12 @@ MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<int>&& bias, std
PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu,
int8_t inputZeroPoint, int8_t outputZeroPoint,
int8_t minValue, int8_t maxValue, bool accumulateToInt16);
+MNN_PUBLIC VARP _Conv(std::vector<int8_t>&& weight, std::vector<float>&& bias, std::vector<float>&& weightScale,
+VARP x, INTS channel, INTS kernelSize,
+PaddingMode pad, INTS stride, INTS dilate, int group, INTS pads, bool relu,
+float scaleIn, float scaleOut,
+int8_t inputZeroPoint, int8_t outputZeroPoint,
+int8_t minValue, int8_t maxValue, float weightClampValue, bool accumulateToInt16);
MNN_PUBLIC VARP _CosineSimilarity(VARP input0, VARP input1, VARP inputDim);

enum GridSamplePaddingMode {GRID_SAMPLE_PADDING_ZEROS, GRID_SAMPLE_PADDING_BORDER, GRID_SAMPLE_PADDING_REFLECTION};
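The added overload differs from the one above it by taking a float bias, per-channel weight scales, and explicit scaleIn/scaleOut. Assuming the usual affine-quantization convention (an assumption; this diff does not spell it out), those parameters relate quantized and real values roughly as follows:

```cpp
// Assumed convention, for orientation only (not confirmed by this commit):
//   realInput  ~= scaleIn  * (qInput  - inputZeroPoint)
//   realOutput ~= scaleOut * (qOutput - outputZeroPoint)
//   realWeight ~= weightScale[oc] * qWeight   (per output channel oc)
inline float dequant(int8_t q, float scale, int8_t zeroPoint) {
    return scale * (static_cast<float>(q) - static_cast<float>(zeroPoint));
}
```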
(Diff truncated: the remaining 66 changed files are not shown.)
