hiai ddk sunset ddk v2 interface #3034

Merged · 3 commits · Oct 9, 2024
245 changes: 177 additions & 68 deletions source/backend/hiai/backend/NPUBackend.cpp
@@ -182,6 +182,83 @@ namespace MNN {

        return NO_ERROR;
    }
+#ifdef HIAI_DEBUG
+    bool WriteToBufferFile(ge::Buffer& buffer, std::string om_file_path)
+    {
+        FILE *fp;
+        fp = fopen(om_file_path.c_str(), "wb");
+        if (fp == NULL) {
+            printf("%s open failed !!!", om_file_path.c_str());
+            return false;
+        }
+
+        uint32_t write_size = (uint32_t)fwrite(buffer.data(), 1, buffer.size(), fp);
+        if (write_size != buffer.size()) {
+            fclose(fp);
+            printf("write om file failed !!!");
+            return false;
+        }
+        fclose(fp);
+        return true;
+    }
+
+    bool WriteToOMFile(domi::ModelBufferData om_model_buff, std::string om_file_path)
+    {
+        FILE *fp;
+        fp = fopen(om_file_path.c_str(), "wb");
+        if (fp == NULL) {
+            printf("%s open failed !!!", om_file_path.c_str());
+            return false;
+        }
+
+        uint32_t write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
+        if (write_size != om_model_buff.length) {
+            fclose(fp);
+            printf("write om file failed !!!");
+            return false;
+        }
+        fclose(fp);
+        return true;
+    }
+#endif
+
+    shared_ptr<hiai::AiModelMngerClient> LoadModelSync(domi::ModelBufferData modelBufferData, string model_name)
+    {
+        shared_ptr<hiai::AiModelMngerClient> mngerClient = make_shared<hiai::AiModelMngerClient>();
+        if (mngerClient == nullptr) {
+            MNN_ERROR("[NPU] Model Manager Client make_shared error.");
+            return nullptr;
+        }
+
+        int ret = mngerClient->Init(nullptr);
+        if (ret != 0) {
+            MNN_ERROR("[NPU] Model Manager Init Failed.");
+            return nullptr;
+        }
+
+        shared_ptr<hiai::AiModelBuilder> mcbuilder = make_shared<hiai::AiModelBuilder>(mngerClient);
+        hiai::MemBuffer* buffer = mcbuilder->InputMemBufferCreate(modelBufferData.data, modelBufferData.length);
+        if (buffer == nullptr) {
+            MNN_ERROR("[NPU] create MemBuffer failed");
+            return nullptr;
+        }
+
+        shared_ptr<hiai::AiModelDescription> desc = make_shared<hiai::AiModelDescription>(model_name, 3, 0, 0, 0);
+        desc->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
+
+        vector<shared_ptr<hiai::AiModelDescription>> model_desc;
+        model_desc.push_back(desc);
+
+        ret = mngerClient->Load(model_desc);
+        if (ret != 0) {
+            MNN_ERROR("[NPU] Model Load Failed.");
+            mngerClient = nullptr;
+        }
+
+        mcbuilder->MemBufferDestroy(buffer);
+        return mngerClient;
+    }

    static inline std::map<OpType, NPUBackend::Creator*>* getCreatorMap() {
        static std::once_flag of;
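
Reviewer note: `LoadModelSync` above wraps the classic DDK v2 load sequence (client `Init`, then `AiModelBuilder`, `AiModelDescription`, `Load`). A minimal caller-side sketch follows; it is illustrative only, the buffer would come from `domi::HiaiIrBuild` as in `bulidIRModelAndLoad` below, and `ExampleLoadAndQuery` plus the "mnn_npu_model" name are made up:

    // Illustrative sketch, not part of this diff. Assumes the DDK v2 headers
    // already included by NPUBackend.cpp; ExampleLoadAndQuery is hypothetical.
    void ExampleLoadAndQuery(domi::ModelBufferData& omBuffer) {
        shared_ptr<hiai::AiModelMngerClient> client = LoadModelSync(omBuffer, "mnn_npu_model");
        if (client == nullptr) {
            return; // a real caller would fall back to another MNN backend
        }
        // Once loaded, I/O shapes are queryable by model name, exactly as
        // getInOutTensorInfo() does later in this file.
        vector<hiai::TensorDimension> inDims, outDims;
        if (client->GetModelIOTensorDim("mnn_npu_model", inDims, outDims) == hiai::AI_SUCCESS) {
            // inDims / outDims describe each input and output in NCHW order.
        }
    }

One detail worth noting: `LoadModelSync` destroys its `MemBuffer` right after `Load` returns, which suggests the DDK copies the model during load and the caller keeps ownership of the original `ModelBufferData`.
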
@@ -322,7 +399,7 @@ namespace MNN {
    }

    void NPUBackend::onExecuteEnd() const {
-        process();
+        process(0);
    }

    Backend::MemObj* NPUBackend::onAcquire(const Tensor* tensor, StorageType storageType) {
@@ -356,8 +433,8 @@
        if (isInputCopy) {
            auto index = mInputMap.find((unsigned long)(const_cast<Tensor*>(dstTensor)));
            MNN_ASSERT(index != mInputMap.end());
-            shared_ptr<hiai::INDTensorBuffer> input = inputTensors[index->second];
-            memcpy(input->GetData(), srcTensor->host<void>(), (size_t)input->GetSize());
+            shared_ptr<hiai::AiTensor> input = mInputTensors[index->second];
+            memcpy(input->GetBuffer(), srcTensor->host<void>(), (size_t)input->GetSize());
        } else if(isOutputCopy){
            int index;
            bool flag = false;
@@ -371,9 +448,10 @@
MNN_PRINT("MNNTensor and HIAITensor mismatch!");
return;
}
shared_ptr<hiai::INDTensorBuffer> output = outputTensors[index];

shared_ptr<hiai::AiTensor> output = mOutputTensors[index];
Tensor* tmpTensor = const_cast<Tensor*>(dstTensor);
memcpy(tmpTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
memcpy(tmpTensor->buffer().host, output->GetBuffer(), (size_t)output->GetSize());
}
#ifdef HIAI_DEBUG
ATrace_endSection();
@@ -384,16 +462,59 @@
        mGrapMap.clear();
        mOutGEOpMap.clear();
        mInputOps.clear();
-        inputTensors.clear();
-        outputTensors.clear();
+        mInputTensors.clear();
+        mOutputTensors.clear();
        mMNNOutTensors.clear();
        mSclipMap.clear();
+        if (mMgrClient != nullptr) {
+            mMgrClient->UnLoadModel();
+        }
    }

    ErrorCode NPUBackend::onResizeEnd() {
        return bulidIRModelAndLoad();
    }

+    int NPUBackend::getInOutTensorInfo(string modelName) {
+        if (mMgrClient == nullptr) {
+            return -1;
+        }
+        int ret = mMgrClient->GetModelIOTensorDim(modelName, mInputDimension, mOutputDimension);
+        if (ret != hiai::AI_SUCCESS) {
+            MNN_ERROR("[NPU] Get model IO Tensor failed: %d \n", ret);
+            return -1;
+        }
+
+        MNN_PRINT("mInputDimension : %lu , mOutputDimension : %lu \n", mInputDimension.size(), mOutputDimension.size());
+
+        for (auto in_dim : mInputDimension)
+        {
+            shared_ptr<hiai::AiTensor> input = make_shared<hiai::AiTensor>();
+            input->Init(&in_dim);
+            mInputTensors.push_back(input);
+        }
+        auto index = 0;
+        for (auto out_dim : mOutputDimension)
+        {
+            shared_ptr<hiai::AiTensor> output = make_shared<hiai::AiTensor>();
+            MNN_PRINT("%d HiAiTensor output DIM:%u,%u,%u,%u\n", index,
+                      out_dim.GetNumber(), out_dim.GetChannel(),
+                      out_dim.GetHeight(), out_dim.GetWidth());
+            output->Init(&out_dim);
+            mOutputTensors.push_back(output);
+            index++;
+        }
+        index = 0;
+        for (auto opMap : mOutGEOpMap) {
+            for (auto tensor : opMap.second) {
+                mMNNOutTensors.push_back(tensor);
+                MNN_PRINT("%d MNNTensor output DIM:%d,%d,%d,%d\n", index,
+                          tensor->batch(), tensor->channel(), tensor->height(), tensor->width());
+                index++;
+            }
+        }
+        return 0;
+    }
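
The paired DIM prints above make it easy to spot shape disagreements between the HiAI tensors and the MNN output tensors they are copied into; the underlying invariant is a plain NCHW element-count match. A sketch of that invariant (illustrative only; `ShapesMatch` is not a function in this PR):

    // Sketch only: the invariant behind the paired DIM prints above.
    // The TensorDimension getters are the v2 ones used in this diff.
    static bool ShapesMatch(const hiai::TensorDimension& dim, const Tensor* t) {
        uint32_t hiaiCount = dim.GetNumber() * dim.GetChannel() * dim.GetHeight() * dim.GetWidth();
        uint32_t mnnCount  = (uint32_t)(t->batch() * t->channel() * t->height() * t->width());
        return hiaiCount == mnnCount; // both sides are NCHW element counts
    }
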
    ErrorCode NPUBackend::bulidIRModelAndLoad() {
        std::vector<ge::Operator> inputs;
        for (auto input : mInputOps){
@@ -412,78 +533,66 @@
        ge::Graph graph(graphName);
        graph.SetInputs(inputs).SetOutputs(outputOps);

-        std::shared_ptr<ge::Model> model = std::make_shared<ge::Model>("model", graphName);
-        if (model == nullptr) {
-            MNN_ERROR("Create model fail.");
-            return INVALID_VALUE;
-        }
+        ge::Model model(modelName, version);
+        model.SetGraph(graph);

-        model->SetGraph(graph);

-        hiai::ModelBuildOptions buildOptions;
-        buildOptions.formatMode = hiai::FormatMode::USE_ORIGIN;
-        std::ifstream file("quant_param", std::ios::binary | std::ios::ate);
-        if (!file.is_open()) {
-            MNN_PRINT("no quant_param config file, build non-quantized model.\n");
-        } else {
-            MNN_PRINT("find quant_param config file, build quantized model.\n");
-            std::streamsize size = file.tellg();
-            file.seekg(0, std::ios::beg);
-            std::string buffer(size, ' ');
-            if (!file.read(&buffer[0], size)) {
-                MNN_ERROR("Failed to read file.\n");
-                return INVALID_VALUE;
-            }
-            file.close();
-            buildOptions.quantizeConfig = buffer;
-        }
-        domi::HiaiIrBuild modelBuilder;
-        auto ret = modelBuilder.Build(buildOptions, modelName, model, builtModel);
-        if (ret != hiai::SUCCESS || builtModel == nullptr) {
-            MNN_ERROR("model build fail !\n");
-            return INVALID_VALUE;
+        domi::HiaiIrBuild ir_build;
+        domi::ModelBufferData om_model_buff;
+
+        ge::Buffer buffer;
+        ge::GraphErrCodeStatus geret = model.Save(buffer);
+        if (geret != 0) {
+            MNN_ERROR("[NPU] Model save failed \n");
        }
#ifdef HIAI_DEBUG
-        ret = builtModel->SaveToFile("/data/local/tmp/test_quant.om");
-        if (ret != hiai::SUCCESS) {
-            MNN_ERROR("builtModel SaveToFile failed\n");
-            return INVALID_VALUE;
-        }
+        WriteToBufferFile(buffer, "/data/local/tmp/test.irpb");
#endif
-        modelManager = hiai::CreateModelManager();
-        hiai::ModelInitOptions initOptions;
-        ret = modelManager->Init(initOptions, builtModel, nullptr);
-        if (ret != hiai::SUCCESS) {
-            MNN_ERROR("modelManager Init failed");
-            return INVALID_VALUE;
+        bool createBufferSuc = ir_build.CreateModelBuff(model, om_model_buff);
+
+        if (!createBufferSuc) {
+            MNN_ERROR("[NPU] Create Model Buff failed \n");
        }
-        ret = modelManager->SetPriority(hiai::ModelPriority::PRIORITY_HIGH);
-        if (ret != hiai::SUCCESS) {
-            MNN_ERROR("modelManager SetPriority failed");
+        bool buildIRSuc = ir_build.BuildIRModel(model, om_model_buff);
+        if (!buildIRSuc) {
+            MNN_ERROR("[NPU] IR model build failed \n");
+            ir_build.ReleaseModelBuff(om_model_buff);
            return INVALID_VALUE;
        }
-        std::vector<hiai::NDTensorDesc> inputDesc = builtModel->GetInputTensorDescs();
-        for (size_t i = 0; i < inputDesc.size(); i++) {
-            std::shared_ptr<hiai::INDTensorBuffer> inputTensorBuffer = hiai::CreateNDTensorBuffer(inputDesc[i]);
-            inputTensors.push_back(inputTensorBuffer);
-        }
-        std::vector<hiai::NDTensorDesc> outputDesc = builtModel->GetOutputTensorDescs();
-        for (size_t i = 0; i < outputDesc.size(); i++) {
-            std::shared_ptr<hiai::INDTensorBuffer> outputTensorBuffer = hiai::CreateNDTensorBuffer(outputDesc[i]);
-            outputTensors.push_back(outputTensorBuffer);
-        }
-        auto index = 0;
-        for (auto opMap : mOutGEOpMap) {
-            for (auto tensor : opMap.second) {
-                mMNNOutTensors.push_back(tensor);
-                index++;
-            }
-        }
+#ifdef HIAI_DEBUG
+        WriteToOMFile(om_model_buff, "/data/local/tmp/test.om");
+#endif
+        mMgrClient = LoadModelSync(om_model_buff, modelName);
+
+        if (mMgrClient == nullptr) {
+            MNN_ERROR("[NPU] Model Manager Client is null \n");
+            ir_build.ReleaseModelBuff(om_model_buff);
+            return INVALID_VALUE;
        }
-        return NO_ERROR;
+
+        ir_build.ReleaseModelBuff(om_model_buff);
+
+        int result = getInOutTensorInfo(modelName);
+        return (result == 0) ? NO_ERROR : INVALID_VALUE;
    }

-    int NPUBackend::process() const {
+    int NPUBackend::process(int modelIndex) const {
#ifdef HIAI_DEBUG
        ATrace_beginSection("HIAI process");
#endif
-        auto ret = modelManager->Run(*(const_cast<vector<shared_ptr<hiai::INDTensorBuffer>>*>(&inputTensors)), *(const_cast<vector<shared_ptr<hiai::INDTensorBuffer>>*>(&outputTensors)));
+        hiai::AiContext context;
+        string key = "model_name";
+        string value = to_string(modelIndex);
+        context.AddPara(key, value);
+
+        int istamp;
+
+        int ret = mMgrClient->Process(context, *(const_cast<vector<shared_ptr<hiai::AiTensor>>*>(&mInputTensors)),
+                                      *(const_cast<vector<shared_ptr<hiai::AiTensor>>*>(&mOutputTensors)), 1000,
+                                      istamp);
#ifdef HIAI_DEBUG
        ATrace_endSection();
#endif
        return ret;
    }
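
For reviewers, the resulting control flow in this file, end to end (a sketch, not code from the diff):

    // onResizeEnd()
    //   -> bulidIRModelAndLoad(): ge::Model + domi::HiaiIrBuild produce an OM buffer,
    //      LoadModelSync() turns the buffer into mMgrClient,
    //      getInOutTensorInfo() fills mInputTensors / mOutputTensors from the model's I/O dims.
    // onExecuteEnd()
    //   -> process(0): mMgrClient->Process(...) runs the model, with the AiContext
    //      "model_name" parameter carrying the model index and a 1000 (presumably ms) timeout.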

18 changes: 11 additions & 7 deletions source/backend/hiai/backend/NPUBackend.hpp
@@ -308,14 +308,17 @@ namespace MNN {
    public:

        ErrorCode bulidIRModelAndLoad();
-        int process() const ;
+        int process(int modelIndex) const ;

        shared_ptr<ge::Operator> getInputOps(const Op *op, int index = 0);

        void setOutputOps(const Op *op, vector<shared_ptr<ge::Operator>>&& HIAI_op,
                          const std::vector<Tensor *> &outputs);
        void setNetworkInput(const std::vector<Tensor *> &inputs, const Op* op);

+    private:
+        int getInOutTensorInfo(string modelName);

+    public:

        map<int, vector<pair<shared_ptr<ge::Operator>, string>>> mGrapMap;
@@ -335,18 +338,19 @@
        static bool addCreator(OpType t, Creator* c);

    private:

+        shared_ptr<hiai::AiModelMngerClient> mMgrClient;
+        vector<string> mModelName;
+
+        vector<hiai::TensorDimension> mInputDimension;
+        vector<hiai::TensorDimension> mOutputDimension;
+
+        vector<shared_ptr<hiai::AiTensor>> mInputTensors;
+        vector<shared_ptr<hiai::AiTensor>> mOutputTensors;

        MNNTensorList mMNNOutTensors;
        const NPURuntime* mNPURuntime;
        BackendConfig::PrecisionMode mPrecision;

-        shared_ptr<hiai::IBuiltModel> builtModel;
-        shared_ptr<hiai::IModelManager> modelManager;
-        vector<shared_ptr<hiai::INDTensorBuffer>> inputTensors;
-        vector<shared_ptr<hiai::INDTensorBuffer>> outputTensors;

#ifdef HIAI_DEBUG
        void *(*ATrace_beginSection) (const char* sectionName);
        void *(*ATrace_endSection) (void);
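
A side note on the `HIAI_DEBUG` members kept at the bottom: `ATrace_beginSection` and `ATrace_endSection` are raw function pointers, so they are presumably resolved at runtime. The resolving code is outside this diff; a typical Android pattern looks roughly like this (the statics and `LoadATraceSymbols` are hypothetical):

    #include <dlfcn.h>

    // Assumed pattern, not shown in this diff: resolve the NDK trace symbols
    // at runtime so tracing degrades gracefully when they are unavailable.
    static void* (*sBeginSection)(const char*) = nullptr;
    static void* (*sEndSection)(void)          = nullptr;

    static void LoadATraceSymbols() { // hypothetical helper
        void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
        if (lib == nullptr) {
            return; // tracing simply stays disabled
        }
        sBeginSection = reinterpret_cast<void* (*)(const char*)>(dlsym(lib, "ATrace_beginSection"));
        sEndSection   = reinterpret_cast<void* (*)(void)>(dlsym(lib, "ATrace_endSection"));
    }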