From a19c591c5e39588b37196a40938b4d2c4404172b Mon Sep 17 00:00:00 2001 From: ZHEQIUSHUI Date: Wed, 15 May 2024 17:01:48 +0800 Subject: [PATCH] optimize --- build.sh | 39 +++++----- src/runner/LLM.hpp | 18 ++--- .../ax_model_runner/ax_model_runner.hpp | 8 +- .../ax_model_runner/ax_model_runner_ax650.cpp | 77 ++++++------------- .../ax_model_runner/ax_model_runner_ax650.hpp | 5 +- src/runner/utils/bfloat16.hpp | 1 + src/runner/utils/memory_utils.cpp | 44 +++++++++-- src/runner/utils/memory_utils.hpp | 4 +- 8 files changed, 100 insertions(+), 96 deletions(-) diff --git a/build.sh b/build.sh index 1f4adbc..7267655 100755 --- a/build.sh +++ b/build.sh @@ -2,7 +2,7 @@ # BSP_MSP_DIR 这个变量使用*绝对路径*指定到 SDK 的msp/out目录,如下所示(根据自己的目录修改) # 绝对路径 绝对路径 绝对路径 -BSP_MSP_DIR=$PWD/AX650_SDK_V1.63.2_20240125165719_NO3387/msp/out/ +BSP_MSP_DIR=$PWD/bsp_msp_out/msp/out/ echo "bsp dir: ${BSP_MSP_DIR}" # 下面会简单判断 BSP 路径是否正确 if [ ! -d "${BSP_MSP_DIR}" ]; then @@ -25,25 +25,28 @@ cd ${build_dir} URL="https://developer.arm.com/-/media/Files/downloads/gnu-a/9.2-2019.12/binrel/gcc-arm-9.2-2019.12-x86_64-aarch64-none-linux-gnu.tar.xz" FOLDER="gcc-arm-9.2-2019.12-x86_64-aarch64-none-linux-gnu" -# Check if the file exists -if [ ! -f "$FOLDER.tar.xz" ]; then - # Download the file - echo "Downloading $URL" - wget "$URL" -O "$FOLDER.tar.xz" -fi - -# Check if the folder exists -if [ ! -d "$FOLDER" ]; then - # Extract the file - echo "Extracting $FOLDER.tar.xz" - tar -xf "$FOLDER.tar.xz" -fi - -export PATH=$PATH:$PWD/$FOLDER/bin/ aarch64-none-linux-gnu-gcc -v if [ $? -ne 0 ]; then - echo "Error: aarch64-none-linux-gnu-gcc not found" - exit 1 + # Check if the file exists + if [ ! -f "$FOLDER.tar.xz" ]; then + # Download the file + echo "Downloading $URL" + wget "$URL" -O "$FOLDER.tar.xz" + fi + + # Check if the folder exists + if [ ! -d "$FOLDER" ]; then + # Extract the file + echo "Extracting $FOLDER.tar.xz" + tar -xf "$FOLDER.tar.xz" + fi + + export PATH=$PATH:$PWD/$FOLDER/bin/ + aarch64-none-linux-gnu-gcc -v + if [ $? -ne 0 ]; then + echo "Error: aarch64-none-linux-gnu-gcc not found" + exit 1 + fi fi # 开始编译 diff --git a/src/runner/LLM.hpp b/src/runner/LLM.hpp index ac40054..ec49dba 100644 --- a/src/runner/LLM.hpp +++ b/src/runner/LLM.hpp @@ -118,7 +118,7 @@ class LLM if (!attr.b_dynamic_load_axmodel_layer) { - int ret = llama_layers[i].layer.init(llama_layers[i].filename.c_str()); + int ret = llama_layers[i].layer.init(llama_layers[i].filename.c_str(), false); if (ret != 0) { ALOGE("init axmodel(%s) failed", llama_layers[i].filename.c_str()); @@ -148,7 +148,7 @@ class LLM } } - int ret = llama_post.init(attr.filename_post_axmodel.c_str()); + int ret = llama_post.init(attr.filename_post_axmodel.c_str(), false); if (ret != 0) { ALOGE("init post axmodel(%s) failed", attr.filename_post_axmodel.c_str()); @@ -163,11 +163,11 @@ class LLM int ret; if (_attr.b_use_mmap_load_layer) { - ret = layer.layer.init(layer.layer_buffer); + ret = layer.layer.init((char *)layer.layer_buffer.data(), layer.layer_buffer.size()); } else { - ret = layer.layer.init(layer.layer_buffer_vec); + ret = layer.layer.init(layer.layer_buffer_vec.data(), layer.layer_buffer_vec.size()); } if (ret != 0) { @@ -176,12 +176,12 @@ class LLM } { - _attr.max_token_len = llama_layers[0].layer.get_input("mask").vShape[0] / sizeof(unsigned short) - 1; + _attr.max_token_len = llama_layers[0].layer.get_input("mask").nSize / sizeof(unsigned short) - 1; ALOGI("max_token_len : %d", _attr.max_token_len); // auto &input_k_cache = llama_layers[0].layer.get_input("K_cache"); // auto &output_k_cache_out = llama_layers[0].layer.get_output("K_cache_out"); - _attr.kv_cache_size = llama_layers[0].layer.get_output("K_cache_out").vShape[0] / sizeof(unsigned short); - _attr.kv_cache_num = llama_layers[0].layer.get_input("K_cache").vShape[0] / _attr.kv_cache_size / sizeof(unsigned short); + _attr.kv_cache_size = llama_layers[0].layer.get_output("K_cache_out").nSize / sizeof(unsigned short); + _attr.kv_cache_num = llama_layers[0].layer.get_input("K_cache").nSize / _attr.kv_cache_size / sizeof(unsigned short); ALOGI("kv_cache_size : %d, kv_cache_num: %d", _attr.kv_cache_size, _attr.kv_cache_num); if (_attr.max_token_len > _attr.kv_cache_num) { @@ -275,11 +275,11 @@ class LLM int ret; if (_attr.b_use_mmap_load_layer) { - ret = layer.layer.init(layer.layer_buffer); + ret = layer.layer.init((char *)layer.layer_buffer.data(), layer.layer_buffer.size()); } else { - ret = layer.layer.init(layer.layer_buffer_vec); + ret = layer.layer.init(layer.layer_buffer_vec.data(), layer.layer_buffer_vec.size()); } if (ret != 0) { diff --git a/src/runner/ax_model_runner/ax_model_runner.hpp b/src/runner/ax_model_runner/ax_model_runner.hpp index f593aff..1b93562 100644 --- a/src/runner/ax_model_runner/ax_model_runner.hpp +++ b/src/runner/ax_model_runner/ax_model_runner.hpp @@ -2,7 +2,6 @@ #include #include #include -#include "memory_utils.hpp" typedef enum _color_space_e { @@ -47,9 +46,8 @@ class ax_runner_base std::map map_input_tensors; public: - virtual int init(const char *model_file) = 0; - virtual int init(std::vector &model_buffer) = 0; - virtual int init(MMap &model_buffer) = 0; + virtual int init(const char *model_file, bool use_mmap = false) = 0; + virtual int init(char *model_buffer, size_t model_size) = 0; virtual void deinit() = 0; @@ -106,4 +104,4 @@ class ax_runner_base } }; -int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size); \ No newline at end of file +// int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size); \ No newline at end of file diff --git a/src/runner/ax_model_runner/ax_model_runner_ax650.cpp b/src/runner/ax_model_runner/ax_model_runner_ax650.cpp index eace2c3..84f233f 100644 --- a/src/runner/ax_model_runner/ax_model_runner_ax650.cpp +++ b/src/runner/ax_model_runner/ax_model_runner_ax650.cpp @@ -277,63 +277,36 @@ int ax_runner_ax650::sub_init() return ret; } -int ax_runner_ax650::init(const char *model_file) +int ax_runner_ax650::init(const char *model_file, bool use_mmap) { - // 2. load model - std::shared_ptr model_buffer(new MMap(model_file)); - if (!model_buffer->data()) + if (use_mmap) { - ALOGE("mmap"); - return -1; - } - return init(*model_buffer.get()); - // std::shared_ptr> model_buffer((new std::vector())); - // if (!read_file(model_file, *model_buffer.get())) - // { - // ALOGE("read_file"); - // return -1; - // } - - // 3. create handle -} - -int ax_runner_ax650::init(MMap &model_buffer) -{ - if (!m_handle) - { - m_handle = new ax_joint_runner_ax650_handle_t; - } - - static bool b_init = false; - if (!b_init) - { - // 1. init engine - AX_ENGINE_NPU_ATTR_T npu_attr; - memset(&npu_attr, 0, sizeof(npu_attr)); - npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE; - AX_SYS_Init(); - auto ret = AX_ENGINE_Init(&npu_attr); - if (0 != ret) + MMap model_buffer(model_file); + if (!model_buffer.data()) { - return ret; + ALOGE("mmap"); + return -1; } - b_init = true; + auto ret = init((char *)model_buffer.data(), model_buffer.size()); + model_buffer.close_file(); + return ret; } - - // 3. create handle - - int ret = AX_ENGINE_CreateHandle(&m_handle->handle, model_buffer.data(), model_buffer.size()); - if (0 != ret) + else { - ALOGE("AX_ENGINE_CreateHandle"); + char *model_buffer; + size_t len; + if (!read_file(model_file, &model_buffer, &len)) + { + ALOGE("read_file"); + return -1; + } + auto ret = init(model_buffer, len); + delete[] model_buffer; return ret; } - // fprintf(stdout, "Engine creating handle is done.\n"); - - return sub_init(); } -int ax_runner_ax650::init(std::vector &model_buffer) +int ax_runner_ax650::init(char *model_buffer, size_t model_size) { if (!m_handle) { @@ -358,7 +331,7 @@ int ax_runner_ax650::init(std::vector &model_buffer) // 3. create handle - int ret = AX_ENGINE_CreateHandle(&m_handle->handle, model_buffer.data(), model_buffer.size()); + int ret = AX_ENGINE_CreateHandle(&m_handle->handle, model_buffer, model_size); if (0 != ret) { ALOGE("AX_ENGINE_CreateHandle"); @@ -461,7 +434,7 @@ int ax_runner_ax650::inference() return AX_ENGINE_RunSync(m_handle->handle, &m_handle->io_data); } -int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size) -{ - return AX_IVPS_CmmCopyTdp(dst, src, size); -} \ No newline at end of file +// int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size) +// { +// return AX_IVPS_CmmCopyTdp(dst, src, size); +// } \ No newline at end of file diff --git a/src/runner/ax_model_runner/ax_model_runner_ax650.hpp b/src/runner/ax_model_runner/ax_model_runner_ax650.hpp index dc1d8f4..03826d4 100644 --- a/src/runner/ax_model_runner/ax_model_runner_ax650.hpp +++ b/src/runner/ax_model_runner/ax_model_runner_ax650.hpp @@ -11,9 +11,8 @@ class ax_runner_ax650 : public ax_runner_base int sub_init(); public: - int init(const char *model_file) override; - int init(std::vector &model_buffer) override; - int init(MMap &model_buffer) override; + int init(const char *model_file, bool use_mmap = false) override; + int init(char *model_buffer, size_t model_size) override; void release(); void deinit() override; diff --git a/src/runner/utils/bfloat16.hpp b/src/runner/utils/bfloat16.hpp index d1a1bae..157e07d 100644 --- a/src/runner/utils/bfloat16.hpp +++ b/src/runner/utils/bfloat16.hpp @@ -48,6 +48,7 @@ static std::vector> topk_bfloat16(unsigned short *arr, int // Create a vector of pairs with index and value std::vector> indexedValues; + indexedValues.reserve(size); for (int i = 0; i < size; ++i) { indexedValues.push_back(std::make_pair(i, bfloat16(arr[i]))); diff --git a/src/runner/utils/memory_utils.cpp b/src/runner/utils/memory_utils.cpp index 6409b7b..5ee81b7 100644 --- a/src/runner/utils/memory_utils.cpp +++ b/src/runner/utils/memory_utils.cpp @@ -13,6 +13,7 @@ bool file_exist(const std::string &path) bool read_file(const std::string &path, std::vector &data) { + std::fstream fs(path, std::ios::in | std::ios::binary); if (!fs.is_open()) @@ -20,18 +21,45 @@ bool read_file(const std::string &path, std::vector &data) return false; } - fs.seekg(std::ios::end); - auto fs_end = fs.tellg(); - fs.seekg(std::ios::beg); - auto fs_beg = fs.tellg(); + // get file size + fs.seekg(0, std::ios::end); + size_t file_size = fs.tellg(); + fs.seekg(0, std::ios::beg); - auto file_size = static_cast(fs_end - fs_beg); - auto vector_size = data.size(); + if (file_size == 0) + { + return false; + } - data.reserve(vector_size + file_size); - data.insert(data.end(), std::istreambuf_iterator(fs), std::istreambuf_iterator()); + data.resize(file_size); + fs.read(data.data(), file_size); + // data.insert(data.end(), std::istreambuf_iterator(fs), std::istreambuf_iterator()); fs.close(); return true; } + +bool read_file(const std::string &path, char **data, size_t *len) +{ + FILE *fp = fopen(path.c_str(), "rb"); + + if (!fp) + { + return false; + } + + fseek(fp, 0, SEEK_END); + + *len = ftell(fp); + + fseek(fp, 0, SEEK_SET); + + *data = new char[*len]; + + fread(*data, *len, 1, fp); + + fclose(fp); + + return true; +} diff --git a/src/runner/utils/memory_utils.hpp b/src/runner/utils/memory_utils.hpp index 9bb24a8..54d062c 100644 --- a/src/runner/utils/memory_utils.hpp +++ b/src/runner/utils/memory_utils.hpp @@ -8,7 +8,7 @@ bool file_exist(const std::string &path); bool read_file(const std::string &path, std::vector &data); - +bool read_file(const std::string &path, char **data, size_t *len); class MMap { private: @@ -36,6 +36,8 @@ class MMap if (_add) { munmap(_add, _size); + _add = nullptr; + _size = 0; } }