Skip to content

Commit

Permalink
optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
ZHEQIUSHUI committed May 15, 2024
1 parent d1b4aba commit a19c591
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 96 deletions.
39 changes: 21 additions & 18 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# BSP_MSP_DIR 这个变量使用*绝对路径*指定到 SDK 的msp/out目录,如下所示(根据自己的目录修改)
# 绝对路径 绝对路径 绝对路径
BSP_MSP_DIR=$PWD/AX650_SDK_V1.63.2_20240125165719_NO3387/msp/out/
BSP_MSP_DIR=$PWD/bsp_msp_out/msp/out/
echo "bsp dir: ${BSP_MSP_DIR}"
# 下面会简单判断 BSP 路径是否正确
if [ ! -d "${BSP_MSP_DIR}" ]; then
Expand All @@ -25,25 +25,28 @@ cd ${build_dir}
URL="https://developer.arm.com/-/media/Files/downloads/gnu-a/9.2-2019.12/binrel/gcc-arm-9.2-2019.12-x86_64-aarch64-none-linux-gnu.tar.xz"
FOLDER="gcc-arm-9.2-2019.12-x86_64-aarch64-none-linux-gnu"

# Check if the file exists
if [ ! -f "$FOLDER.tar.xz" ]; then
# Download the file
echo "Downloading $URL"
wget "$URL" -O "$FOLDER.tar.xz"
fi

# Check if the folder exists
if [ ! -d "$FOLDER" ]; then
# Extract the file
echo "Extracting $FOLDER.tar.xz"
tar -xf "$FOLDER.tar.xz"
fi

export PATH=$PATH:$PWD/$FOLDER/bin/
aarch64-none-linux-gnu-gcc -v
if [ $? -ne 0 ]; then
echo "Error: aarch64-none-linux-gnu-gcc not found"
exit 1
# Check if the file exists
if [ ! -f "$FOLDER.tar.xz" ]; then
# Download the file
echo "Downloading $URL"
wget "$URL" -O "$FOLDER.tar.xz"
fi

# Check if the folder exists
if [ ! -d "$FOLDER" ]; then
# Extract the file
echo "Extracting $FOLDER.tar.xz"
tar -xf "$FOLDER.tar.xz"
fi

export PATH=$PATH:$PWD/$FOLDER/bin/
aarch64-none-linux-gnu-gcc -v
if [ $? -ne 0 ]; then
echo "Error: aarch64-none-linux-gnu-gcc not found"
exit 1
fi
fi

# 开始编译
Expand Down
18 changes: 9 additions & 9 deletions src/runner/LLM.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class LLM

if (!attr.b_dynamic_load_axmodel_layer)
{
int ret = llama_layers[i].layer.init(llama_layers[i].filename.c_str());
int ret = llama_layers[i].layer.init(llama_layers[i].filename.c_str(), false);
if (ret != 0)
{
ALOGE("init axmodel(%s) failed", llama_layers[i].filename.c_str());
Expand Down Expand Up @@ -148,7 +148,7 @@ class LLM
}
}

int ret = llama_post.init(attr.filename_post_axmodel.c_str());
int ret = llama_post.init(attr.filename_post_axmodel.c_str(), false);
if (ret != 0)
{
ALOGE("init post axmodel(%s) failed", attr.filename_post_axmodel.c_str());
Expand All @@ -163,11 +163,11 @@ class LLM
int ret;
if (_attr.b_use_mmap_load_layer)
{
ret = layer.layer.init(layer.layer_buffer);
ret = layer.layer.init((char *)layer.layer_buffer.data(), layer.layer_buffer.size());
}
else
{
ret = layer.layer.init(layer.layer_buffer_vec);
ret = layer.layer.init(layer.layer_buffer_vec.data(), layer.layer_buffer_vec.size());
}
if (ret != 0)
{
Expand All @@ -176,12 +176,12 @@ class LLM
}

{
_attr.max_token_len = llama_layers[0].layer.get_input("mask").vShape[0] / sizeof(unsigned short) - 1;
_attr.max_token_len = llama_layers[0].layer.get_input("mask").nSize / sizeof(unsigned short) - 1;
ALOGI("max_token_len : %d", _attr.max_token_len);
// auto &input_k_cache = llama_layers[0].layer.get_input("K_cache");
// auto &output_k_cache_out = llama_layers[0].layer.get_output("K_cache_out");
_attr.kv_cache_size = llama_layers[0].layer.get_output("K_cache_out").vShape[0] / sizeof(unsigned short);
_attr.kv_cache_num = llama_layers[0].layer.get_input("K_cache").vShape[0] / _attr.kv_cache_size / sizeof(unsigned short);
_attr.kv_cache_size = llama_layers[0].layer.get_output("K_cache_out").nSize / sizeof(unsigned short);
_attr.kv_cache_num = llama_layers[0].layer.get_input("K_cache").nSize / _attr.kv_cache_size / sizeof(unsigned short);
ALOGI("kv_cache_size : %d, kv_cache_num: %d", _attr.kv_cache_size, _attr.kv_cache_num);
if (_attr.max_token_len > _attr.kv_cache_num)
{
Expand Down Expand Up @@ -275,11 +275,11 @@ class LLM
int ret;
if (_attr.b_use_mmap_load_layer)
{
ret = layer.layer.init(layer.layer_buffer);
ret = layer.layer.init((char *)layer.layer_buffer.data(), layer.layer_buffer.size());
}
else
{
ret = layer.layer.init(layer.layer_buffer_vec);
ret = layer.layer.init(layer.layer_buffer_vec.data(), layer.layer_buffer_vec.size());
}
if (ret != 0)
{
Expand Down
8 changes: 3 additions & 5 deletions src/runner/ax_model_runner/ax_model_runner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include <vector>
#include <string>
#include <map>
#include "memory_utils.hpp"

typedef enum _color_space_e
{
Expand Down Expand Up @@ -47,9 +46,8 @@ class ax_runner_base
std::map<std::string, ax_runner_tensor_t> map_input_tensors;

public:
virtual int init(const char *model_file) = 0;
virtual int init(std::vector<char> &model_buffer) = 0;
virtual int init(MMap &model_buffer) = 0;
virtual int init(const char *model_file, bool use_mmap = false) = 0;
virtual int init(char *model_buffer, size_t model_size) = 0;

virtual void deinit() = 0;

Expand Down Expand Up @@ -106,4 +104,4 @@ class ax_runner_base
}
};

int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size);
// int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size);
77 changes: 25 additions & 52 deletions src/runner/ax_model_runner/ax_model_runner_ax650.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,63 +277,36 @@ int ax_runner_ax650::sub_init()
return ret;
}

int ax_runner_ax650::init(const char *model_file)
int ax_runner_ax650::init(const char *model_file, bool use_mmap)
{
// 2. load model
std::shared_ptr<MMap> model_buffer(new MMap(model_file));
if (!model_buffer->data())
if (use_mmap)
{
ALOGE("mmap");
return -1;
}
return init(*model_buffer.get());
// std::shared_ptr<std::vector<char>> model_buffer((new std::vector<char>()));
// if (!read_file(model_file, *model_buffer.get()))
// {
// ALOGE("read_file");
// return -1;
// }

// 3. create handle
}

int ax_runner_ax650::init(MMap &model_buffer)
{
if (!m_handle)
{
m_handle = new ax_joint_runner_ax650_handle_t;
}

static bool b_init = false;
if (!b_init)
{
// 1. init engine
AX_ENGINE_NPU_ATTR_T npu_attr;
memset(&npu_attr, 0, sizeof(npu_attr));
npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE;
AX_SYS_Init();
auto ret = AX_ENGINE_Init(&npu_attr);
if (0 != ret)
MMap model_buffer(model_file);
if (!model_buffer.data())
{
return ret;
ALOGE("mmap");
return -1;
}
b_init = true;
auto ret = init((char *)model_buffer.data(), model_buffer.size());
model_buffer.close_file();
return ret;
}

// 3. create handle

int ret = AX_ENGINE_CreateHandle(&m_handle->handle, model_buffer.data(), model_buffer.size());
if (0 != ret)
else
{
ALOGE("AX_ENGINE_CreateHandle");
char *model_buffer;
size_t len;
if (!read_file(model_file, &model_buffer, &len))
{
ALOGE("read_file");
return -1;
}
auto ret = init(model_buffer, len);
delete[] model_buffer;
return ret;
}
// fprintf(stdout, "Engine creating handle is done.\n");

return sub_init();
}

int ax_runner_ax650::init(std::vector<char> &model_buffer)
int ax_runner_ax650::init(char *model_buffer, size_t model_size)
{
if (!m_handle)
{
Expand All @@ -358,7 +331,7 @@ int ax_runner_ax650::init(std::vector<char> &model_buffer)

// 3. create handle

int ret = AX_ENGINE_CreateHandle(&m_handle->handle, model_buffer.data(), model_buffer.size());
int ret = AX_ENGINE_CreateHandle(&m_handle->handle, model_buffer, model_size);
if (0 != ret)
{
ALOGE("AX_ENGINE_CreateHandle");
Expand Down Expand Up @@ -461,7 +434,7 @@ int ax_runner_ax650::inference()
return AX_ENGINE_RunSync(m_handle->handle, &m_handle->io_data);
}

int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size)
{
return AX_IVPS_CmmCopyTdp(dst, src, size);
}
// int ax_cmmcpy(unsigned long long int dst, unsigned long long int src, int size)
// {
// return AX_IVPS_CmmCopyTdp(dst, src, size);
// }
5 changes: 2 additions & 3 deletions src/runner/ax_model_runner/ax_model_runner_ax650.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ class ax_runner_ax650 : public ax_runner_base
int sub_init();

public:
int init(const char *model_file) override;
int init(std::vector<char> &model_buffer) override;
int init(MMap &model_buffer) override;
int init(const char *model_file, bool use_mmap = false) override;
int init(char *model_buffer, size_t model_size) override;

void release();
void deinit() override;
Expand Down
1 change: 1 addition & 0 deletions src/runner/utils/bfloat16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ static std::vector<std::pair<int, float>> topk_bfloat16(unsigned short *arr, int

// Create a vector of pairs with index and value
std::vector<std::pair<int, float>> indexedValues;
indexedValues.reserve(size);
for (int i = 0; i < size; ++i)
{
indexedValues.push_back(std::make_pair(i, bfloat16(arr[i])));
Expand Down
44 changes: 36 additions & 8 deletions src/runner/utils/memory_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,53 @@ bool file_exist(const std::string &path)

bool read_file(const std::string &path, std::vector<char> &data)
{

std::fstream fs(path, std::ios::in | std::ios::binary);

if (!fs.is_open())
{
return false;
}

fs.seekg(std::ios::end);
auto fs_end = fs.tellg();
fs.seekg(std::ios::beg);
auto fs_beg = fs.tellg();
// get file size
fs.seekg(0, std::ios::end);
size_t file_size = fs.tellg();
fs.seekg(0, std::ios::beg);

auto file_size = static_cast<size_t>(fs_end - fs_beg);
auto vector_size = data.size();
if (file_size == 0)
{
return false;
}

data.reserve(vector_size + file_size);
data.insert(data.end(), std::istreambuf_iterator<char>(fs), std::istreambuf_iterator<char>());
data.resize(file_size);
fs.read(data.data(), file_size);
// data.insert(data.end(), std::istreambuf_iterator<char>(fs), std::istreambuf_iterator<char>());

fs.close();

return true;
}

/**
 * @brief Read an entire file into a newly allocated heap buffer.
 *
 * On success the buffer is allocated with new[] and ownership passes to the
 * caller, who must release it with delete[]. On any failure *data is set to
 * nullptr and *len to 0, so the caller has nothing to free.
 *
 * An empty file is reported as failure, consistent with the
 * std::vector<char> overload of read_file.
 *
 * @param path Path of the file to read (opened in binary mode).
 * @param data [out] Receives the new[]-allocated buffer holding the file bytes.
 * @param len  [out] Receives the number of bytes stored in *data.
 * @return true if the whole file was read; false if an output pointer is
 *         null, the file cannot be opened, its size cannot be determined,
 *         it is empty, or fewer bytes than expected were read.
 */
bool read_file(const std::string &path, char **data, size_t *len)
{
    if (data == nullptr || len == nullptr)
    {
        return false;
    }

    // Give the outputs a defined value on every failure path.
    *data = nullptr;
    *len = 0;

    FILE *fp = fopen(path.c_str(), "rb");
    if (!fp)
    {
        return false;
    }

    // ftell() returns -1 on error; converting that straight to size_t would
    // request an enormous allocation, so keep it signed until validated.
    long file_size = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        file_size = ftell(fp);
    }
    if (file_size <= 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        fclose(fp);
        return false;
    }

    const size_t byte_count = static_cast<size_t>(file_size);
    char *buffer = new char[byte_count];

    // Request byte-sized items so the return value is the exact byte count.
    const size_t bytes_read = fread(buffer, 1, byte_count, fp);
    fclose(fp);

    if (bytes_read != byte_count)
    {
        // A short read would hand back partially uninitialised memory.
        delete[] buffer;
        return false;
    }

    *data = buffer;
    *len = byte_count;
    return true;
}
4 changes: 3 additions & 1 deletion src/runner/utils/memory_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
bool file_exist(const std::string &path);

bool read_file(const std::string &path, std::vector<char> &data);

bool read_file(const std::string &path, char **data, size_t *len);
class MMap
{
private:
Expand Down Expand Up @@ -36,6 +36,8 @@ class MMap
if (_add)
{
munmap(_add, _size);
_add = nullptr;
_size = 0;
}
}

Expand Down

0 comments on commit a19c591

Please sign in to comment.