Commit
cyita committed Jan 26, 2025
1 parent ee809e7 commit 9f10db8
Showing 2 changed files with 151 additions and 0 deletions.
CMakeLists.txt
@@ -0,0 +1,45 @@
# set(TARGET convert-gguf-to-npu)
# add_executable(${TARGET} convert-gguf-to-npu.cpp)
# install(TARGETS ${TARGET} RUNTIME)
# target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
# target_compile_features(${TARGET} PRIVATE cxx_std_17)

cmake_minimum_required(VERSION 3.10)

project(LLM_GGUF_TO_NPU VERSION 1.0.0 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Location of the prebuilt llama.cpp headers and import libraries; adjust for your environment.
set(LIBRARY_DIR "D:\\yina\\llamacpplibs")
include_directories(${LIBRARY_DIR}/include)

add_library(llama STATIC IMPORTED)
set_target_properties(llama PROPERTIES IMPORTED_LOCATION ${LIBRARY_DIR}/llama.lib)

add_library(common STATIC IMPORTED)
set_target_properties(common PROPERTIES IMPORTED_LOCATION ${LIBRARY_DIR}/common.lib)

add_library(ggml STATIC IMPORTED)
set_target_properties(ggml PROPERTIES IMPORTED_LOCATION ${LIBRARY_DIR}/ggml.lib)


set(TARGET convert-gguf-to-npu)
add_executable(${TARGET} convert-gguf-to-npu.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

add_custom_command(TARGET convert-gguf-to-npu POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
        ${LIBRARY_DIR}/llama.dll
        ${LIBRARY_DIR}/ggml.dll
        ${CMAKE_BINARY_DIR}/Release/
    COMMENT "Copying llama.dll and ggml.dll to build/Release"
)

# add_custom_command(TARGET llama-cli-npu POST_BUILD
# COMMAND ${CMAKE_COMMAND} -E copy_directory
# ${DLL_DIR}/
# ${CMAKE_BINARY_DIR}/Release/
# COMMENT "Copying dependency to build/Release\n"
# )
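Build note: assuming the prebuilt import libraries above are in place under LIBRARY_DIR, a standard out-of-source CMake build of this target would look something like:

    cmake -S . -B build
    cmake --build build --config Release

The post-build step then copies llama.dll and ggml.dll next to the executable in build/Release.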
convert-gguf-to-npu.cpp
@@ -0,0 +1,106 @@
#include "arg.h"
#include "common.h"
#include "log.h"
#include "llama.h"
#include <filesystem>
#include <vector>
#include <iostream>

#ifdef _WIN32
#define PATH_SEP '\\'
#else
#define PATH_SEP '/'
#endif

static void print_usage(int, char ** argv) {
    LOG("\nexample usage:\n");
    LOG("\n %s -m model.gguf -o output_dir --low-bit sym_int4 --quantization-group-size 0\n", argv[0]);
    LOG("\n");
}

int main(int argc, char ** argv) {
    gpt_params params;

    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_NPU, print_usage)) {
        return 1;
    }

    gpt_init();

    // init LLM

    llama_backend_init();
    llama_numa_init(params.numa);

    enum gguf_npu_qtype type;

    if (params.low_bit == "sym_int4") {
        type = GGUF_TYPE_NPU_CW_Q4_0;
    } else if (params.low_bit == "asym_int4") {
        type = GGUF_TYPE_NPU_CW_Q4_1;
    } else {
        std::cerr << "\033[31m" << __func__ << ": error: only sym_int4 and asym_int4 are supported, but got " << params.low_bit << "\033[0m" << std::endl;
        exit(1);
    }

    if (params.npu_outfile == "NPU_MODEL") {
        fprintf(stderr, "\033[31m%s: error: please provide the NPU model output dir with -o <output_dir>\033[0m\n", __func__);
        exit(1);
    }

    // initialize the model

    llama_model_params model_params = llama_model_params_from_gpt_params(params);

    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);

    if (model == NULL) {
        fprintf(stderr, "%s: error: unable to load model\n", __func__);
        return 1;
    }

    // initialize the context

    llama_context_params ctx_params = llama_context_params_from_gpt_params(params);

    llama_context * ctx = llama_new_context_with_model(model, ctx_params);

    if (ctx == NULL) {
        fprintf(stderr, "%s: error: failed to create the llama_context\n", __func__);
        return 1;
    }

    std::string output_dir = params.npu_outfile;
    std::filesystem::path dirPath = output_dir;

    // create the output directory, then the model_weights subdirectory inside it
    if (std::filesystem::create_directory(dirPath)) {
        std::cout << "Directory created: " << dirPath << std::endl;
    } else {
        std::cout << "Directory already exists or could not be created: " << dirPath << "\n";
    }

    std::string weight_path = output_dir + PATH_SEP + "model_weights"; // TODO: optimize /
    dirPath = weight_path;
    if (std::filesystem::create_directory(dirPath)) {
        std::cout << "Directory created: " << dirPath << std::endl;
    } else {
        std::cout << "Directory already exists or could not be created: " << dirPath << "\n";
    }

    if (params.quantization_group_size != 0) {
        std::cerr << "\033[31mOnly quantization group_size=0 is supported; falling back to channel-wise quantization.\033[0m" << std::endl;
    }

std::cout << "\033[32mConverting GGUF model to " << params.low_bit << " NPU model...\033[0m" << std::endl;
convert_gguf_to_npu_weight(model, weight_path.c_str(), type);

std::cout << "\033[32mModel weights saved to " << weight_path << "\033[0m"<< std::endl;

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

return 0;
}
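Usage note: once built, the converter is invoked as shown in print_usage above, for example:

    convert-gguf-to-npu -m model.gguf -o output_dir --low-bit sym_int4 --quantization-group-size 0

This writes the converted weights to <output_dir>/model_weights; --low-bit accepts sym_int4 or asym_int4, and only --quantization-group-size 0 (channel-wise quantization) is supported.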
