diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 9210e9fea5eb3..b2df4f2fb6cbc 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -20,7 +20,12 @@ else()
     add_subdirectory(batched)
     add_subdirectory(embedding)
     add_subdirectory(eval-callback)
-    add_subdirectory(gbnf-validator)
+
+    if (NOT WIN32)
+        # disabled on Windows because it uses internal functions not exported with LLAMA_API
+        add_subdirectory(gbnf-validator)
+    endif()
+
     add_subdirectory(gguf-hash)
     add_subdirectory(gguf-split)
     add_subdirectory(gguf)
@@ -51,7 +56,10 @@ else()
     add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(cvector-generator)
     add_subdirectory(export-lora)
-    add_subdirectory(quantize-stats)
+    if (NOT WIN32)
+        # disabled on Windows because it uses internal functions not exported with LLAMA_API
+        add_subdirectory(quantize-stats)
+    endif()
     add_subdirectory(llava)
     if (GGML_RPC)
         add_subdirectory(rpc)
diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp
index 7e62657e118a4..75f63f93891f5 100644
--- a/examples/gguf-split/gguf-split.cpp
+++ b/examples/gguf-split/gguf-split.cpp
@@ -287,7 +287,7 @@ struct split_strategy {
     }

     void print_info() {
-        printf("n_split: %ld\n", ctx_outs.size());
+        printf("n_split: %zu\n", ctx_outs.size());
         int i_split = 0;
         for (auto & ctx_out : ctx_outs) {
             // re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
@@ -297,7 +297,7 @@ struct split_strategy {
                 total_size += ggml_nbytes(t);
             }
             total_size = total_size / 1000 / 1000; // convert to megabytes
-            printf("split %05d: n_tensors = %d, total_size = %ldM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
+            printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
             i_split++;
         }
     }
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index bac606f471639..2338ad1067dde 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1521,7 +1521,7 @@ int main(int argc, char ** argv) {
     for (const auto & inst : params_instances) {
         params_idx++;
         if (params.progress) {
-            fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
+            fprintf(stderr, "llama-bench: benchmark %d/%zu: starting\n", params_idx, params_count);
         }
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
@@ -1573,14 +1573,14 @@ int main(int argc, char ** argv) {
         // warmup run
         if (t.n_prompt > 0) {
             if (params.progress) {
-                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
+                fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
             }
             //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
             test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
         }
         if (t.n_gen > 0) {
             if (params.progress) {
-                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
+                fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count);
             }
             test_gen(ctx, 1, t.n_threads);
         }
@@ -1592,14 +1592,14 @@ int main(int argc, char ** argv) {

             if (t.n_prompt > 0) {
                 if (params.progress) {
-                    fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count,
+                    fprintf(stderr, "llama-bench: benchmark %d/%zu: prompt run %d/%d\n", params_idx, params_count,
                             i + 1, params.reps);
                 }
                 test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
             }
             if (t.n_gen > 0) {
                 if (params.progress) {
-                    fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count,
+                    fprintf(stderr, "llama-bench: benchmark %d/%zu: generation run %d/%d\n", params_idx, params_count,
                             i + 1, params.reps);
                 }
                 test_gen(ctx, t.n_gen, t.n_threads);
diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index e78a8596d8cfe..23ff4db27a420 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -143,7 +143,7 @@ int main(int argc, char ** argv) {
         std::vector<chunk> file_chunk = chunk_file(context_file, params.chunk_size, params.chunk_separator);
         chunks.insert(chunks.end(), file_chunk.begin(), file_chunk.end());
     }
-    LOG_INF("Number of chunks: %ld\n", chunks.size());
+    LOG_INF("Number of chunks: %zu\n", chunks.size());

     llama_backend_init();
     llama_numa_init(params.numa);
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
index 12ad542565128..c97e227242c42 100644
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -394,7 +394,7 @@ int main(int raw_argc, char ** raw_argv) {
     }

     if (show_token_count) {
-        printf("Total number of tokens: %ld\n", tokens.size());
+        printf("Total number of tokens: %zu\n", tokens.size());
     }
     // silence valgrind
     llama_free(ctx);
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 1b3d9896781f1..c91e931634255 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -32,6 +32,13 @@ else()
     endif()
 endif()

+# remove the lib prefix on win32 mingw
+if (WIN32)
+    set(CMAKE_STATIC_LIBRARY_PREFIX "")
+    set(CMAKE_SHARED_LIBRARY_PREFIX "")
+    set(CMAKE_SHARED_MODULE_PREFIX "")
+endif()
+
 option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})

 option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index a267a8b596bd1..349f4c57fdf51 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -194,11 +194,6 @@ endif()

 if (WIN32)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
-
-    if (BUILD_SHARED_LIBS)
-        # TODO: should not use this
-        set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-    endif()
 endif()

 # ggml
diff --git a/ggml/src/ggml-cpu/amx/amx.cpp b/ggml/src/ggml-cpu/amx/amx.cpp
index b9074cb3aca55..5ec5263ceb4ba 100644
--- a/ggml/src/ggml-cpu/amx/amx.cpp
+++ b/ggml/src/ggml-cpu/amx/amx.cpp
@@ -122,7 +122,7 @@ static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_ty
 }

 static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-    void * data = aligned_alloc(TENSOR_ALIGNMENT, size);
+    void * data = ggml_aligned_malloc(size);
     if (data == NULL) {
         fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
         return NULL;
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index ea17d6077e7cf..52881737c06fa 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -126,8 +126,7 @@ struct ggml_arm_arch_features_type {
 #endif
 #include <windows.h>
-
-#if !defined(__clang__)
+#if defined(_MSC_VER) && !defined(__clang__)
 #define GGML_CACHE_ALIGN __declspec(align(GGML_CACHE_LINE))

 typedef volatile LONG atomic_int;
@@ -12945,7 +12944,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
 #include "windows.h"

 // TODO: support > 64 CPUs
-bool ggml_thread_apply_affinity(bool * mask) {
+static bool ggml_thread_apply_affinity(bool * mask) {
     HANDLE h = GetCurrentThread();
     uint64_t bitmask = 0ULL;

diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index 00a1546a7d5d0..f961134edd735 100644
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -74,8 +74,8 @@ static inline int ggml_up(int n, int m) {
 //

 GGML_ATTRIBUTE_FORMAT(2, 3)
-void ggml_log_internal        (enum ggml_log_level level, const char * format, ...);
-void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
+GGML_API void ggml_log_internal        (enum ggml_log_level level, const char * format, ...);
+GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);

 #define GGML_LOG(...)       ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
 #define GGML_LOG_INFO(...)  ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
@@ -304,8 +304,8 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);

 // Memory allocation

-void * ggml_aligned_malloc(size_t size);
-void ggml_aligned_free(void * ptr, size_t size);
+GGML_API void * ggml_aligned_malloc(size_t size);
+GGML_API void ggml_aligned_free(void * ptr, size_t size);

 // FP16 to FP32 conversion
diff --git a/ggml/src/ggml-threading.h b/ggml/src/ggml-threading.h
index ce975d880a70f..dec2c8840aa36 100644
--- a/ggml/src/ggml-threading.h
+++ b/ggml/src/ggml-threading.h
@@ -1,11 +1,13 @@
 #pragma once

+#include "ggml.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif

-void ggml_critical_section_start(void);
-void ggml_critical_section_end(void);
+GGML_API void ggml_critical_section_start(void);
+GGML_API void ggml_critical_section_end(void);

 #ifdef __cplusplus
 }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f3b3908b11222..2d3ea09945790 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,10 +1,3 @@
-# TODO: should not use this
-if (WIN32)
-    if (BUILD_SHARED_LIBS)
-        set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-    endif()
-endif()
-
 llama_add_compile_flags()

 #
diff --git a/src/llama.cpp b/src/llama.cpp
index cae3f76ad1677..49ef5b78a515c 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1794,7 +1794,7 @@ struct llama_file {
         DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                                       NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
         if (!bufLen) {
-            ret = format("Win32 error code: %s", error_code);
+            ret = format("Win32 error code: %lx", error_code);
         } else {
             ret = lpMsgBuf;
             LocalFree(lpMsgBuf);
@@ -2132,7 +2132,7 @@ struct llama_mmap {
         HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");

         // may fail on pre-Windows 8 systems
-        pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
+        pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");

         if (pPrefetchVirtualMemory) {
             // advise the kernel to preload the mapped memory
@@ -21577,7 +21577,7 @@ float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) {
                 throw std::runtime_error(format("negative index out of range [0, %d)", ctx->n_outputs));
             }
         } else if ((size_t) i >= ctx->output_ids.size()) {
-            throw std::runtime_error(format("out of range [0, %lu)", ctx->output_ids.size()));
+            throw std::runtime_error(format("out of range [0, %zu)", ctx->output_ids.size()));
         } else {
             j = ctx->output_ids[i];
         }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 82373ff4e1862..daeed4564c1d1 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -84,38 +84,50 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)

-# build test-tokenizer-1-bpe target once and add many tests
-add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
-target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
-install(TARGETS test-tokenizer-1-bpe RUNTIME)
-
-# TODO: disabled due to slowness
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
-
-# build test-tokenizer-1-spm target once and add many tests
-add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
-target_link_libraries(test-tokenizer-1-spm PRIVATE common)
-install(TARGETS test-tokenizer-1-spm RUNTIME)
-
-llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
-#llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
-
-# llama_target_and_test(test-double-float.cpp) # SLOW
+
+if (NOT WIN32)
+    # these tests are disabled on Windows because they use internal functions not exported with LLAMA_API
+    llama_target_and_test(test-sampling.cpp)
+    llama_target_and_test(test-grammar-parser.cpp)
+    llama_target_and_test(test-grammar-integration.cpp)
+    llama_target_and_test(test-llama-grammar.cpp)
+    # TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
+    if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
+        llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+        target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
+    endif()
+
+
+    # build test-tokenizer-1-bpe target once and add many tests
+    add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
+    target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
+    install(TARGETS test-tokenizer-1-bpe RUNTIME)
+
+    # TODO: disabled due to slowness
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
+
+    # build test-tokenizer-1-spm target once and add many tests
+    add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
+    target_link_libraries(test-tokenizer-1-spm PRIVATE common)
+    install(TARGETS test-tokenizer-1-spm RUNTIME)
+
+    llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
+    #llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
+
+    # llama_target_and_test(test-double-float.cpp) # SLOW
+endif()
+
 llama_target_and_test(test-log.cpp)
 llama_target_and_test(test-arg-parser.cpp)
-llama_target_and_test(test-sampling.cpp)
 llama_target_and_test(test-chat-template.cpp)
-llama_target_and_test(test-grammar-parser.cpp)
-llama_target_and_test(test-grammar-integration.cpp)
-llama_target_and_test(test-llama-grammar.cpp)
 # llama_target_and_test(test-opt.cpp) # SLOW
 llama_target_and_test(test-backend-ops.cpp)

@@ -130,11 +142,6 @@ if (NOT GGML_BACKEND_DL)
     llama_target_and_test(test-rope.cpp)
 endif()

-# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
-if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
-    llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-    target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
-endif()

 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
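Note on the export macros: the recurring "not exported with LLAMA_API" comments and the removal of CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS both come down to symbol visibility in Windows DLL builds. The sketch below shows the typical shape of a GGML_API-style export macro; it is an assumption about the general pattern, not a copy of the actual ggml.h definition. Any internal function declared without such a macro stays DLL-local once the export-everything CMake switch is gone, which is why gbnf-validator, quantize-stats and several tests are skipped on WIN32 in this patch, and why ggml_log_internal, ggml_aligned_malloc and the critical-section functions gain GGML_API above.

/* Sketch (assumption): common layout of a GGML_API-style export macro. */
#ifdef GGML_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef GGML_BUILD
#            define GGML_API __declspec(dllexport)   /* building the ggml DLL  */
#        else
#            define GGML_API __declspec(dllimport)   /* consuming the ggml DLL */
#        endif
#    else
#        define GGML_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define GGML_API
#endif

GGML_API void ggml_critical_section_start(void);   /* visible to users of the DLL */
void ggml_purely_internal_helper(void);            /* hypothetical name: stays DLL-local */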
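Note on the format-specifier hunks: %ld and %lu are replaced with %zu wherever a size_t value (for example std::vector::size()) is printed. %zu is the standard conversion for size_t, while %ld expects a long, which is only 32 bits wide on LLP64 platforms such as 64-bit Windows, so the old code pushed a 64-bit value through a 32-bit conversion there. A minimal illustration:

#include <stddef.h>
#include <stdio.h>

int main(void) {
    size_t n = 42;
    /* portable: %zu matches size_t on every platform */
    printf("n = %zu\n", n);
    /* not portable: %ld expects long, which mismatches size_t on 64-bit Windows */
    return 0;
}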
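Note on the amx.cpp allocation hunk: C11 aligned_alloc() is not provided by MSVC's CRT, so the buffer is now obtained through ggml_aligned_malloc(), which this patch also exports via GGML_API. The wrapper below is only a hedged illustration of what such a portability shim usually looks like, with hypothetical helper names rather than the actual ggml implementation:

#include <stdlib.h>
#ifdef _WIN32
#include <malloc.h>   /* _aligned_malloc / _aligned_free */
#endif

/* Hypothetical portability wrapper: MSVC lacks C11 aligned_alloc, so Windows
 * builds use _aligned_malloc, which must be released with _aligned_free. */
static void * my_aligned_malloc(size_t alignment, size_t size) {
#ifdef _WIN32
    return _aligned_malloc(size, alignment);   /* note the swapped argument order */
#else
    void * ptr = NULL;
    return posix_memalign(&ptr, alignment, size) == 0 ? ptr : NULL;
#endif
}

static void my_aligned_free(void * ptr) {
#ifdef _WIN32
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}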