Merge branch 'ggerganov:master' into master
sealad886 authored Jun 15, 2024
2 parents 4313e50 + 0c7b359 commit 12fd2d5
Showing 33 changed files with 6,241 additions and 5,250 deletions.
3 changes: 3 additions & 0 deletions .editorconfig
@@ -26,3 +26,6 @@ indent_size = 2

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

[examples/cvector-generator/*.txt]
insert_final_newline = unset
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -84,7 +84,7 @@ jobs:
name: llama-bin-macos-arm64.zip

macOS-latest-cmake-x64:
runs-on: macos-latest
runs-on: macos-12

steps:
- name: Clone
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -684,7 +684,8 @@ if (LLAMA_SYCL)
endif()

set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")

if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
5 changes: 5 additions & 0 deletions Makefile
@@ -38,6 +38,7 @@ BUILD_TARGETS = \
llama-tokenize \
llama-train-text-from-scratch \
llama-vdot \
llama-cvector-generator \
tests/test-c.o

# Binaries only useful for tests
@@ -922,6 +923,10 @@ llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
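With the rule above in place, the new binary can be built on its own; a minimal usage sketch, run from a checkout of the repository root:

```sh
# build only the cvector generator target (and the objects it depends on)
make llama-cvector-generator
```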
60 changes: 60 additions & 0 deletions common/common.cpp
@@ -1576,6 +1577,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) {
return true;
}
params.out_file = argv[i];
params.cvector_outfile = argv[i];
return true;
}
if (arg == "-ofreq" || arg == "--output-frequency") {
@@ -1610,6 +1611,55 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) {
params.i_chunk = std::stoi(argv[i]);
return true;
}
// cvector params
if (arg == "--completions-file") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.cvector_completions_file = argv[i];
return true;
}
if (arg == "--positive-file") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.cvector_positive_file = argv[i];
return true;
}
if (arg == "--negative-file") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.cvector_negative_file = argv[i];
return true;
}
if (arg == "--completions") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.n_completions = std::stoi(argv[i]);
return true;
}
if (arg == "--pca-batch") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.n_pca_batch = std::stoi(argv[i]);
return true;
}
if (arg == "--pca-iter") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.n_pca_iterations = std::stoi(argv[i]);
return true;
}
#ifndef LOG_DISABLE_LOGS
// Parse args for logging parameters
if (log_param_single_parse(argv[i])) {
@@ -1931,6 +1981,16 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
options.push_back({ "logging", " --log-append", "Don't truncate the old log file." });
#endif // LOG_DISABLE_LOGS

options.push_back({ "cvector" });
options.push_back({ "cvector", "-o, --output FNAME", "output file (default: '%s')", params.cvector_outfile.c_str() });
options.push_back({ "cvector", " --positive-file FNAME", "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() });
options.push_back({ "cvector", " --negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
options.push_back({ "cvector", " --completions-file FNAME",
"completions file (default: '%s')", params.cvector_completions_file.c_str() });
options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", params.n_completions });
options.push_back({ "cvector", " --batch-pca N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
options.push_back({ "cvector", " --iter-pca N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });

printf("usage: %s [options]\n", argv[0]);

for (const auto & o : options) {
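Each of the new handlers follows the same look-ahead convention: a flag consumes the next `argv` entry as its value, and `invalid_param` is set when no value remains. A minimal standalone sketch of that pattern — the program below is illustrative only, not llama.cpp code:

```cpp
#include <cstdio>
#include <string>

int main(int argc, char ** argv) {
    int n_pca_batch = 20; // same default as the field added to common.h below
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        if (arg == "--pca-batch") {
            if (++i >= argc) { // the flag was the last token: no value follows
                fprintf(stderr, "error: missing value for --pca-batch\n");
                return 1;
            }
            n_pca_batch = std::stoi(argv[i]); // consume the value token
        }
    }
    printf("n_pca_batch = %d\n", n_pca_batch);
    return 0;
}
```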
9 changes: 9 additions & 0 deletions common/common.h
@@ -232,6 +232,15 @@ struct gpt_params {

bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity

// cvector-generator params
int n_completions = 64;
int n_pca_batch = 20;
int n_pca_iterations = 1000;
std::string cvector_outfile = "control_vector.gguf";
std::string cvector_completions_file = "examples/cvector-generator/completions.txt";
std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
};

void gpt_params_handle_model_default(gpt_params & params);
1 change: 1 addition & 0 deletions convert-hf-to-gguf-update.py
@@ -83,6 +83,7 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
{"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
{"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
{"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
{"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
]

3 changes: 3 additions & 0 deletions convert-hf-to-gguf.py
@@ -477,6 +477,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
# ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
res = "smaug-bpe"
if chkhsh == "c7ea5862a53e4272c035c8238367063e2b270d51faa48c0f09e9d5b54746c360":
# ref: https://huggingface.co/LumiOpen/Poro-34B-chat
res = "poro-chat"
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
res = "jina-v2-code"
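After a model is added to the list in `convert-hf-to-gguf-update.py`, re-running that script downloads each tokenizer and regenerates the `chkhsh` branches in `convert-hf-to-gguf.py` seen above; a usage sketch, assuming the script's convention of taking a Hugging Face token as its first argument:

```sh
python3 convert-hf-to-gguf-update.py <huggingface_token>
```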
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -12,6 +12,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})

if (EMSCRIPTEN)
else()
add_subdirectory(cvector-generator)
add_subdirectory(baby-llama)
add_subdirectory(batched-bench)
add_subdirectory(batched)
5 changes: 5 additions & 0 deletions examples/cvector-generator/CMakeLists.txt
@@ -0,0 +1,5 @@
set(TARGET llama-cvector-generator)
add_executable(${TARGET} cvector-generator.cpp pca.hpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
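Together with the `add_subdirectory(cvector-generator)` line added to `examples/CMakeLists.txt` above, this lets a CMake build produce the binary directly; a sketch assuming the usual out-of-tree `build/` directory:

```sh
cmake -B build
cmake --build build --target llama-cvector-generator
```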
34 changes: 34 additions & 0 deletions examples/cvector-generator/README.md
@@ -0,0 +1,34 @@
# cvector-generator

This example demonstrates how to generate a control vector using GGUF models.

Related PRs:
- [Add support for control vectors](https://github.com/ggerganov/llama.cpp/pull/5970)
- (Issue) [Generate control vector using llama.cpp](https://github.com/ggerganov/llama.cpp/issues/6880)
- [Add cvector-generator example](https://github.com/ggerganov/llama.cpp/pull/7514)

## Examples

```sh
# CPU only
./llama-cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf

# With GPU
./llama-cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99

# With advanced options
./llama-cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100

# To see the help message
./llama-cvector-generator -h
# Then have a look at the "cvector" section of the output
```

## Tips and tricks

If a prompt spans multiple lines, escape each newline by writing it as the literal two-character sequence `\n`. For example:

```
<|im_start|>system\nAct like a person who is extremely happy.<|im_end|>
<|im_start|>system\nYou are in a very good mood today<|im_end|>
```
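Once `control_vector.gguf` has been written, it can be applied at inference time via the control-vector flags introduced in the first PR linked above; a sketch in which the scale and layer range are illustrative values, not recommendations:

```sh
# apply the vector as-is
./llama-cli -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf --control-vector control_vector.gguf

# apply with a negative scale (steer the opposite way), limited to a layer range
./llama-cli -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf \
    --control-vector-scaled control_vector.gguf -0.8 \
    --control-vector-layer-range 14 26
```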