Skip to content

Commit

Permalink
llama : add llama_sampling API + move grammar in libllama
Browse files (browse the repository at this point in the history)
ggml-ci
  • Loading branch information
ggerganov committed Sep 3, 2024
1 parent b69a480 commit f648ca2
Show file tree
Hide file tree
Showing 48 changed files with 2,429 additions and 2,538 deletions.
6 changes: 0 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,6 @@ OBJ_COMMON = \
common/ngram-cache.o \
common/sampling.o \
common/train.o \
common/grammar-parser.o \
common/build-info.o \
common/json-schema-to-grammar.o

Expand Down Expand Up @@ -1167,11 +1166,6 @@ common/console.o: \
common/console.h
$(CXX) $(CXXFLAGS) -c $< -o $@

common/grammar-parser.o: \
common/grammar-parser.cpp \
common/grammar-parser.h
$(CXX) $(CXXFLAGS) -c $< -o $@

common/json-schema-to-grammar.o: \
common/json-schema-to-grammar.cpp \
common/json-schema-to-grammar.h
Expand Down
2 changes: 0 additions & 2 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ add_library(${TARGET} STATIC
sampling.cpp
console.h
console.cpp
grammar-parser.h
grammar-parser.cpp
json.hpp
json-schema-to-grammar.cpp
train.h
Expand Down
109 changes: 37 additions & 72 deletions common/common.cpp

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ struct cpu_params {
};

struct gpt_params {
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed

int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 0; // context size
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
Expand Down Expand Up @@ -120,8 +118,7 @@ struct gpt_params {
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings

// // sampling parameters
struct llama_sampling_params sparams;
struct gpt_sampling_params sparams;

std::string model = ""; // model path
std::string model_draft = ""; // draft model for speculative decoding
Expand Down Expand Up @@ -185,7 +182,6 @@ struct gpt_params {
bool flash_attn = false; // flash attention

bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
bool ignore_eos = false; // ignore generated EOS tokens
bool logits_all = false; // return logits for all tokens in the batch
bool use_mmap = true; // use mmap for faster loads
bool use_mlock = false; // use mlock to keep model in memory
Expand Down
Loading

0 comments on commit f648ca2

Please sign in to comment.