Skip to content

Commit

Permalink
feat: Update llama.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
abetlen committed Feb 27, 2024
1 parent 4d574bd commit fea33c9
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
5 changes: 5 additions & 0 deletions llama_cpp/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors

# LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
# };
Expand Down Expand Up @@ -295,6 +296,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
LLAMA_FTYPE_MOSTLY_IQ3_M = 27
LLAMA_FTYPE_MOSTLY_IQ2_S = 28
LLAMA_FTYPE_MOSTLY_IQ2_M = 29
LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
LLAMA_FTYPE_GUESSED = 1024

# enum llama_rope_scaling_type {
Expand Down Expand Up @@ -548,6 +550,7 @@ class llama_model_params(ctypes.Structure):
# float yarn_beta_fast; // YaRN low correction dim
# float yarn_beta_slow; // YaRN high correction dim
# uint32_t yarn_orig_ctx; // YaRN original context size
# float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)

# ggml_backend_sched_eval_callback cb_eval;
# void * cb_eval_user_data;
Expand Down Expand Up @@ -580,6 +583,7 @@ class llama_context_params(ctypes.Structure):
yarn_beta_fast (float): YaRN low correction dim
yarn_beta_slow (float): YaRN high correction dim
yarn_orig_ctx (int): YaRN original context size
defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
        cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
type_k (int): data type for K cache
Expand All @@ -605,6 +609,7 @@ class llama_context_params(ctypes.Structure):
("yarn_beta_fast", ctypes.c_float),
("yarn_beta_slow", ctypes.c_float),
("yarn_orig_ctx", ctypes.c_uint32),
("defrag_thold", ctypes.c_float),
("cb_eval", ggml_backend_sched_eval_callback),
("cb_eval_user_data", ctypes.c_void_p),
("type_k", ctypes.c_int),
Expand Down

0 comments on commit fea33c9

Please sign in to comment.