Skip to content

Commit

Permalink
Offload KQV by default
Browse files: browse the repository at this point in the history
  • Loading branch information
abetlen committed Jan 18, 2024
1 parent 6bfe98b commit 48c3b77
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion llama_cpp/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __init__(
mul_mat_q: bool = True,
logits_all: bool = False,
embedding: bool = False,
- offload_kqv: bool = False,
+ offload_kqv: bool = True,
# Sampling Params
last_n_tokens_size: int = 64,
# LoRA Params
Expand Down
2 changes: 1 addition & 1 deletion llama_cpp/server/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class ModelSettings(BaseSettings):
logits_all: bool = Field(default=True, description="Whether to return logits.")
embedding: bool = Field(default=True, description="Whether to use embeddings.")
offload_kqv: bool = Field(
- default=False, description="Whether to offload kqv to the GPU."
+ default=True, description="Whether to offload kqv to the GPU."
)
# Sampling Params
last_n_tokens_size: int = Field(
Expand Down

0 comments on commit 48c3b77

Please sign in to comment.