Fix memory issue with llama.cpp LLM pipeline, closes #824
davidmezzetti committed Nov 30, 2024
1 parent 7edc7aa commit cb22635
Showing 3 changed files with 78 additions and 6 deletions.
40 changes: 35 additions & 5 deletions src/python/txtai/pipeline/llm/llama.py
@@ -8,7 +8,7 @@
 
 # Conditional import
 try:
-    from llama_cpp import Llama
+    import llama_cpp as llama
 
     LLAMA_CPP = True
 except ImportError:
@@ -45,11 +45,8 @@ def __init__(self, path, template=None, **kwargs):
         # Check if this is a local path, otherwise download from the HF Hub
         path = path if os.path.exists(path) else self.download(path)
 
-        # Default GPU layers if not already set
-        kwargs["n_gpu_layers"] = kwargs.get("n_gpu_layers", -1 if kwargs.get("gpu", os.environ.get("LLAMA_NO_METAL") != "1") else 0)
-
         # Create llama.cpp instance
-        self.llm = Llama(path, n_ctx=0, verbose=kwargs.pop("verbose", False), **kwargs)
+        self.llm = self.create(path, **kwargs)
 
     def stream(self, texts, maxlength, stream, stop, **kwargs):
         for text in texts:
@@ -79,6 +76,39 @@ def download(self, path):
         # Download and cache file
         return hf_hub_download(repo_id="/".join(parts[:repo]), filename="/".join(parts[repo:]))
 
+    def create(self, path, **kwargs):
+        """
+        Creates a new llama.cpp model instance.
+
+        Args:
+            path: path to model
+            kwargs: additional keyword args
+
+        Returns:
+            llama.cpp instance
+        """
+
+        # Default n_ctx=0 if not already set. This sets n_ctx = n_ctx_train.
+        kwargs["n_ctx"] = kwargs.get("n_ctx", 0)
+
+        # Default GPU layers if not already set
+        kwargs["n_gpu_layers"] = kwargs.get("n_gpu_layers", -1 if kwargs.get("gpu", os.environ.get("LLAMA_NO_METAL") != "1") else 0)
+
+        # Default verbose flag
+        kwargs["verbose"] = kwargs.get("verbose", False)
+
+        # Create llama.cpp instance
+        try:
+            return llama.Llama(model_path=path, **kwargs)
+        except ValueError as e:
+            # Fallback to default n_ctx when not enough memory for n_ctx = n_ctx_train
+            if not kwargs["n_ctx"]:
+                kwargs.pop("n_ctx")
+                return llama.Llama(model_path=path, **kwargs)
+
+            # Raise exception if n_ctx manually specified
+            raise e
+
     def messages(self, messages, maxlength, stream, stop, **kwargs):
         """
         Processes a list of messages.
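For context, here is a minimal usage sketch of the updated pipeline (not part of the commit): the model path is the GGUF file used by the tests below, the prompt and maxlength values are illustrative, and llama-cpp-python is assumed to be installed.

from txtai.pipeline import LLM

# Illustrative example - with no n_ctx argument, create() first tries n_ctx=0
# (context size = n_ctx_train) and, if llama.cpp raises a ValueError because that
# context cannot be allocated, retries with the llama.cpp default context size
llm = LLM("TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf")

# With an explicit n_ctx there is no fallback - an allocation failure is re-raised
llm = LLM("TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf", n_ctx=2048)

print(llm("What is the capital of France?", maxlength=128))
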
2 changes: 1 addition & 1 deletion test/python/testpipeline/testaudio/testmicrophone.py
@@ -70,7 +70,7 @@ def int16(self, data):
                 return (data * absmax + offset).clip(i.min, i.max).astype(np.int16)
 
         # Mock input stream
-        inputstream.return_value = RawInputStream()
+        inputstream.side_effect = RawInputStream
 
         # Create microphone pipeline and read data
         pipeline = Microphone()
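The one-line change above swaps return_value for side_effect on the patched input stream, presumably so each call to the patched constructor yields a fresh mock stream instance. With unittest.mock, return_value hands back the same pre-built object on every call, while side_effect set to a class calls that class, constructing a new instance per call. A small standalone sketch of the difference (not part of the commit; the Stream class is a made-up stand-in):

from unittest.mock import MagicMock

class Stream:
    """Stand-in for the mocked input stream class."""

mock = MagicMock()

# return_value: every call returns the same pre-built instance
mock.return_value = Stream()
assert mock() is mock()

# side_effect: the class itself is called, so every call builds a new instance
mock.side_effect = Stream
assert mock() is not mock()
assert isinstance(mock(), Stream)
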
42 changes: 42 additions & 0 deletions test/python/testpipeline/testllm/testllama.py
@@ -4,6 +4,8 @@
 
 import unittest
 
+from unittest.mock import patch
+
 from txtai.pipeline import LLM
 
 
@@ -12,6 +14,46 @@ class TestLlama(unittest.TestCase):
     llama.cpp tests.
     """
 
+    @patch("llama_cpp.Llama")
+    def testContext(self, llama):
+        """
+        Test n_ctx with llama.cpp
+        """
+
+        class Llama:
+            """
+            Mock llama.cpp instance to test invalid context
+            """
+
+            def __init__(self, **kwargs):
+                if kwargs.get("n_ctx") == 0 or kwargs.get("n_ctx", 0) >= 10000:
+                    raise ValueError("Failed to create context")
+
+                # Save parameters
+                self.params = kwargs
+
+        # Mock llama.cpp instance
+        llama.side_effect = Llama
+
+        # Model to test
+        path = "TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
+
+        # Test omitting n_ctx falls back to default settings
+        llm = LLM(path)
+        self.assertNotIn("n_ctx", llm.generator.llm.params)
+
+        # Test n_ctx=0 falls back to default settings
+        llm = LLM(path, n_ctx=0)
+        self.assertNotIn("n_ctx", llm.generator.llm.params)
+
+        # Test n_ctx manually set
+        llm = LLM(path, n_ctx=1024)
+        self.assertEqual(llm.generator.llm.params["n_ctx"], 1024)
+
+        # Mock a value for n_ctx that's too big
+        with self.assertRaises(ValueError):
+            llm = LLM(path, n_ctx=10000)
+
     def testGeneration(self):
         """
         Test generation with llama.cpp
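In testContext, the mocked llama_cpp.Llama rejects n_ctx=0 and any n_ctx >= 10000, so the first two LLM() calls exercise the fallback in create() (the retry drops n_ctx, which is why it is missing from the kwargs the mock saved in params), the third call verifies an explicitly requested n_ctx is passed through unchanged, and the last verifies that an explicit n_ctx that fails to allocate is re-raised rather than silently downgraded. The new test can be run on its own with something like the following (illustrative command; it assumes pytest and llama-cpp-python are installed and is run from the repository root):

python -m pytest test/python/testpipeline/testllm/testllama.py -k testContext
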
