llama-cli, llava-cli, minicpmv-cli: build process
mtasic85 committed Nov 26, 2024
1 parent b8c617c commit 2e4609e
Showing 8 changed files with 294 additions and 331 deletions.
12 changes: 6 additions & 6 deletions Makefile_5.patch
@@ -3,37 +3,37 @@
@@ -970,7 +970,11 @@
$(DIR_COMMON)/build-info.o \
$(DIR_COMMON)/json-schema-to-grammar.o

-OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
+OBJ_LLAVA = \
+ examples/llava/llava.o \
+ examples/llava/clip.o
+
+OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) $(OBJ_LLAVA)

LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT)
LIB_GGML_S = $(LIB_PRE)ggml.a
@@ -1153,6 +1157,24 @@
# Helper function that replaces .c, .cpp, and .cu file endings with .o:
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))

+#
+# llama-cpp-cffi static library
+#
+llama-cli-static: examples/main/main.cpp \
+ $(OBJ_ALL)
+ $(CXX) $(CXXFLAGS) $(LIB_CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+ ar rcs llama_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<)
+ ar rcs libllama_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<)
+
+llava-cli-static: examples/llava/llava-cli.cpp \
+ $(OBJ_ALL)
+ $(CXX) $(CXXFLAGS) $(LIB_CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+ ar rcs llava_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<)
+ ar rcs libllava_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<)
+
+minicpmv-cli-static: examples/llava/minicpmv-cli.cpp \
+ $(OBJ_ALL)
+ $(CXX) $(CXXFLAGS) $(LIB_CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+ ar rcs llava_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<)
+ ar rcs libminicpmv_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<)
+
llama-cli: examples/main/main.cpp \
$(OBJ_ALL)
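The new llama-cli-static, llava-cli-static and minicpmv-cli-static targets compile each CLI entry point and archive it together with the GGML, llama, common and (newly added) OBJ_LLAVA objects, so the result can be linked into a Python extension instead of being run as a standalone binary. Below is a minimal sketch of how a cffi build script could consume the llama_cli.a archive; the module name, declarations and linker flags are assumptions for illustration, not part of this commit.

from cffi import FFI

ffibuilder = FFI()

# Callback and entry-point declarations; they mirror the ffi.cast() strings and
# the _llama_cli_main call in llama/llama_cli.py.
ffibuilder.cdef('''
typedef void (*_llama_yield_token_t)(const char * token);
typedef int (*_llama_should_stop_t)(void);
int _llama_cli_main(int argc,
                    char * argv[],
                    _llama_yield_token_t _llama_yield_token,
                    _llama_should_stop_t _llama_should_stop);
''')

ffibuilder.set_source(
    'llama._llama_cli_cpu',            # assumed module name, matching `from ._llama_cli_cpu import lib, ffi`
    '''
    typedef void (*_llama_yield_token_t)(const char * token);
    typedef int (*_llama_should_stop_t)(void);
    int _llama_cli_main(int argc,
                        char * argv[],
                        _llama_yield_token_t _llama_yield_token,
                        _llama_should_stop_t _llama_should_stop);
    ''',
    extra_objects=['llama_cli.a'],     # archive produced by `make llama-cli-static`
    libraries=['stdc++'],              # the archived objects are C++; the real build likely needs more flags
)

if __name__ == '__main__':
    ffibuilder.compile(verbose=True)

This sketch assumes _llama_cli_main is exported with C linkage by the patched llama.cpp sources.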
5 changes: 5 additions & 0 deletions examples/demo_models.py
@@ -28,6 +28,11 @@
hf_repo='bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF',
hf_file='SmolLM-1.7B-Instruct-v0.2-Q4_K_M.gguf',
),
'HuggingFaceTB/SmolLM2-1.7B-Instruct': Model(
creator_hf_repo='HuggingFaceTB/SmolLM2-1.7B-Instruct',
hf_repo='bartowski/SmolLM2-1.7B-Instruct-GGUF',
hf_file='SmolLM2-1.7B-Instruct-Q4_K_M.gguf',
),
'microsoft/phi-2': Model(
creator_hf_repo='microsoft/phi-2',
hf_repo='andrijdavid/phi-2-GGUF',
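Each Model entry pairs the creator repository (used for its config, e.g. get_config(model.creator_hf_repo) in the demos) with a quantized GGUF repository and file name. How the GGUF file is actually fetched is not shown in this diff; as an illustration only, the new SmolLM2 entry could be resolved with huggingface_hub like this:

from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(
    repo_id='bartowski/SmolLM2-1.7B-Instruct-GGUF',   # hf_repo of the new entry
    filename='SmolLM2-1.7B-Instruct-Q4_K_M.gguf',     # hf_file of the new entry
)
print(gguf_path)  # local path that would be handed to the llama.cpp runtime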
2 changes: 1 addition & 1 deletion examples/demo_smollm_chat.py
@@ -26,7 +26,7 @@ def demo(model: Model):

if __name__ == '__main__':
models_ids: list[str] = [
'HuggingFaceTB/SmolLM-1.7B-Instruct-v0.2',
'HuggingFaceTB/SmolLM2-1.7B-Instruct',
]

for model_id in models_ids:
8 changes: 4 additions & 4 deletions examples/demo_smollm_tool.py
@@ -7,7 +7,7 @@
def demo(model: Model):
print(model)
config = get_config(model.creator_hf_repo)

options = Options(
ctx_size=config.max_position_embeddings,
predict=-2,
@@ -27,9 +27,9 @@ def demo(model: Model):

if __name__ == '__main__':
models_ids: list[str] = [
'HuggingFaceTB/SmolLM-1.7B-Instruct-v0.2',
'HuggingFaceTB/SmolLM2-1.7B-Instruct',
]

for model_id in models_ids:
model: Model = models[model_id]
demo(model)
demo(model)
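The demos now point at SmolLM2, resolve the context size from the creator repository's config, and stream the result. A condensed usage sketch follows; the import paths, the extra Options fields and the exact return type of llama_generate are assumptions based on what the demo diffs show, not verbatim project code.

from llama import get_config, llama_generate, Options, Model   # assumed import surface
from demo_models import models                                  # the table extended above

model: Model = models['HuggingFaceTB/SmolLM2-1.7B-Instruct']
config = get_config(model.creator_hf_repo)

options = Options(
    ctx_size=config.max_position_embeddings,   # context size taken from the HF config
    predict=-2,                                 # llama.cpp convention: generate until the context is full
    # ... remaining fields (model, prompt, ...) as in examples/demo_smollm_tool.py
)

for chunk in llama_generate(options):           # assumed: llama_generate yields text chunks
    print(chunk, end='', flush=True)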
16 changes: 6 additions & 10 deletions llama/__init__.py
@@ -1,11 +1,7 @@
import os
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1'
# import os
# os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1'

from .formatter import *
from .model import *
from .options import *

try:
from .llama_cli import *
except ImportError:
pass
from .formatter import * # noqa
from .model import * # noqa
from .options import * # noqa
from .llama_cli import * # noqa
48 changes: 26 additions & 22 deletions llama/llama_cli.py
@@ -1,7 +1,6 @@
__all__ = ['llama_generate']

import os
import json
import ctypes
from queue import Queue
from copy import deepcopy
@@ -21,22 +21,27 @@
LLAMA_CPP_BACKEND = os.getenv('LLAMA_CPP_BACKEND', None)


if LLAMA_CPP_BACKEND:
if LLAMA_CPP_BACKEND in ('cuda', 'CUDA'):
from ._llama_cli_cuda_12_6 import lib, ffi
elif LLAMA_CPP_BACKEND in ('vulkan', 'VULKAN'):
from ._llama_cli_vulkan_1_x import lib, ffi
elif LLAMA_CPP_BACKEND in ('cpu', 'CPU'):
from ._llama_cli_cpu import lib, ffi
try:
if LLAMA_CPP_BACKEND:
if LLAMA_CPP_BACKEND in ('cuda', 'CUDA'):
from ._llama_cli_cuda_12_6_3 import lib, ffi
elif LLAMA_CPP_BACKEND in ('vulkan', 'VULKAN'):
from ._llama_cli_vulkan_1_x import lib, ffi
elif LLAMA_CPP_BACKEND in ('cpu', 'CPU'):
from ._llama_cli_cpu import lib, ffi
else:
raise ValueError(f'{LLAMA_CPP_BACKEND = }')
else:
raise ValueError(f'{LLAMA_CPP_BACKEND = }')
else:
if is_cuda_available():
from ._llama_cli_cuda_12_6 import lib, ffi
elif is_vulkan_available():
from ._llama_cli_vulkan_1_x import lib, ffi
else:
from ._llama_cli_cpu import lib, ffi
if is_cuda_available():
from ._llama_cli_cuda_12_6_3 import lib, ffi
elif is_vulkan_available():
from ._llama_cli_vulkan_1_x import lib, ffi
else:
from ._llama_cli_cpu import lib, ffi
except ImportError:
from ._llama_cli_cpu import lib, ffi
except ModuleNotFoundError:
from ._llama_cli_cpu import lib, ffi


_LLAMA_YIELD_TOKEN_T = ctypes.CFUNCTYPE(None, ctypes.c_char_p)
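The hunk above wraps backend selection in try/except: an explicit LLAMA_CPP_BACKEND value ('cuda', 'vulkan' or 'cpu') is honored first, otherwise is_cuda_available()/is_vulkan_available() are probed, and if the chosen extension module cannot be imported the CPU build is used instead (an unrecognized value still raises ValueError, which is not caught). A short usage sketch, assuming the package is installed as llama:

import os
os.environ['LLAMA_CPP_BACKEND'] = 'cuda'   # must be set before the first import of llama

from llama import llama_generate           # loads _llama_cli_cuda_12_6_3 if importable,
                                           # otherwise falls back to _llama_cli_cpu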
@@ -55,7 +55,7 @@ def _llama_yield_token_func(chunk_bytes: bytes, queue: Queue, metadata: dict):
return

metadata['prev_chunk_bytes'] = b''

if not stop_on_special_token:
queue.put(chunk)
return
@@ -64,10 +64,10 @@ def _llama_yield_token_func(chunk_bytes: bytes, queue: Queue, metadata: dict):
buffer = metadata['buffer']
buffer += chunk
metadata['buffer'] = buffer

subtoken_found = False
token_found = False

for token in special_tokens:
for i in range(len(token)):
subtoken = token[:i + 1]
@@ -85,13 +85,13 @@ def _llama_yield_token_func(chunk_bytes: bytes, queue: Queue, metadata: dict):
metadata['buffer'] = buffer
metadata['should_stop'] = True
token_found = True

if subtoken_found:
return

if token_found:
return

buffer = metadata['buffer']
queue.put(buffer)
metadata['buffer'] = ''
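The hunks above mostly trim trailing whitespace, but they sit inside the stop-token handling of _llama_yield_token_func: chunks are accumulated in metadata['buffer'] and held back while they could still grow into a special token, and generation is flagged to stop once a full special token is seen. A simplified, standalone sketch of that buffering idea (not the module's exact code):

def stream_until_stop(chunks, stop_tokens):
    """Yield text from `chunks`, holding back anything that could still grow
    into one of `stop_tokens`; stop as soon as a full stop token appears."""
    buffer = ''
    for chunk in chunks:
        buffer += chunk
        for token in stop_tokens:
            idx = buffer.find(token)
            if idx != -1:
                if idx:
                    yield buffer[:idx]        # emit the text before the stop token
                return                         # full stop token seen: stop generation
        # Longest suffix of the buffer that is a proper prefix of some stop token.
        hold = 0
        for token in stop_tokens:
            for i in range(1, len(token)):
                if buffer.endswith(token[:i]):
                    hold = max(hold, i)
        if len(buffer) > hold:
            yield buffer[:len(buffer) - hold]
            buffer = buffer[len(buffer) - hold:]
    if buffer:
        yield buffer                           # stream ended on a dangling partial match

# e.g. list(stream_until_stop(['Hello <', '|end', '|> world'], ['<|end|>'])) == ['Hello ']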
@@ -111,7 +111,7 @@ def _llama_cli_main(argc, argv, queue: Queue, metadata: dict):
cffi__llama_yield_token_callback = ffi.cast('void (*_llama_yield_token_t)(const char * token)', _llama_yield_token_address)
cffi__llama_should_stop_callback = ffi.cast('int (*_llama_should_stop_t)(void)', _llama_should_stop_address)

r = lib._llama_cli_main(argc, argv, cffi__llama_yield_token_callback, cffi__llama_should_stop_callback, 1)
r = lib._llama_cli_main(argc, argv, cffi__llama_yield_token_callback, cffi__llama_should_stop_callback)
# assert r == 0
queue.put(None)
