From 2e4609eb489d43217a5d7e77045827ccb81e7806 Mon Sep 17 00:00:00 2001 From: Marko Tasic Date: Tue, 26 Nov 2024 16:45:21 +0100 Subject: [PATCH] llama-cli, llava-cli, minicpmv-cli: build process --- Makefile_5.patch | 12 +- examples/demo_models.py | 5 + examples/demo_smollm_chat.py | 2 +- examples/demo_smollm_tool.py | 8 +- llama/__init__.py | 16 +- llama/llama_cli.py | 48 +++-- poetry.lock | 138 ++++++------ scripts/build.py | 396 +++++++++++++++-------------------- 8 files changed, 294 insertions(+), 331 deletions(-) diff --git a/Makefile_5.patch b/Makefile_5.patch index caa922d..3400b3f 100644 --- a/Makefile_5.patch +++ b/Makefile_5.patch @@ -3,37 +3,37 @@ @@ -970,7 +970,11 @@ $(DIR_COMMON)/build-info.o \ $(DIR_COMMON)/json-schema-to-grammar.o - + -OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) +OBJ_LLAVA = \ + examples/llava/llava.o \ + examples/llava/clip.o + +OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) $(OBJ_LLAVA) - + LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT) LIB_GGML_S = $(LIB_PRE)ggml.a @@ -1153,6 +1157,24 @@ # Helper function that replaces .c, .cpp, and .cu file endings with .o: GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) - + +# +# llama-cpp-cffi static library +# +llama-cli-static: examples/main/main.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $(LIB_CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) -+ ar rcs llama_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) ++ ar rcs libllama_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) + +llava-cli-static: examples/llava/llava-cli.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $(LIB_CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) -+ ar rcs llava_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) ++ ar rcs libllava_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) + +minicpmv-cli-static: examples/llava/minicpmv-cli.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $(LIB_CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) -+ ar rcs llava_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) ++ ar rcs libminicpmv_cli.a $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) + llama-cli: examples/main/main.cpp \ $(OBJ_ALL) diff --git a/examples/demo_models.py b/examples/demo_models.py index 880e677..df46074 100644 --- a/examples/demo_models.py +++ b/examples/demo_models.py @@ -28,6 +28,11 @@ hf_repo='bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF', hf_file='SmolLM-1.7B-Instruct-v0.2-Q4_K_M.gguf', ), + 'HuggingFaceTB/SmolLM2-1.7B-Instruct': Model( + creator_hf_repo='HuggingFaceTB/SmolLM2-1.7B-Instruct', + hf_repo='bartowski/SmolLM2-1.7B-Instruct-GGUF', + hf_file='SmolLM2-1.7B-Instruct-Q4_K_M.gguf', + ), 'microsoft/phi-2': Model( creator_hf_repo='microsoft/phi-2', hf_repo='andrijdavid/phi-2-GGUF', diff --git a/examples/demo_smollm_chat.py b/examples/demo_smollm_chat.py index 8e5ef14..a95381b 100644 --- a/examples/demo_smollm_chat.py +++ b/examples/demo_smollm_chat.py @@ -26,7 +26,7 @@ def demo(model: Model): if __name__ == '__main__': models_ids: list[str] = [ - 'HuggingFaceTB/SmolLM-1.7B-Instruct-v0.2', + 'HuggingFaceTB/SmolLM2-1.7B-Instruct', ] for model_id in models_ids: diff --git a/examples/demo_smollm_tool.py b/examples/demo_smollm_tool.py index cb50d6d..3547f3b 100644 --- a/examples/demo_smollm_tool.py +++ b/examples/demo_smollm_tool.py @@ -7,7 +7,7 @@ def demo(model: Model): print(model) config = get_config(model.creator_hf_repo) - + options = Options( ctx_size=config.max_position_embeddings, predict=-2, @@ -27,9 +27,9 @@ def demo(model: Model): if __name__ == '__main__': models_ids: list[str] = [ - 'HuggingFaceTB/SmolLM-1.7B-Instruct-v0.2', + 'HuggingFaceTB/SmolLM2-1.7B-Instruct', ] - + for model_id in models_ids: model: Model = models[model_id] - demo(model) \ No newline at end of file + demo(model) diff --git a/llama/__init__.py b/llama/__init__.py index d948efe..24cabe9 100644 --- a/llama/__init__.py +++ b/llama/__init__.py @@ -1,11 +1,7 @@ -import os -os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1' +# import os +# os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1' -from .formatter import * -from .model import * -from .options import * - -try: - from .llama_cli import * -except ImportError: - pass +from .formatter import * # noqa +from .model import * # noqa +from .options import * # noqa +from .llama_cli import * # noqa diff --git a/llama/llama_cli.py b/llama/llama_cli.py index e70e535..f3f1510 100644 --- a/llama/llama_cli.py +++ b/llama/llama_cli.py @@ -1,7 +1,6 @@ __all__ = ['llama_generate'] import os -import json import ctypes from queue import Queue from copy import deepcopy @@ -21,22 +20,27 @@ LLAMA_CPP_BACKEND = os.getenv('LLAMA_CPP_BACKEND', None) -if LLAMA_CPP_BACKEND: - if LLAMA_CPP_BACKEND in ('cuda', 'CUDA'): - from ._llama_cli_cuda_12_6 import lib, ffi - elif LLAMA_CPP_BACKEND in ('vulkan', 'VULKAN'): - from ._llama_cli_vulkan_1_x import lib, ffi - elif LLAMA_CPP_BACKEND in ('cpu', 'CPU'): - from ._llama_cli_cpu import lib, ffi +try: + if LLAMA_CPP_BACKEND: + if LLAMA_CPP_BACKEND in ('cuda', 'CUDA'): + from ._llama_cli_cuda_12_6_3 import lib, ffi + elif LLAMA_CPP_BACKEND in ('vulkan', 'VULKAN'): + from ._llama_cli_vulkan_1_x import lib, ffi + elif LLAMA_CPP_BACKEND in ('cpu', 'CPU'): + from ._llama_cli_cpu import lib, ffi + else: + raise ValueError(f'{LLAMA_CPP_BACKEND = }') else: - raise ValueError(f'{LLAMA_CPP_BACKEND = }') -else: - if is_cuda_available(): - from ._llama_cli_cuda_12_6 import lib, ffi - elif is_vulkan_available(): - from ._llama_cli_vulkan_1_x import lib, ffi - else: - from ._llama_cli_cpu import lib, ffi + if is_cuda_available(): + from ._llama_cli_cuda_12_6_3 import lib, ffi + elif is_vulkan_available(): + from ._llama_cli_vulkan_1_x import lib, ffi + else: + from ._llama_cli_cpu import lib, ffi +except ImportError: + from ._llama_cli_cpu import lib, ffi +except ModuleNotFoundError: + from ._llama_cli_cpu import lib, ffi _LLAMA_YIELD_TOKEN_T = ctypes.CFUNCTYPE(None, ctypes.c_char_p) @@ -55,7 +59,7 @@ def _llama_yield_token_func(chunk_bytes: bytes, queue: Queue, metadata: dict): return metadata['prev_chunk_bytes'] = b'' - + if not stop_on_special_token: queue.put(chunk) return @@ -64,10 +68,10 @@ def _llama_yield_token_func(chunk_bytes: bytes, queue: Queue, metadata: dict): buffer = metadata['buffer'] buffer += chunk metadata['buffer'] = buffer - + subtoken_found = False token_found = False - + for token in special_tokens: for i in range(len(token)): subtoken = token[:i + 1] @@ -85,13 +89,13 @@ def _llama_yield_token_func(chunk_bytes: bytes, queue: Queue, metadata: dict): metadata['buffer'] = buffer metadata['should_stop'] = True token_found = True - + if subtoken_found: return if token_found: return - + buffer = metadata['buffer'] queue.put(buffer) metadata['buffer'] = '' @@ -111,7 +115,7 @@ def _llama_cli_main(argc, argv, queue: Queue, metadata: dict): cffi__llama_yield_token_callback = ffi.cast('void (*_llama_yield_token_t)(const char * token)', _llama_yield_token_address) cffi__llama_should_stop_callback = ffi.cast('int (*_llama_should_stop_t)(void)', _llama_should_stop_address) - r = lib._llama_cli_main(argc, argv, cffi__llama_yield_token_callback, cffi__llama_should_stop_callback, 1) + r = lib._llama_cli_main(argc, argv, cffi__llama_yield_token_callback, cffi__llama_should_stop_callback) # assert r == 0 queue.put(None) diff --git a/poetry.lock b/poetry.lock index ae6cd43..01d312e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1218,13 +1218,13 @@ pyyaml = ">=5.1" [[package]] name = "mkdocs-material" -version = "9.5.45" +version = "9.5.46" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.45-py3-none-any.whl", hash = "sha256:a9be237cfd0be14be75f40f1726d83aa3a81ce44808dc3594d47a7a592f44547"}, - {file = "mkdocs_material-9.5.45.tar.gz", hash = "sha256:286489cf0beca4a129d91d59d6417419c63bceed1ce5cd0ec1fc7e1ebffb8189"}, + {file = "mkdocs_material-9.5.46-py3-none-any.whl", hash = "sha256:98f0a2039c62e551a68aad0791a8d41324ff90c03a6e6cea381a384b84908b83"}, + {file = "mkdocs_material-9.5.46.tar.gz", hash = "sha256:ae2043f4238e572f9a40e0b577f50400d6fc31e2fef8ea141800aebf3bd273d7"}, ] [package.dependencies] @@ -1450,13 +1450,13 @@ files = [ [[package]] name = "openai" -version = "1.55.0" +version = "1.55.1" description = "The official Python library for the openai API" optional = true python-versions = ">=3.8" files = [ - {file = "openai-1.55.0-py3-none-any.whl", hash = "sha256:446e08918f8dd70d8723274be860404c8c7cc46b91b93bbc0ef051f57eb503c1"}, - {file = "openai-1.55.0.tar.gz", hash = "sha256:6c0975ac8540fe639d12b4ff5a8e0bf1424c844c4a4251148f59f06c4b2bd5db"}, + {file = "openai-1.55.1-py3-none-any.whl", hash = "sha256:d10d96a4f9dc5f05d38dea389119ec8dcd24bc9698293c8357253c601b4a77a5"}, + {file = "openai-1.55.1.tar.gz", hash = "sha256:471324321e7739214f16a544e801947a046d3c5d516fae8719a317234e4968d3"}, ] [package.dependencies] @@ -1684,62 +1684,82 @@ test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "pycares" -version = "4.4.0" +version = "4.5.0" description = "Python interface for c-ares" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pycares-4.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:24da119850841d16996713d9c3374ca28a21deee056d609fbbed29065d17e1f6"}, - {file = "pycares-4.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8f64cb58729689d4d0e78f0bfb4c25ce2f851d0274c0273ac751795c04b8798a"}, - {file = "pycares-4.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d33e2a1120887e89075f7f814ec144f66a6ce06a54f5722ccefc62fbeda83cff"}, - {file = "pycares-4.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c680fef1b502ee680f8f0b95a41af4ec2c234e50e16c0af5bbda31999d3584bd"}, - {file = "pycares-4.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fff16b09042ba077f7b8aa5868d1d22456f0002574d0ba43462b10a009331677"}, - {file = "pycares-4.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:229a1675eb33bc9afb1fc463e73ee334950ccc485bc83a43f6ae5839fb4d5fa3"}, - {file = "pycares-4.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3aebc73e5ad70464f998f77f2da2063aa617cbd8d3e8174dd7c5b4518f967153"}, - {file = "pycares-4.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6ef64649eba56448f65e26546d85c860709844d2fc22ef14d324fe0b27f761a9"}, - {file = "pycares-4.4.0-cp310-cp310-win32.whl", hash = "sha256:4afc2644423f4eef97857a9fd61be9758ce5e336b4b0bd3d591238bb4b8b03e0"}, - {file = "pycares-4.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5ed4e04af4012f875b78219d34434a6d08a67175150ac1b79eb70ab585d4ba8c"}, - {file = "pycares-4.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bce8db2fc6f3174bd39b81405210b9b88d7b607d33e56a970c34a0c190da0490"}, - {file = "pycares-4.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9a0303428d013ccf5c51de59c83f9127aba6200adb7fd4be57eddb432a1edd2a"}, - {file = "pycares-4.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afb91792f1556f97be7f7acb57dc7756d89c5a87bd8b90363a77dbf9ea653817"}, - {file = "pycares-4.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b61579cecf1f4d616e5ea31a6e423a16680ab0d3a24a2ffe7bb1d4ee162477ff"}, - {file = "pycares-4.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7af06968cbf6851566e806bf3e72825b0e6671832a2cbe840be1d2d65350710"}, - {file = "pycares-4.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ceb12974367b0a68a05d52f4162b29f575d241bd53de155efe632bf2c943c7f6"}, - {file = "pycares-4.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2eeec144bcf6a7b6f2d74d6e70cbba7886a84dd373c886f06cb137a07de4954c"}, - {file = "pycares-4.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e3a6f7cfdfd11eb5493d6d632e582408c8f3b429f295f8799c584c108b28db6f"}, - {file = "pycares-4.4.0-cp311-cp311-win32.whl", hash = "sha256:34736a2ffaa9c08ca9c707011a2d7b69074bbf82d645d8138bba771479b2362f"}, - {file = "pycares-4.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:eb66c30eb11e877976b7ead13632082a8621df648c408b8e15cdb91a452dd502"}, - {file = "pycares-4.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fd644505a8cfd7f6584d33a9066d4e3d47700f050ef1490230c962de5dfb28c6"}, - {file = "pycares-4.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:52084961262232ec04bd75f5043aed7e5d8d9695e542ff691dfef0110209f2d4"}, - {file = "pycares-4.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0c5368206057884cde18602580083aeaad9b860e2eac14fd253543158ce1e93"}, - {file = "pycares-4.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:112a4979c695b1c86f6782163d7dec58d57a3b9510536dcf4826550f9053dd9a"}, - {file = "pycares-4.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8d186dafccdaa3409194c0f94db93c1a5d191145a275f19da6591f9499b8e7b8"}, - {file = "pycares-4.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:64965dc19c578a683ea73487a215a8897276224e004d50eeb21f0bc7a0b63c88"}, - {file = "pycares-4.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ed2a38e34bec6f2586435f6ff0bc5fe11d14bebd7ed492cf739a424e81681540"}, - {file = "pycares-4.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:94d6962db81541eb0396d2f0dfcbb18cdb8c8b251d165efc2d974ae652c547d4"}, - {file = "pycares-4.4.0-cp312-cp312-win32.whl", hash = "sha256:1168a48a834813aa80f412be2df4abaf630528a58d15c704857448b20b1675c0"}, - {file = "pycares-4.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:db24c4e7fea4a052c6e869cbf387dd85d53b9736cfe1ef5d8d568d1ca925e977"}, - {file = "pycares-4.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:21a5a0468861ec7df7befa69050f952da13db5427ae41ffe4713bc96291d1d95"}, - {file = "pycares-4.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:22c00bf659a9fa44d7b405cf1cd69b68b9d37537899898d8cbe5dffa4016b273"}, - {file = "pycares-4.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23aa3993a352491a47fcf17867f61472f32f874df4adcbb486294bd9fbe8abee"}, - {file = "pycares-4.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:813d661cbe2e37d87da2d16b7110a6860e93ddb11735c6919c8a3545c7b9c8d8"}, - {file = "pycares-4.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:77cf5a2fd5583c670de41a7f4a7b46e5cbabe7180d8029f728571f4d2e864084"}, - {file = "pycares-4.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3eaa6681c0a3e3f3868c77aca14b7760fed35fdfda2fe587e15c701950e7bc69"}, - {file = "pycares-4.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ad58e284a658a8a6a84af2e0b62f2f961f303cedfe551854d7bd40c3cbb61912"}, - {file = "pycares-4.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bfb89ca9e3d0a9b5332deeb666b2ede9d3469107742158f4aeda5ce032d003f4"}, - {file = "pycares-4.4.0-cp38-cp38-win32.whl", hash = "sha256:f36bdc1562142e3695555d2f4ac0cb69af165eddcefa98efc1c79495b533481f"}, - {file = "pycares-4.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:902461a92b6a80fd5041a2ec5235680c7cc35e43615639ec2a40e63fca2dfb51"}, - {file = "pycares-4.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7bddc6adba8f699728f7fc1c9ce8cef359817ad78e2ed52b9502cb5f8dc7f741"}, - {file = "pycares-4.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cb49d5805cd347c404f928c5ae7c35e86ba0c58ffa701dbe905365e77ce7d641"}, - {file = "pycares-4.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56cf3349fa3a2e67ed387a7974c11d233734636fe19facfcda261b411af14d80"}, - {file = "pycares-4.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bf2eaa83a5987e48fa63302f0fe7ce3275cfda87b34d40fef9ce703fb3ac002"}, - {file = "pycares-4.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82bba2ab77eb5addbf9758d514d9bdef3c1bfe7d1649a47bd9a0d55a23ef478b"}, - {file = "pycares-4.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c6a8bde63106f162fca736e842a916853cad3c8d9d137e11c9ffa37efa818b02"}, - {file = "pycares-4.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f5f646eec041db6ffdbcaf3e0756fb92018f7af3266138c756bb09d2b5baadec"}, - {file = "pycares-4.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9dc04c54c6ea615210c1b9e803d0e2d2255f87a3d5d119b6482c8f0dfa15b26b"}, - {file = "pycares-4.4.0-cp39-cp39-win32.whl", hash = "sha256:97892cced5794d721fb4ff8765764aa4ea48fe8b2c3820677505b96b83d4ef47"}, - {file = "pycares-4.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:917f08f0b5d9324e9a34211e68d27447c552b50ab967044776bbab7e42a553a2"}, - {file = "pycares-4.4.0.tar.gz", hash = "sha256:f47579d508f2f56eddd16ce72045782ad3b1b3b678098699e2b6a1b30733e1c2"}, + {file = "pycares-4.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13a82fad8239d6fbcf916099bee17d8b5666d0ddb77dace431e0f7961c9427ab"}, + {file = "pycares-4.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fefc7bebbe39b2e3b4b9615471233a8f7356b96129a7db9030313a3ae4ecc42d"}, + {file = "pycares-4.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e322e8ce810026f6e0c7c2a254b9ed02191ab8d42fa2ce6808ede1bdccab8e65"}, + {file = "pycares-4.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:723ba0803b016294430e40e544503fed9164949b694342c2552ab189e2b688ef"}, + {file = "pycares-4.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e48b20b59cdc929cc712a8b22e89c273256e482b49bb8999af98d2c6fc4563c2"}, + {file = "pycares-4.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de6e55bd9af595b112ac6080ac0a0d52b5853d0d8e6d01ac65ff09e51e62490a"}, + {file = "pycares-4.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6f4b9063e3dd70460400367917698f209c10aabb68bf70b09e364895444487d"}, + {file = "pycares-4.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:95522d4840d702fd766439a7c7cd747935aa54cf0b8675e9fadd8414dd9dd0df"}, + {file = "pycares-4.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4709ce4fd9dbee24b1397f71a2adb3267323bb5ad5e7fde3f87873d172dd156"}, + {file = "pycares-4.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8addbf3408af1010f50fd67ef634a6cb239ccb9c534c32a40713f3b8d306a98e"}, + {file = "pycares-4.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:d0428ef42fcf575e197047e6a47892404faa34231902a453b3dfed66af4178b3"}, + {file = "pycares-4.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:aed5c2732f3a6bdbbfab202267d37044ca1162f690b9d34b7ece97ba43f27453"}, + {file = "pycares-4.5.0-cp310-cp310-win32.whl", hash = "sha256:b1859ea770a7abec40a6d02b5ab03c2396c4900c01f4e50ddb6c0dca4c2a6a7c"}, + {file = "pycares-4.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9f87d8da20a3a80ab05fe80c14a62bf078bd726ca6af609edbeb376fb97d50ab"}, + {file = "pycares-4.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ca7a1dba7b88290710db45012e0903c21c839fa0a2b9ddc100bba8e66bfb251"}, + {file = "pycares-4.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:160e92588cdf1a0fa3a7015f47990b508d50efd9109ea4d719dee31c058f0648"}, + {file = "pycares-4.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f38e45d23660ed1dafdb956fd263ae4735530ef1578aa2bf2caabb94cee4523"}, + {file = "pycares-4.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f742acc6d29a99ffc14e3f154b3848ea05c5533b71065e0f0a0fd99c527491b2"}, + {file = "pycares-4.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ceaf71bcd7b6447705e689b8fee8836c20c6148511a90122981f524a84bfcca9"}, + {file = "pycares-4.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdc3c0be7b5b83e78e28818fecd0405bd401110dd6e2e66f7f10713c1188362c"}, + {file = "pycares-4.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd458ee69800195247aa19b5675c5914cbc091c5a220e4f0e96777a31bb555c1"}, + {file = "pycares-4.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a6649d713df73266708642fc3d04f110c0a66bee510fbce4cc5fed79df42083"}, + {file = "pycares-4.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ac57d7bda925c10b997434e7ce30a2c3689c2e96bab9fd0a1165d5577378eecd"}, + {file = "pycares-4.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ba17d8e5eeec4b2e0eb1a6a840bae9e62cd1c1c9cbc8dc9db9d1b9fdf33d0b54"}, + {file = "pycares-4.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9e9b7d1a8de703283e4735c0e532ba4bc600e88de872dcd1a9a4950cf74d9f4f"}, + {file = "pycares-4.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c6922ecbe458c13a4a2c1177bbce38abc44b5f086bc82115a92eab34418915f"}, + {file = "pycares-4.5.0-cp311-cp311-win32.whl", hash = "sha256:1004b8a17614e33410b4b1bb68360977667f1cc9ab2dbcfb27240d6703e4cb6a"}, + {file = "pycares-4.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:2c9c1055c622258a0f315560b2880a372363484b87cbef48af092624804caa72"}, + {file = "pycares-4.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:506efbe5017807747ccd1bdcb3c2f6e64635bc01fee01a50c0b97d649018c162"}, + {file = "pycares-4.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c469ec9fbe0526f45a98f67c1ea55be03abf30809c4f9c9be4bc93fb6806304d"}, + {file = "pycares-4.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597c0950ede240c3a779f023fcf2442207fc11e570d3ca4ccdbb0db5bbaf2588"}, + {file = "pycares-4.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9aa0da03c4df6ed0f87dd52a293bd0508734515041cc5be0f85d9edc1814914f"}, + {file = "pycares-4.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aea1ebf52767c777d10a1b3d03844b9b05cc892714b3ee177d5d9fbff74fb9fa"}, + {file = "pycares-4.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb20d84269ddffb177b6048e3bc03d0b9ffe17592093d900d5544805958d86b3"}, + {file = "pycares-4.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3125df81b657971ee5c0333f8f560ba0151db1eb7cf04aea7d783bb433b306c1"}, + {file = "pycares-4.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:525c77ea44546c12f379641aee163585d403cf50e29b04a06059d6aac894e956"}, + {file = "pycares-4.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1fd87cb26b317a9988abfcfa4e4dbc55d5f20177e5979ad4d854468a9246c187"}, + {file = "pycares-4.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a90aecd41188884e57ae32507a2c6b010c60b791a253083761bbb37a488ecaed"}, + {file = "pycares-4.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:0d3de65cab653979dcc491e03f596566c9d40346c9deb088e0f9fe70600d8737"}, + {file = "pycares-4.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:27a77b43604b3ba24e4fc49fd3ea59f50f7d89c7255f1f1ea46928b26cccacfa"}, + {file = "pycares-4.5.0-cp312-cp312-win32.whl", hash = "sha256:6028cb8766f0fea1d2caa69fac23621fbe2cff9ce6968374e165737258703a33"}, + {file = "pycares-4.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:2ce10672c4cfd1c5fb6718e8b25f0336ca11c89aab88aa6df53dafc4e41df740"}, + {file = "pycares-4.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:011cd670da7caf55664c944abb71ec39af82b837f8d48da7cf0eec80f5682c4c"}, + {file = "pycares-4.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b5c67930497fb2b1dbcaa85f8c4188fc2cb62e41d787deeed2d33cfe9dd6bf52"}, + {file = "pycares-4.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d435a3b8468c656a7e7180dd7c4794510f6c612c33ad61a0fff6e440621f8b5"}, + {file = "pycares-4.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8371f5ee1efb33d6276e275d152c9c5605e5f2e58a9e168519ec1f9e13dd95ae"}, + {file = "pycares-4.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c76a9096fd5dc49c61c5235ea7032e8b43f4382800d64ca1e0e0cda700c082aa"}, + {file = "pycares-4.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b604af76b57469ff68b44e9e4c857eaee43bc5035f4f183f07f4f7149191fe1b"}, + {file = "pycares-4.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c589bd4f9160bfdb2f8080cf564bb120a4312cf091db07fe417f8e58a896a63c"}, + {file = "pycares-4.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:361262805bb09742c364ec0117842043c950339e38561009bcabbb6ac89458ef"}, + {file = "pycares-4.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6d2afb3c0776467055bf33db843ef483d25639be0f32e3a13ef5d4dc64098bf5"}, + {file = "pycares-4.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bc7a1d8ed7c7a4de17706a3c89b305b02eb64c778897e6727c043e5b9dd0d853"}, + {file = "pycares-4.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5703ec878b5c1efacdbf24ceaedfa606112fc67af5564f4db99c2c210f3ffadc"}, + {file = "pycares-4.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d87758e09dbf52c27ed7cf7bc7eaf8b3226217d10c52b03d61a14d59f40fcae1"}, + {file = "pycares-4.5.0-cp313-cp313-win32.whl", hash = "sha256:3316d490b4ce1a69f034881ac1ea7608f5f24ea5293db24ab574ac70b7d7e407"}, + {file = "pycares-4.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:018e700fb0d1a2db5ec96e404ffa85ed97cc96e96d6af0bb9548111e37cf36a3"}, + {file = "pycares-4.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:78c9890d93108c70708babee8a783e6021233f1f0a763d3634add6fd429aae58"}, + {file = "pycares-4.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba69f8123995aa3df99f6ebc726fc6a4b08e467a957b215c0a82749b901d5eed"}, + {file = "pycares-4.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32d33c4ffae31d1b544adebe0b9aee2be1fb18aedd3f4f91e41c495ccbafd6d8"}, + {file = "pycares-4.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17a060cfc469828abf7f5945964d505bd8c0a756942fee159538f7885169752e"}, + {file = "pycares-4.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1d0d5e69fa29e41b590a9dd5842454e8f34e2b928c92540aaf87e0161de8120"}, + {file = "pycares-4.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f096699c46f5dde2c7a8d91501a36d2d58500f4d63682e2ec14a0fed7cca6402"}, + {file = "pycares-4.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:429fe2065581a64a5f024f507b5f679bf37ea0ed39c3ba6289dba907e1c8a8f4"}, + {file = "pycares-4.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9ea2f6d48e64b413b97b41b47392087b452af9bf9f9d4d6d05305a159f45909f"}, + {file = "pycares-4.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:96d3aecd747a3fcd1e12c1ea1481b0813b4e0e80d40f314db7a86dda5bb1bd94"}, + {file = "pycares-4.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:32919f6eda7f5ea4df3e64149fc5792b0d455277d23d6d0fc365142062f35d80"}, + {file = "pycares-4.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:37add862461f9a3fc7ee4dd8b68465812b39456e21cebd5a33c414131ac05060"}, + {file = "pycares-4.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ed1d050d2c6d74a77c1b6c51fd99426cc000b4202a50d28d6ca75f7433099a6b"}, + {file = "pycares-4.5.0-cp39-cp39-win32.whl", hash = "sha256:887ac451ffe6e39ee46d3d0989c7bb829933d77e1dad5776511d825fc7e6a25b"}, + {file = "pycares-4.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c8b87c05740595bc8051dc98e51f022f003750e7da90f62f7a9fd50e330b196"}, + {file = "pycares-4.5.0.tar.gz", hash = "sha256:025b6c2ffea4e9fb8f9a097381c2fecb24aff23fbd6906e70da22ec9ba60e19d"}, ] [package.dependencies] diff --git a/scripts/build.py b/scripts/build.py index 535fc12..2336972 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -6,7 +6,7 @@ from cffi import FFI -from clean import clean_llama, clean_llama_cpp, clean +from clean import clean_llama_cpp, clean # if 'PYODIDE' in env and env['PYODIDE'] == '1': @@ -22,13 +22,13 @@ def clone_llama_cpp(): subprocess.run(['patch', 'llama.cpp/examples/llava/minicpmv-cli.cpp', 'minicpmv-cli_5.patch'], check=True) -def cuda_12_6_setup(*args, **kwargs): +def cuda_12_6_3_setup(*args, **kwargs): # # cuda env # - cuda_file = 'cuda_12.6.0_560.28.03_linux.run' - cuda_url = f'https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/{cuda_file}' - cuda_output_dir = os.path.abspath('./cuda-12.6') + cuda_file = 'cuda_12.6.3_560.35.05_linux.run' + cuda_url = f'https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/{cuda_file}' + cuda_output_dir = os.path.abspath('./cuda-12.6.3') cuda_file_path = os.path.join(cuda_output_dir, cuda_file) # download cuda file @@ -181,49 +181,50 @@ def cuda_12_4_1_setup(*args, **kwargs): def build_cpu(*args, **kwargs): # build static and shared library env = os.environ.copy() - env['CXXFLAGS'] = '-O3' + env['CXXFLAGS'] = '-O3 -DLLAMA_LIB' print('build_cpu:') pprint(env) - # - # build llama.cpp - # - subprocess.run([ - 'make', - '-C', - 'llama.cpp', - '-j', - 'llama-cli-static', - 'GGML_NO_OPENMP=1', - ], check=True, env=env) - - # - # cffi - # - ffibuilder = FFI() - - ffibuilder.cdef(''' - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); - ''') - - ffibuilder.set_source( - '_llama_cli_cpu', - ''' - #include - - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); - ''', - libraries=['stdc++'], - extra_objects=['../llama.cpp/llama_cli.a'], - extra_compile_args=['-O3'], - extra_link_args=['-O3', '-flto'], - ) - - ffibuilder.compile(tmpdir='build', verbose=True) + for name in ['llama', 'llava', 'minicpmv']: + # + # build llama.cpp + # + subprocess.run([ + 'make', + '-C', + 'llama.cpp', + '-j', + f'{name}-cli-static', + 'GGML_NO_OPENMP=1', + ], check=True, env=env) + + # + # cffi + # + ffibuilder = FFI() + + ffibuilder.cdef(f''' + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _{name}_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); + ''') + + ffibuilder.set_source( + f'_{name}_cli_cpu', + f''' + #include + + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _{name}_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); + ''', + libraries=['stdc++'], + extra_objects=[f'../llama.cpp/lib{name}_cli.a'], + extra_compile_args=['-O3'], + extra_link_args=['-O3', '-flto'], + ) + + ffibuilder.compile(tmpdir='build', verbose=True) # # copy compiled modules @@ -241,53 +242,54 @@ def build_cpu(*args, **kwargs): def build_vulkan_1_x(*args, **kwargs): # build static and shared library env = os.environ.copy() - env['CXXFLAGS'] = '-O3' + env['CXXFLAGS'] = '-O3 -DLLAMA_LIB' print('build_vulkan_1_x:') pprint(env) - # - # build llama.cpp - # - subprocess.run([ - 'make', - '-C', - 'llama.cpp', - '-j', - 'llama-cli-static', - 'GGML_NO_OPENMP=1', - 'GGML_VULKAN=1', - ], check=True, env=env) - - # - # cffi - # - ffibuilder = FFI() - - ffibuilder.cdef(''' - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); - ''') - - ffibuilder.set_source( - '_llama_cli_vulkan_1_x', - ''' - #include - - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); - ''', - libraries=[ - 'stdc++', - 'vulkan', - ], - extra_objects=['../llama.cpp/llama_cli.a'], - extra_compile_args=['-O3'], - extra_link_args=['-O3', '-flto'], - ) - - ffibuilder.compile(tmpdir='build', verbose=True) + for name in ['llama', 'llava', 'minicpmv']: + # + # build llama.cpp + # + subprocess.run([ + 'make', + '-C', + 'llama.cpp', + '-j', + f'{name}-cli-static', + 'GGML_NO_OPENMP=1', + 'GGML_VULKAN=1', + ], check=True, env=env) + + # + # cffi + # + ffibuilder = FFI() + + ffibuilder.cdef(f''' + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _{name}_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); + ''') + + ffibuilder.set_source( + f'_{name}_cli_vulkan_1_x', + f''' + #include + + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _{name}_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); + ''', + libraries=[ + 'stdc++', + 'vulkan', + ], + extra_objects=[f'../llama.cpp/lib{name}_cli.a'], + extra_compile_args=['-O3'], + extra_link_args=['-O3', '-flto'], + ) + + ffibuilder.compile(tmpdir='build', verbose=True) # # copy compiled modules @@ -302,85 +304,24 @@ def build_vulkan_1_x(*args, **kwargs): shutil.move(file, 'llama/') -def build_cpu_openblas(*args, **kwargs): - # build static and shared library - env = os.environ.copy() - env['CXXFLAGS'] = '-O3' - print('build_cpu_openblas:') - pprint(env) - - # - # build llama.cpp - # - subprocess.run([ - 'make', - '-C', - 'llama.cpp', - '-j', - 'llama-cli-static', - 'GGML_NO_OPENMP=1', - 'GGML_OPENBLAS=1', - ], check=True, env=env) - - # - # cffi - # - ffibuilder = FFI() - - ffibuilder.cdef(''' - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); - ''') - - ffibuilder.set_source( - '_llama_cli_cpu_openblas', - ''' - #include - - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); - ''', - libraries=[ - 'stdc++', - 'openblas', - ], - extra_objects=['../llama.cpp/llama_cli.a'], - extra_compile_args=['-O3'], - extra_link_args=['-O3', '-flto'], - ) - - ffibuilder.compile(tmpdir='build', verbose=True) - - # - # copy compiled modules - # - for file in glob.glob('build/*.so') + glob.glob('llama.cpp/*.so'): - shutil.move(file, 'llama/') - - for file in glob.glob('build/*.dll') + glob.glob('llama.cpp/*.dll'): - shutil.move(file, 'llama/') - - for file in glob.glob('build/*.dylib') + glob.glob('llama.cpp/*.dylib'): - shutil.move(file, 'llama/') - - -def build_linux_cuda_12_6(*args, **kwargs): +def build_linux_cuda_12_6_3(*args, **kwargs): # build static and shared library env = os.environ.copy() # # cuda env # - cuda_output_dir = cuda_12_6_setup() + cuda_output_dir = cuda_12_6_3_setup() env['PATH'] = f'{cuda_output_dir}/dist/bin:{env["PATH"]}' env['CUDA_PATH'] = f'{cuda_output_dir}/dist' + env['CC'] = 'gcc-13' + env['CXX'] = 'g++-13' + env['NVCC_PREPEND_FLAGS'] = '-ccbin /usr/bin/g++-13' env['CUDA_DOCKER_ARCH'] = 'compute_61' - env['CXXFLAGS'] = '-O3' - env['LD_LIBRARY_PATH'] = '/project/cuda-12.6/dist/lib64:/project/cuda-12.6/dist/targets/x86_64-linux/lib:/project/cuda-12.6/dist/lib64/stubs:$LD_LIBRARY_PATH' - env['CUDA_HOME'] = '/project/cuda-12.6/dist' + env['CXXFLAGS'] = '-O3 -DLLAMA_LIB' + env['LD_LIBRARY_PATH'] = '/project/cuda-12.6.3/dist/lib64:/project/cuda-12.6.3/dist/targets/x86_64-linux/lib:/project/cuda-12.6.3/dist/lib64/stubs:$LD_LIBRARY_PATH' + env['CUDA_HOME'] = '/project/cuda-12.6.3/dist' env['NVCCFLAGS'] = '\ -gencode arch=compute_70,code=sm_70 \ -gencode arch=compute_75,code=sm_75 \ @@ -389,61 +330,62 @@ def build_linux_cuda_12_6(*args, **kwargs): -gencode arch=compute_89,code=sm_89 \ -gencode arch=compute_90,code=sm_90' - print('build_linux_cuda_12_6:') + print('build_linux_cuda_12_6_3:') pprint(env) - # - # build llama.cpp - # - subprocess.run([ - 'make', - '-C', - 'llama.cpp', - '-j', - 'llama-cli-static', - 'GGML_NO_OPENMP=1', - 'GGML_CUDA=1', - ], check=True, env=env) - - # - # cffi - # - ffibuilder = FFI() - - ffibuilder.cdef(''' - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); - ''') - - ffibuilder.set_source( - '_llama_cli_cuda_12_6', - ''' - #include - - typedef void (*_llama_yield_token_t)(const char * token); - typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); - ''', - libraries=[ - 'stdc++', - 'cuda', - 'cublas', - 'culibos', - 'cudart', - 'cublasLt', - ], - library_dirs=[ - f'{cuda_output_dir}/dist/lib64', - f'{cuda_output_dir}/dist/targets/x86_64-linux/lib', - f'{cuda_output_dir}/dist/lib64/stubs', - ], - extra_objects=['../llama.cpp/llama_cli.a'], - extra_compile_args=['-O3'], - extra_link_args=['-O3', '-flto'], - ) - - ffibuilder.compile(tmpdir='build', verbose=True) + for name in ['llama', 'llava', 'minicpmv']: + # + # build llama.cpp + # + subprocess.run([ + 'make', + '-C', + 'llama.cpp', + '-j', + f'{name}-cli-static', + 'GGML_NO_OPENMP=1', + 'GGML_CUDA=1', + ], check=True, env=env) + + # + # cffi + # + ffibuilder = FFI() + + ffibuilder.cdef(f''' + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _{name}_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); + ''') + + ffibuilder.set_source( + f'_{name}_cli_cuda_12_6_3', + f''' + #include + + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _{name}_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); + ''', + libraries=[ + 'stdc++', + 'cuda', + 'cublas', + 'culibos', + 'cudart', + 'cublasLt', + ], + library_dirs=[ + f'{cuda_output_dir}/dist/lib64', + f'{cuda_output_dir}/dist/targets/x86_64-linux/lib', + f'{cuda_output_dir}/dist/lib64/stubs', + ], + extra_objects=[f'../llama.cpp/lib{name}_cli.a'], + extra_compile_args=['-O3'], + extra_link_args=['-O3', '-flto'], + ) + + ffibuilder.compile(tmpdir='build', verbose=True) # # copy compiled modules @@ -470,7 +412,7 @@ def build_linux_cuda_12_5_1(*args, **kwargs): env['PATH'] = f'{cuda_output_dir}/dist/bin:{env["PATH"]}' env['CUDA_PATH'] = f'{cuda_output_dir}/dist' env['CUDA_DOCKER_ARCH'] = 'compute_61' - env['CXXFLAGS'] = '-O3' + env['CXXFLAGS'] = '-O3 -DLLAMA_LIB' env['LD_LIBRARY_PATH'] = '/project/cuda-12.5.1/dist/lib64:/project/cuda-12.5.1/dist/targets/x86_64-linux/lib:/project/cuda-12.5.1/dist/lib64/stubs:$LD_LIBRARY_PATH' env['CUDA_HOME'] = '/project/cuda-12.5.1/dist' env['NVCCFLAGS'] = '\ @@ -505,7 +447,7 @@ def build_linux_cuda_12_5_1(*args, **kwargs): ffibuilder.cdef(''' typedef void (*_llama_yield_token_t)(const char * token); typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); + int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); ''') ffibuilder.set_source( @@ -515,7 +457,7 @@ def build_linux_cuda_12_5_1(*args, **kwargs): typedef void (*_llama_yield_token_t)(const char * token); typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); + int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); ''', libraries=[ 'stdc++', @@ -530,7 +472,7 @@ def build_linux_cuda_12_5_1(*args, **kwargs): f'{cuda_output_dir}/dist/targets/x86_64-linux/lib', f'{cuda_output_dir}/dist/lib64/stubs', ], - extra_objects=['../llama.cpp/llama_cli.a'], + extra_objects=['../llama.cpp/libllama_cli.a'], extra_compile_args=['-O3'], extra_link_args=['-O3', '-flto'], ) @@ -562,7 +504,7 @@ def build_linux_cuda_12_4_1(*args, **kwargs): env['PATH'] = f'{cuda_output_dir}/dist/bin:{env["PATH"]}' env['CUDA_PATH'] = f'{cuda_output_dir}/dist' env['CUDA_DOCKER_ARCH'] = 'compute_61' - env['CXXFLAGS'] = '-O3' + env['CXXFLAGS'] = '-O3 -DLLAMA_LIB' env['LD_LIBRARY_PATH'] = '/project/cuda-12.4.1/dist/lib64:/project/cuda-12.4.1/dist/targets/x86_64-linux/lib:/project/cuda-12.4.1/dist/lib64/stubs:$LD_LIBRARY_PATH' env['CUDA_HOME'] = '/project/cuda-12.4.1/dist' env['NVCCFLAGS'] = '\ @@ -597,7 +539,7 @@ def build_linux_cuda_12_4_1(*args, **kwargs): ffibuilder.cdef(''' typedef void (*_llama_yield_token_t)(const char * token); typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); + int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); ''') ffibuilder.set_source( @@ -607,7 +549,7 @@ def build_linux_cuda_12_4_1(*args, **kwargs): typedef void (*_llama_yield_token_t)(const char * token); typedef int (*_llama_should_stop_t)(void); - int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); + int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop); ''', libraries=[ 'stdc++', @@ -622,7 +564,7 @@ def build_linux_cuda_12_4_1(*args, **kwargs): f'{cuda_output_dir}/dist/targets/x86_64-linux/lib', f'{cuda_output_dir}/dist/lib64/stubs', ], - extra_objects=['../llama.cpp/llama_cli.a'], + extra_objects=['../llama.cpp/libllama_cli.a'], extra_compile_args=['-O3'], extra_link_args=['-O3', '-flto'], ) @@ -654,21 +596,16 @@ def build(*args, **kwargs): clean_llama_cpp() build_cpu(*args, **kwargs) - # # openblas - # if env.get('GGML_OPENBLAS', '1') != '0': - # clean_llama_cpp() - # build_cpu_openblas(*args, **kwargs) - # vulkan 1.x if env.get('GGML_VULKAN', '1') != '0' and env.get('AUDITWHEEL_ARCH') in ('x86_64', None): clean_llama_cpp() build_vulkan_1_x(*args, **kwargs) - # cuda 12.6 - if env.get('GGML_CUDA', '1') != '0': - if env.get('AUDITWHEEL_POLICY') in ('manylinux2014', 'manylinux_2_28', None) and env.get('AUDITWHEEL_ARCH') in ('x86_64', None): - clean_llama_cpp() - build_linux_cuda_12_6(*args, **kwargs) + # # cuda 12.6.3 + # if env.get('GGML_CUDA', '1') != '0': + # if env.get('AUDITWHEEL_POLICY') in ('manylinux2014', 'manylinux_2_28', None) and env.get('AUDITWHEEL_ARCH') in ('x86_64', None): + # clean_llama_cpp() + # build_linux_cuda_12_6_3(*args, **kwargs) # # cuda 12.5.1 # if env.get('GGML_CUDA', '1') != '0': @@ -682,5 +619,6 @@ def build(*args, **kwargs): # clean_llama_cpp() # build_linux_cuda_12_4_1(*args, **kwargs) + if __name__ == '__main__': build()