From ec2aef52e940286f9327e3efadf854c25966dc20 Mon Sep 17 00:00:00 2001 From: Marko Tasic Date: Mon, 29 Jul 2024 17:49:55 +0200 Subject: [PATCH] Added: - Support for default CPU tinyBLAS (llamafile, sgemm) builds - Support for CPU OpenBLAS (GGML_OPENBLAS) builds --- CHANGELOG.md | 4 ++ pyproject.toml | 8 +-- scripts/build.py | 146 +++++++++++++++++++++++++++-------------------- 3 files changed, 93 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffa0193..860c107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## v0.1.9 +Added: + - Support for default CPU tinyBLAS (llamafile, sgemm) builds + - Support for CPU OpenBLAS (GGML_OPENBLAS) builds + Changed: - Build scripts now have separate step/function `cuda_12_5_1_setup` which setups CUDA 12.5.1 env for build-time. diff --git a/pyproject.toml b/pyproject.toml index f5e1fcd..0dfd971 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,15 +51,15 @@ pip install poetry if [[ $AUDITWHEEL_PLAT == manylinux* ]]; then dnf update -y dnf install -y epel-release - dnf install -y unzip p7zip p7zip-plugins + dnf install -y unzip p7zip p7zip-plugins openblas openblas-devel else - apk -U add upx unzip 7zip + apk -U add upx unzip 7zip openblas-dev apk add --repository=https://dl-cdn.alpinelinux.org/alpine/v3.16/main/ libexecinfo-dev fi """ # skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "pp37-*", "pp38-*", "pp39-*", "*i686"] -skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "pp37-*", "pp38-*", "pp39-*", "*i686", "*manylinux*"] -# skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "cp311-*", "pp37-*", "pp38-*", "pp39-*", "pp310-*", "*i686"] +# skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "pp37-*", "pp38-*", "pp39-*", "*i686", "*manylinux*"] +skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "cp311-*", "pp37-*", "pp38-*", "pp39-*", "pp310-*", "*i686"] manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:latest" manylinux-aarch64-image = 
"quay.io/pypa/manylinux_2_28_aarch64:latest" manylinux-pypy_x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:latest" diff --git a/scripts/build.py b/scripts/build.py index 37caf8b..73765c8 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -9,6 +9,11 @@ from clean import clean_llama, clean_llama_cpp, clean +# if 'PYODIDE' in env and env['PYODIDE'] == '1': +# env['CXXFLAGS'] += ' -msimd128 -fno-rtti -DNDEBUG -flto=full -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH ' +# env['UNAME_M'] = 'wasm' + + def clone_llama_cpp(): subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp.git'], check=True) subprocess.run(['patch', 'llama.cpp/examples/main/main.cpp', 'main_3.patch'], check=True) @@ -71,11 +76,7 @@ def build_cpu(*args, **kwargs): # build static and shared library env = os.environ.copy() env['CXXFLAGS'] = '-O3' - - # if 'PYODIDE' in env and env['PYODIDE'] == '1': - # env['CXXFLAGS'] += ' -msimd128 -fno-rtti -DNDEBUG -flto=full -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH ' - # env['UNAME_M'] = 'wasm' - + print('build_cpu:') pprint(env) # @@ -88,7 +89,6 @@ def build_cpu(*args, **kwargs): '-j', 'llama-cli-static', 'GGML_NO_OPENMP=1', - 'GGML_NO_LLAMAFILE=1', ], check=True, env=env) # @@ -132,21 +132,74 @@ def build_cpu(*args, **kwargs): shutil.move(file, 'llama/') -def build_linux_cuda_12_5(*args, **kwargs): +def build_cpu_openblas(*args, **kwargs): # build static and shared library env = os.environ.copy() + env['CXXFLAGS'] = '-O3' + print('build_cpu_openblas:') + pprint(env) - # if 'PYODIDE' in env and env['PYODIDE'] == '1': - # env['CXXFLAGS'] += ' -msimd128 -fno-rtti -DNDEBUG -flto=full -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH ' - # env['UNAME_M'] = 'wasm' + # + # build llama.cpp + # + subprocess.run([ + 'make', + '-C', + 'llama.cpp', + '-j', + 'llama-cli-static', + 'GGML_NO_OPENMP=1', + 'GGML_OPENBLAS=1', + ], check=True, env=env) + + # + # cffi + # + ffibuilder = FFI() + + 
ffibuilder.cdef(''' + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); + ''') + + ffibuilder.set_source( + '_llama_cli_cpu_openblas', + ''' + #include + + typedef void (*_llama_yield_token_t)(const char * token); + typedef int (*_llama_should_stop_t)(void); + int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot); + ''', + libraries=['stdc++'], + extra_objects=['../llama.cpp/llama_cli.a'], + extra_compile_args=['-O3'], + extra_link_args=['-O3', '-flto'], + ) + + ffibuilder.compile(tmpdir='build', verbose=True) + + # + # copy compiled modules + # + for file in glob.glob('build/*.so') + glob.glob('llama.cpp/*.so'): + shutil.move(file, 'llama/') + + for file in glob.glob('build/*.dll') + glob.glob('llama.cpp/*.dll'): + shutil.move(file, 'llama/') + + for file in glob.glob('build/*.dylib') + glob.glob('llama.cpp/*.dylib'): + shutil.move(file, 'llama/') + + +def build_linux_cuda_12_5(*args, **kwargs): + # build static and shared library + env = os.environ.copy() # # cuda env # - # cuda_file = 'cuda_12.5.1_555.42.06_linux.run' - # cuda_url = f'https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/{cuda_file}' - # cuda_output_dir = os.path.abspath('./cuda-12.5.1') - # cuda_file_path = os.path.join(cuda_output_dir, cuda_file) cuda_output_dir = cuda_12_5_1_setup() env['PATH'] = f'{cuda_output_dir}/dist/bin:{env["PATH"]}' @@ -161,48 +214,9 @@ def build_linux_cuda_12_5(*args, **kwargs): -gencode arch=compute_89,code=sm_89 \ -gencode arch=compute_90,code=sm_90' + print('build_linux_cuda_12_5:') pprint(env) - # # download cuda file - # if not os.path.exists(cuda_file_path): - # cmd = ['mkdir', '-p', f'{cuda_output_dir}'] - # - # subprocess.run(cmd, 
check=True) - # subprocess.run(['curl', '-o', cuda_file_path, cuda_url], check=True) - - # # extract cuda file - # cmd = ['chmod', '+x', f'{cuda_output_dir}/{cuda_file}'] - # subprocess.run(cmd, check=True) - # - # cmd = [ - # f'{cuda_output_dir}/{cuda_file}', - # '--tar', - # 'mxf', - # '--wildcards', - # './builds/cuda_cccl/*', - # './builds/cuda_cudart/*', - # './builds/cuda_nvcc/*', - # './builds/libcublas/*', - # '-C', - # cuda_output_dir, - # ] - # subprocess.run(cmd, cwd=cuda_output_dir, check=True) - # - # cmd = ['mkdir', '-p', f'{cuda_output_dir}/dist'] - # subprocess.run(cmd, check=True) - # - # cmd = f'cp -r {cuda_output_dir}/builds/cuda_cccl/* {cuda_output_dir}/dist' - # subprocess.run(cmd, shell=True, check=True) - # - # cmd = f'cp -r {cuda_output_dir}/builds/cuda_cudart/* {cuda_output_dir}/dist' - # subprocess.run(cmd, shell=True, check=True) - # - # cmd = f'cp -r {cuda_output_dir}/builds/cuda_nvcc/* {cuda_output_dir}/dist' - # subprocess.run(cmd, shell=True, check=True) - # - # cmd = f'cp -r {cuda_output_dir}/builds/libcublas/* {cuda_output_dir}/dist' - # subprocess.run(cmd, shell=True, check=True) - # # build llama.cpp # @@ -213,7 +227,6 @@ def build_linux_cuda_12_5(*args, **kwargs): '-j', 'llama-cli-static', 'GGML_NO_OPENMP=1', - 'GGML_NO_LLAMAFILE=1', 'GGML_CUDA=1', ], check=True, env=env) @@ -271,19 +284,30 @@ def build_linux_cuda_12_5(*args, **kwargs): def build(*args, **kwargs): + env = os.environ.copy() + env['GGML_CPU'] = env.get('GGML_CPU', '1') + env['GGML_OPENBLAS'] = env.get('GGML_OPENBLAS', '0') + env['GGML_CUDA'] = env.get('GGML_CUDA', '0') + # clean, clone clean() clone_llama_cpp() - # cuda 12.5 - if os.environ.get('AUDITWHEEL_POLICY') in ('manylinux2014', 'manylinux_2_28', None) and os.environ.get('AUDITWHEEL_ARCH') in ('x86_64', None): + # cpu + if env.get('GGML_CPU', '1') != '0': clean_llama_cpp() - build_linux_cuda_12_5(*args, **kwargs) + build_cpu(*args, **kwargs) - # cpu - clean_llama_cpp() - build_cpu(*args, **kwargs) + # openblas + if env.get('GGML_OPENBLAS', '1') != '0': + clean_llama_cpp() + 
build_cpu_openblas(*args, **kwargs) + # cuda 12.5 + if env.get('GGML_CUDA', '1') != '0': + if env.get('AUDITWHEEL_POLICY') in ('manylinux2014', 'manylinux_2_28', None) and env.get('AUDITWHEEL_ARCH') in ('x86_64', None): + clean_llama_cpp() + build_linux_cuda_12_5(*args, **kwargs) if __name__ == '__main__': build()