Commit

Added:
    - Support for default CPU tinyBLAS (llamafile, sgemm) builds
    - Support for CPU OpenBLAS (GGML_OPENBLAS) builds
mtasic85 committed Jul 29, 2024
1 parent 8eb91c8 commit ec2aef5
Showing 3 changed files with 93 additions and 65 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,10 @@

## v0.1.9

Added:
- Support for default CPU tinyBLAS (llamafile, sgemm) builds
- Support for CPU OpenBLAS (GGML_OPENBLAS) builds

Changed:
- Build scripts now have a separate step/function `cuda_12_5_1_setup`, which sets up the CUDA 12.5.1 environment at build time.

8 changes: 4 additions & 4 deletions pyproject.toml
@@ -51,15 +51,15 @@ pip install poetry
if [[ $AUDITWHEEL_PLAT == manylinux* ]]; then
dnf update -y
dnf install -y epel-release
dnf install -y unzip p7zip p7zip-plugins
dnf install -y unzip p7zip p7zip-plugins openblas openblas-devel
else
apk -U add upx unzip 7zip
apk -U add upx unzip 7zip openblas-dev
apk add --repository=https://dl-cdn.alpinelinux.org/alpine/v3.16/main/ libexecinfo-dev
fi
"""
# skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "pp37-*", "pp38-*", "pp39-*", "*i686"]
skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "pp37-*", "pp38-*", "pp39-*", "*i686", "*manylinux*"]
# skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "cp311-*", "pp37-*", "pp38-*", "pp39-*", "pp310-*", "*i686"]
# skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "pp37-*", "pp38-*", "pp39-*", "*i686", "*manylinux*"]
skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "cp311-*", "pp37-*", "pp38-*", "pp39-*", "pp310-*", "*i686"]
manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:latest"
manylinux-aarch64-image = "quay.io/pypa/manylinux_2_28_aarch64:latest"
manylinux-pypy_x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:latest"
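
The before-all script above pulls OpenBLAS in from the distribution package manager (dnf on manylinux, apk on musllinux) so the GGML_OPENBLAS build has headers and a library to link against. As a hedged illustration (this helper is not part of the commit), a build script could probe for the library before enabling that variant:

import ctypes.util

def has_openblas() -> bool:
    # find_library returns a soname such as 'libopenblas.so.0', or None
    return ctypes.util.find_library('openblas') is not None

if not has_openblas():
    print('OpenBLAS not found; skipping the GGML_OPENBLAS build')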
146 changes: 85 additions & 61 deletions scripts/build.py
@@ -9,6 +9,11 @@
from clean import clean_llama, clean_llama_cpp, clean


# if 'PYODIDE' in env and env['PYODIDE'] == '1':
# env['CXXFLAGS'] += ' -msimd128 -fno-rtti -DNDEBUG -flto=full -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH '
# env['UNAME_M'] = 'wasm'


def clone_llama_cpp():
subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp.git'], check=True)
subprocess.run(['patch', 'llama.cpp/examples/main/main.cpp', 'main_3.patch'], check=True)
@@ -71,11 +76,7 @@ def build_cpu(*args, **kwargs):
# build static and shared library
env = os.environ.copy()
env['CXXFLAGS'] = '-O3'

# if 'PYODIDE' in env and env['PYODIDE'] == '1':
# env['CXXFLAGS'] += ' -msimd128 -fno-rtti -DNDEBUG -flto=full -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH '
# env['UNAME_M'] = 'wasm'

print('build_cpu:')
pprint(env)

#
@@ -88,7 +89,6 @@ def build_cpu(*args, **kwargs):
'-j',
'llama-cli-static',
'GGML_NO_OPENMP=1',
'GGML_NO_LLAMAFILE=1',
], check=True, env=env)

#
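
The one change to the default CPU build is the removal of GGML_NO_LLAMAFILE=1: llama.cpp's Makefile compiles its bundled tinyBLAS (llamafile sgemm) kernels unless that flag is set, so dropping the flag is what makes tinyBLAS the default, per the commit message. A minimal sketch of how the two CPU variants differ only in the GGML_* flags passed to make (the helper name is illustrative, not part of the commit):

import subprocess

def make_llama_cli_static(extra_flags=()):
    # both CPU variants build the same static target; only the flags differ
    subprocess.run(
        ['make', '-C', 'llama.cpp', '-j', 'llama-cli-static',
         'GGML_NO_OPENMP=1', *extra_flags],
        check=True,
    )

make_llama_cli_static()                      # default: tinyBLAS/sgemm enabled
make_llama_cli_static(['GGML_OPENBLAS=1'])   # OpenBLAS-backed BLAS path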
@@ -132,21 +132,74 @@ def build_cpu(*args, **kwargs):
shutil.move(file, 'llama/')


def build_linux_cuda_12_5(*args, **kwargs):
def build_cpu_openblas(*args, **kwargs):
# build static and shared library
env = os.environ.copy()
env['CXXFLAGS'] = '-O3'
print('build_cpu_openblas:')
pprint(env)

# if 'PYODIDE' in env and env['PYODIDE'] == '1':
# env['CXXFLAGS'] += ' -msimd128 -fno-rtti -DNDEBUG -flto=full -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH '
# env['UNAME_M'] = 'wasm'
#
# build llama.cpp
#
subprocess.run([
'make',
'-C',
'llama.cpp',
'-j',
'llama-cli-static',
'GGML_NO_OPENMP=1',
'GGML_OPENBLAS=1',
], check=True, env=env)

#
# cffi
#
ffibuilder = FFI()

ffibuilder.cdef('''
typedef void (*_llama_yield_token_t)(const char * token);
typedef int (*_llama_should_stop_t)(void);
int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot);
''')

ffibuilder.set_source(
'_llama_cli_cpu_openblas',
'''
#include <stdio.h>
typedef void (*_llama_yield_token_t)(const char * token);
typedef int (*_llama_should_stop_t)(void);
int _llama_cli_main(int argc, char ** argv, _llama_yield_token_t _llama_yield_token, _llama_should_stop_t _llama_should_stop, int stop_on_bos_eos_eot);
''',
libraries=['stdc++'],
extra_objects=['../llama.cpp/llama_cli.a'],
extra_compile_args=['-O3'],
extra_link_args=['-O3', '-flto'],
)

ffibuilder.compile(tmpdir='build', verbose=True)

#
# copy compiled modules
#
for file in glob.glob('build/*.so') + glob.glob('llama.cpp/*.so'):
shutil.move(file, 'llama/')

for file in glob.glob('build/*.dll') + glob.glob('llama.cpp/*.dll'):
shutil.move(file, 'llama/')

for file in glob.glob('build/*.dylib') + glob.glob('llama.cpp/*.dylib'):
shutil.move(file, 'llama/')
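
build_cpu_openblas follows the same out-of-line cffi pattern as build_cpu: cdef() declares the callback typedefs and the _llama_cli_main entry point, set_source() compiles an extension module that links the static llama_cli.a archive, and the compiled module exposes both an ffi and a lib object. A hedged sketch of how such a module could be driven from Python (the callback bodies and the model path are assumptions; only the declarations above come from this commit):

from _llama_cli_cpu_openblas import ffi, lib

@ffi.callback('void(const char *)')
def yield_token(token):
    # token is a NUL-terminated C string streamed out by llama-cli
    print(ffi.string(token).decode(), end='', flush=True)

@ffi.callback('int(void)')
def should_stop():
    return 0  # return nonzero to ask the generation loop to stop

# keep the char[] buffers alive for the duration of the call,
# and NUL-terminate argv as main()-style code usually expects
args = [ffi.new('char[]', s.encode()) for s in ('llama-cli', '-m', 'model.gguf')]
argv = ffi.new('char *[]', args + [ffi.NULL])
lib._llama_cli_main(len(args), argv, yield_token, should_stop, 1)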


def build_linux_cuda_12_5(*args, **kwargs):
# build static and shared library
env = os.environ.copy()

#
# cuda env
#
# cuda_file = 'cuda_12.5.1_555.42.06_linux.run'
# cuda_url = f'https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/{cuda_file}'
# cuda_output_dir = os.path.abspath('./cuda-12.5.1')
# cuda_file_path = os.path.join(cuda_output_dir, cuda_file)
cuda_output_dir = cuda_12_5_1_setup()

env['PATH'] = f'{cuda_output_dir}/dist/bin:{env["PATH"]}'
@@ -161,48 +214,9 @@ def build_linux_cuda_12_5(*args, **kwargs):
-gencode arch=compute_89,code=sm_89 \
-gencode arch=compute_90,code=sm_90'

print('build_linux_cuda_12_5:')
pprint(env)
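
Each -gencode pair embeds code for one GPU generation, so the resulting binary runs on any of the listed architectures (only the compute_89/sm_89 and compute_90/sm_90 entries are visible in this hunk; the rest of the list is folded). A small sketch of assembling such a flag string programmatically (the architecture list shown is illustrative, not the commit's full set):

# assuming env is the environment copy built above
archs = ['89', '90']  # extend with the remaining targeted architectures
env['NVCCFLAGS'] = ' '.join(
    f'-gencode arch=compute_{a},code=sm_{a}' for a in archs
)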

# # download cuda file
# if not os.path.exists(cuda_file_path):
# cmd = ['mkdir', '-p', f'{cuda_output_dir}']
#
# subprocess.run(cmd, check=True)
# subprocess.run(['curl', '-o', cuda_file_path, cuda_url], check=True)

# # extract cuda file
# cmd = ['chmod', '+x', f'{cuda_output_dir}/{cuda_file}']
# subprocess.run(cmd, check=True)
#
# cmd = [
# f'{cuda_output_dir}/{cuda_file}',
# '--tar',
# 'mxf',
# '--wildcards',
# './builds/cuda_cccl/*',
# './builds/cuda_cudart/*',
# './builds/cuda_nvcc/*',
# './builds/libcublas/*',
# '-C',
# cuda_output_dir,
# ]
# subprocess.run(cmd, cwd=cuda_output_dir, check=True)
#
# cmd = ['mkdir', '-p', f'{cuda_output_dir}/dist']
# subprocess.run(cmd, check=True)
#
# cmd = f'cp -r {cuda_output_dir}/builds/cuda_cccl/* {cuda_output_dir}/dist'
# subprocess.run(cmd, shell=True, check=True)
#
# cmd = f'cp -r {cuda_output_dir}/builds/cuda_cudart/* {cuda_output_dir}/dist'
# subprocess.run(cmd, shell=True, check=True)
#
# cmd = f'cp -r {cuda_output_dir}/builds/cuda_nvcc/* {cuda_output_dir}/dist'
# subprocess.run(cmd, shell=True, check=True)
#
# cmd = f'cp -r {cuda_output_dir}/builds/libcublas/* {cuda_output_dir}/dist'
# subprocess.run(cmd, shell=True, check=True)

#
# build llama.cpp
#
@@ -213,7 +227,6 @@ def build_linux_cuda_12_5(*args, **kwargs):
'-j',
'llama-cli-static',
'GGML_NO_OPENMP=1',
'GGML_NO_LLAMAFILE=1',
'GGML_CUDA=1',
], check=True, env=env)
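
The commented-out download, extract, and copy commands above were consolidated into the cuda_12_5_1_setup helper called at the top of this function (see the CHANGELOG entry). A hedged reconstruction of what that helper plausibly does, pieced together from the removed commands (the real implementation lives in a script not shown in this diff):

import os
import subprocess

def cuda_12_5_1_setup() -> str:
    cuda_file = 'cuda_12.5.1_555.42.06_linux.run'
    cuda_url = f'https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/{cuda_file}'
    cuda_output_dir = os.path.abspath('./cuda-12.5.1')
    cuda_file_path = os.path.join(cuda_output_dir, cuda_file)

    # download the self-extracting installer once
    if not os.path.exists(cuda_file_path):
        os.makedirs(cuda_output_dir, exist_ok=True)
        subprocess.run(['curl', '-o', cuda_file_path, cuda_url], check=True)

    # unpack only the components the build needs
    subprocess.run(['chmod', '+x', cuda_file_path], check=True)
    subprocess.run([
        cuda_file_path, '--tar', 'mxf', '--wildcards',
        './builds/cuda_cccl/*', './builds/cuda_cudart/*',
        './builds/cuda_nvcc/*', './builds/libcublas/*',
        '-C', cuda_output_dir,
    ], cwd=cuda_output_dir, check=True)

    # flatten the per-component trees into a single dist/ prefix
    os.makedirs(f'{cuda_output_dir}/dist', exist_ok=True)
    for component in ('cuda_cccl', 'cuda_cudart', 'cuda_nvcc', 'libcublas'):
        subprocess.run(
            f'cp -r {cuda_output_dir}/builds/{component}/* {cuda_output_dir}/dist',
            shell=True, check=True,
        )

    return cuda_output_dir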

@@ -271,19 +284,30 @@ def build_linux_cuda_12_5(*args, **kwargs):


def build(*args, **kwargs):
env = os.environ.copy()
env['GGML_CPU'] = '1'
env['GGML_OPENBLAS'] = '0'
env['GGML_CUDA'] = '0'

# clean, clone
clean()
clone_llama_cpp()

# cuda 12.5
if os.environ.get('AUDITWHEEL_POLICY') in ('manylinux2014', 'manylinux_2_28', None) and os.environ.get('AUDITWHEEL_ARCH') in ('x86_64', None):
# cpu
if env.get('GGML_CPU', '1') != '0':
clean_llama_cpp()
build_linux_cuda_12_5(*args, **kwargs)
build_cpu(*args, **kwargs)

# cpu
clean_llama_cpp()
build_cpu(*args, **kwargs)
# openblas
if env.get('GGML_OPENBLAS', '1') != '0':
clean_llama_cpp()
build_cpu_openblas(*args, **kwargs)

# cuda 12.5
if env.get('GGML_CUDA', '1') != '0':
if env.get('AUDITWHEEL_POLICY') in ('manylinux2014', 'manylinux_2_28', None) and env.get('AUDITWHEEL_ARCH') in ('x86_64', None):
clean_llama_cpp()
build_linux_cuda_12_5(*args, **kwargs)

if __name__ == '__main__':
build()
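
build() now gates each variant on a GGML_* variable, but note that the assignments at the top overwrite whatever the caller exported: env['GGML_OPENBLAS'] = '0' guarantees the later env.get('GGML_OPENBLAS', '1') != '0' check sees '0', so as committed only the CPU build ever runs. A hedged variant using setdefault (an editorial suggestion, not part of this commit) keeps the same defaults while letting exported variables win:

import os

env = os.environ.copy()
# same defaults as above, but an exported variable now takes precedence
env.setdefault('GGML_CPU', '1')
env.setdefault('GGML_OPENBLAS', '0')
env.setdefault('GGML_CUDA', '0')

# usage: enable the OpenBLAS variant for a single run
#   GGML_OPENBLAS=1 python scripts/build.py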
