# [LLM] Support mixed_fp8 on Arc (#9415) #4
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
name: LLM Binary Build

# Builds the llm.cpp native binaries for several CPU instruction-set targets
# on self-hosted Linux and Windows runners and uploads each result as a named
# artifact. Every build job is paired with a "check-*-artifact" job and runs
# only when that check reports `exists == 'false'` for the artifact name.
#
# NOTE(review): the pinned action majors (actions/checkout@v3,
# actions/upload-artifact@v3, actions/setup-python@v4) are old releases.
# Upgrading may change artifact behaviour, so confirm against the
# artifact-exists checks and any consumers before bumping them.

# Cancel previous runs in the PR when you push new commits
# concurrency:
#   group: ${{ github.workflow }}-llm-binary-build-${{ github.event.pull_request.number || github.run_id }}
#   cancel-in-progress: false

# Controls when the action will run.
on:
  # Triggers the workflow on push or pull request events, but only for the
  # main branch and only when this workflow file itself is changed.
  push:
    branches: [main]
    paths:
      - ".github/workflows/llm-binary-build.yml"
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/llm-binary-build.yml"
  # Manual run; an empty ref lets actions/checkout pick its default ref.
  workflow_dispatch:
    inputs:
      llmcpp-ref:
        description: 'Ref of llm.cpp code'
        default: ''
        required: false
        type: string
  # Reusable-workflow entry point with the same optional input.
  workflow_call:
    inputs:
      llmcpp-ref:
        description: 'Ref of llm.cpp code'
        default: ''
        required: false
        type: string

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # ---------------------------------------------------------------------------
  # Linux: AVX-VNNI
  # ---------------------------------------------------------------------------
  check-linux-avxvnni-artifact:
    runs-on: ubuntu-latest
    outputs:
      if-exists: ${{steps.check_artifact.outputs.exists}}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: linux-avxvnni

  linux-build-avxvnni:
    runs-on: [self-hosted, AVX2, almalinux8]
    needs: check-linux-avxvnni-artifact
    if: needs.check-linux-avxvnni-artifact.outputs.if-exists == 'false'
    steps:
      # Copies the runner-provided token into the job environment so that the
      # checkout step below can read it through `env.github_access_token`.
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
      # Recreates one conda environment per Python version used by the
      # Chatglm builds further down.
      - name: Install Build Environment
        shell: bash
        run: |
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          yum install -y gcc-toolset-11 cmake git
          conda remove -n python39 --all -y
          conda create -n python39 python=3.9 -y
          conda remove -n python310 --all -y
          conda create -n python310 python=3.10 -y
          conda remove -n python311 --all -y
          conda create -n python311 python=3.11 -y
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      - name: Build binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      # Shared libraries get an `_avxvnni` suffix; executables keep their names.
      - name: Move release binary
        shell: bash
        run: |
          mkdir release
          mv build/main-bloom release/main-bloom
          mv build/libbloom-api.so release/libbloom-api.so
          mv build/quantize-bloom release/quantize-bloom
          mv build/libbloom.so release/libbloom_avxvnni.so
          mv build/main-llama release/main-llama
          mv build/libllama-api.so release/libllama-api.so
          mv build/quantize-llama release/quantize-llama
          mv build/libllama.so release/libllama_avxvnni.so
          mv build/main-gptneox release/main-gptneox
          mv build/libgptneox-api.so release/libgptneox-api.so
          mv build/quantize-gptneox release/quantize-gptneox
          mv build/libgptneox.so release/libgptneox_avxvnni.so
          mv build/main-starcoder release/main-starcoder
          mv build/libstarcoder-api.so release/libstarcoder-api.so
          mv build/quantize-starcoder release/quantize-starcoder
          mv build/libstarcoder.so release/libstarcoder_avxvnni.so
      # Chatglm is built once per Python version; each pass produces a
      # version-specific `_C.cpython-3xx-*.so` extension module.
      - name: Build Chatglm
        shell: bash
        run: |
          source activate python39 || conda activate python39
          cd src/chatglm
          scl enable gcc-toolset-11 "cmake -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move Chatglm binaries
        shell: bash
        run: |
          mv src/chatglm/build/main release/main-chatglm_vnni
          mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so release/chatglm_C.cpython-39-x86_64-linux-gnu.so
      - name: Build Chatglm Py310
        shell: bash
        run: |
          source activate python310 || conda activate python310
          cd src/chatglm
          rm -r build
          scl enable gcc-toolset-11 "cmake -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move Chatglm binaries Py310
        shell: bash
        run: |
          mv src/chatglm/build/_C.cpython-310-x86_64-linux-gnu.so release/chatglm_C.cpython-310-x86_64-linux-gnu.so
      - name: Build Chatglm Py311
        shell: bash
        run: |
          source activate python311 || conda activate python311
          cd src/chatglm
          rm -r build
          scl enable gcc-toolset-11 "cmake -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move Chatglm binaries Py311
        shell: bash
        run: |
          mv src/chatglm/build/_C.cpython-311-x86_64-linux-gnu.so release/chatglm_C.cpython-311-x86_64-linux-gnu.so
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avxvnni
          path: |
            release
      - name: Clean up test environment
        shell: bash
        run: |
          make clean
          conda remove -n python39 --all -y
          conda remove -n python310 --all -y
          conda remove -n python311 --all -y

  # ---------------------------------------------------------------------------
  # Linux: AVX512 / AVX2 / AVX (three builds in one job, three artifacts)
  # ---------------------------------------------------------------------------
  check-linux-avx512-artifact:
    runs-on: ubuntu-latest
    outputs:
      if-exists: ${{steps.check_artifact.outputs.exists}}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: linux-avx512

  linux-build-avx512:
    runs-on: [self-hosted, AVX512, almalinux8]
    needs: check-linux-avx512-artifact
    if: needs.check-linux-avx512-artifact.outputs.if-exists == 'false'
    steps:
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
      - name: Install Build Environment
        shell: bash
        run: |
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          yum install -y gcc-toolset-11 cmake git
          conda remove -n python39 --all -y
          conda create -n python39 python=3.9 -y
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      # The same `build` directory is reconfigured for each variant below by
      # toggling the ONLYAVX / ONLYAVX2 CMake options.
      - name: Build avx512 binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move avx512 release binary
        shell: bash
        run: |
          mkdir avx512_release
          mv build/quantize-bloom avx512_release/quantize-bloom_avx512
          mv build/libbloom.so avx512_release/libbloom_avx512.so
          mv build/quantize-llama avx512_release/quantize-llama_avx512
          mv build/libllama.so avx512_release/libllama_avx512.so
          mv build/quantize-gptneox avx512_release/quantize-gptneox_avx512
          mv build/libgptneox.so avx512_release/libgptneox_avx512.so
          mv build/quantize-starcoder avx512_release/quantize-starcoder_avx512
          mv build/libstarcoder.so avx512_release/libstarcoder_avx512.so
      - name: Build avx2 binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=ON -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move avx2 release binary
        shell: bash
        run: |
          mkdir avx2_release
          mv build/libbloom.so avx2_release/libbloom_avx2.so
          mv build/libllama.so avx2_release/libllama_avx2.so
          mv build/libgptneox.so avx2_release/libgptneox_avx2.so
          mv build/libstarcoder.so avx2_release/libstarcoder_avx2.so
      - name: Build avx binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=ON -DONLYAVX2=OFF -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move avx release binary
        shell: bash
        run: |
          mkdir avx_release
          mv build/libbloom.so avx_release/libbloom_avx.so
          mv build/libllama.so avx_release/libllama_avx.so
          mv build/libgptneox.so avx_release/libgptneox_avx.so
          mv build/libstarcoder.so avx_release/libstarcoder_avx.so
      - name: Archive avx512 build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avx512
          path: |
            avx512_release
      - name: Archive avx2 build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avx2
          path: |
            avx2_release
      - name: Archive avx build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avx
          path: |
            avx_release
      - name: Clean up test environment
        shell: bash
        run: |
          make clean
          conda remove -n python39 --all -y

  # ---------------------------------------------------------------------------
  # Linux: AMX
  # ---------------------------------------------------------------------------
  check-linux-amx-artifact:
    runs-on: ubuntu-latest
    outputs:
      if-exists: ${{steps.check_artifact.outputs.exists}}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: linux-amx

  linux-build-amx:
    runs-on: [self-hosted, amx, almalinux8]
    needs: check-linux-amx-artifact
    if: needs.check-linux-amx-artifact.outputs.if-exists == 'false'
    steps:
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
      - name: Install Build Environment
        shell: bash
        run: |
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          yum install -y gcc-toolset-11 cmake git
          conda remove -n python39 --all -y
          conda create -n python39 python=3.9 -y
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      - name: Build amx binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
          # build chatglm
          source activate python39 || conda activate python39
          cd src/chatglm
          scl enable gcc-toolset-11 "cmake -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move amx release binary
        shell: bash
        run: |
          mkdir amx_release
          mv build/quantize-bloom amx_release/quantize-bloom_amx
          mv build/libbloom.so amx_release/libbloom_amx.so
          mv build/quantize-llama amx_release/quantize-llama_amx
          mv build/libllama.so amx_release/libllama_amx.so
          mv build/quantize-gptneox amx_release/quantize-gptneox_amx
          mv build/libgptneox.so amx_release/libgptneox_amx.so
          mv build/quantize-starcoder amx_release/quantize-starcoder_amx
          mv build/libstarcoder.so amx_release/libstarcoder_amx.so
          # chatglm binary files
          mv src/chatglm/build/main amx_release/main-chatglm_amx
          # mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so amx_release/chatglm_C.cpython-39-x86_64-linux-gnu.so
      - name: Archive amx build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-amx
          path: |
            amx_release
      - name: Clean up test environment
        shell: bash
        run: |
          make clean
          conda remove -n python39 --all -y

  # ---------------------------------------------------------------------------
  # Windows: AVX2
  # ---------------------------------------------------------------------------
  check-windows-avx2-artifact:
    runs-on: ubuntu-latest
    outputs:
      if-exists: ${{steps.check_artifact.outputs.exists}}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-avx2

  windows-build-avx2:
    runs-on: [self-hosted, Windows, AVX-VNNI-Build]
    needs: check-windows-avx2-artifact
    if: needs.check-windows-avx2-artifact.outputs.if-exists == 'false'
    steps:
      # Writes the token to the job environment file only. It is deliberately
      # NOT echoed to stdout, because that would expose it in the job log.
      # Out-File with an explicit UTF-8 encoding is used so the write does not
      # depend on the default encoding of the PowerShell edition in use.
      - name: Set access token
        run: |
          "github_access_token=$env:GITHUB_ACCESS_TOKEN" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      # NOTE(review): the action reference was garbled in the captured source;
      # restored as setup-msbuild@v1.1 - confirm the intended version.
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      - name: Build binary
        shell: powershell
        run: |
          cmake .
          cmake --build . --config Release -j
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-avx2
          path: |
            build/Release

  # ---------------------------------------------------------------------------
  # Windows: AVX-VNNI (plus Chatglm for Python 3.9 / 3.10 / 3.11)
  # ---------------------------------------------------------------------------
  check-windows-avx-vnni-artifact:
    runs-on: ubuntu-latest
    outputs:
      if-exists: ${{steps.check_artifact.outputs.exists}}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-avx-vnni

  windows-build-avx-vnni:
    runs-on: [self-hosted, Windows, AVX-VNNI-Build]
    needs: check-windows-avx-vnni-artifact
    if: needs.check-windows-avx-vnni-artifact.outputs.if-exists == 'false'
    steps:
      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      # Writes the token to the job environment file only. It is deliberately
      # NOT echoed to stdout, because that would expose it in the job log.
      - name: Set access token
        run: |
          "github_access_token=$env:GITHUB_ACCESS_TOKEN" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      # NOTE(review): the action reference was garbled in the captured source;
      # restored as setup-msbuild@v1.1 - confirm the intended version.
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      - name: Build binary
        shell: powershell
        run: |
          cmake -DAVXVNNI=ON .
          cmake --build . --config Release -j
      # Starts from an empty `release` directory; the main-*.exe moves are
      # intentionally left commented out, as in the original workflow.
      - name: Move release binary
        shell: powershell
        run: |
          if (Test-Path ./release) { rm -r -fo release }
          mkdir release
          # mv build/Release/main-bloom.exe release/main-bloom_vnni.exe
          mv build/Release/quantize-bloom.exe release/quantize-bloom_vnni.exe
          mv build/Release/bloom.dll release/libbloom_vnni.dll
          # mv build/Release/main-llama.exe release/main-llama_vnni.exe
          mv build/Release/quantize-llama.exe release/quantize-llama_vnni.exe
          mv build/Release/llama.dll release/libllama_vnni.dll
          # mv build/Release/main-gptneox.exe release/main-gptneox_vnni.exe
          mv build/Release/quantize-gptneox.exe release/quantize-gptneox_vnni.exe
          mv build/Release/gptneox.dll release/libgptneox_vnni.dll
          # mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
          mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
          mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
      - name: Build Chatglm
        shell: powershell
        run: |
          cd src/chatglm
          cmake -DAVXVNNI=ON -B build
          cmake --build build --config Release -j
      - name: Move Chatglm binaries
        shell: powershell
        run: |
          mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
          mv src/chatglm/build/Release/_C.cp39-win_amd64.pyd release/chatglm_C.cp39-win_amd64.pyd
      # Switch the active interpreter, then rebuild Chatglm from a clean
      # build directory to produce the cp310 extension module.
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Build Chatglm Py310
        shell: powershell
        run: |
          cd src/chatglm
          rm -r build
          cmake -DAVXVNNI=ON -B build
          cmake --build build --config Release -j
      - name: Move Chatglm binaries Py310
        shell: powershell
        run: |
          mv src/chatglm/build/Release/_C.cp310-win_amd64.pyd release/chatglm_C.cp310-win_amd64.pyd
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Build Chatglm Py311
        shell: powershell
        run: |
          cd src/chatglm
          rm -r build
          cmake -DAVXVNNI=ON -B build
          cmake --build build --config Release -j
      - name: Move Chatglm binaries Py311
        shell: powershell
        run: |
          mv src/chatglm/build/Release/_C.cp311-win_amd64.pyd release/chatglm_C.cp311-win_amd64.pyd
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-avx-vnni
          path: |
            release

  # ---------------------------------------------------------------------------
  # Windows: AVX
  # ---------------------------------------------------------------------------
  check-windows-avx-artifact:
    runs-on: ubuntu-latest
    outputs:
      if-exists: ${{steps.check_artifact.outputs.exists}}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-avx

  windows-build-avx:
    runs-on: [self-hosted, Windows, AVX-VNNI-Build]
    needs: check-windows-avx-artifact
    if: needs.check-windows-avx-artifact.outputs.if-exists == 'false'
    steps:
      # Writes the token to the job environment file only. It is deliberately
      # NOT echoed to stdout, because that would expose it in the job log.
      - name: Set access token
        run: |
          "github_access_token=$env:GITHUB_ACCESS_TOKEN" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      # NOTE(review): the action reference was garbled in the captured source;
      # restored as setup-msbuild@v1.1 - confirm the intended version.
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      - name: Build binary
        shell: powershell
        run: |
          cmake -DONLYAVX=ON .
          cmake --build . --config Release -j
      - name: Move release binary
        shell: powershell
        run: |
          if (Test-Path ./release) { rm -r -fo release }
          mkdir release
          mv build/Release/bloom.dll release/libbloom_avx.dll
          mv build/Release/llama.dll release/libllama_avx.dll
          mv build/Release/gptneox.dll release/libgptneox_avx.dll
          mv build/Release/starcoder.dll release/libstarcoder_avx.dll
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-avx
          path: |
            release