diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile
index 1f7c543a47e3cb..db5ba2f25ea672 100644
--- a/.devops/llama-cli-cann.Dockerfile
+++ b/.devops/llama-cli-cann.Dockerfile
@@ -7,8 +7,8 @@ WORKDIR /app
 COPY . .
 
 RUN yum install -y gcc g++ cmake make
-ENV LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:$LIBRARY_PATH
 ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
 ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
 ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
 ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
@@ -17,17 +17,28 @@ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
 ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
 ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
 
+# find libascend_hal.so, because the driver hasn't been mounted.
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
+
 RUN echo "Building with static libs" && \
-    # source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
-    cmake -B build -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}\lib64 && \
+    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
+    cmake -B build -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
     cmake --build build --config Release --target llama-cli
 
 # TODO: use image with NNRT
 FROM cosdt/cann:$ASCEND_VERSION AS runtime
-
 COPY --from=build /app/build/bin/llama-cli /llama-cli
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT [ "/llama-cli" ]
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+ENTRYPOINT ["/llama-cli"]
diff --git a/docs/backend/CANN.md b/docs/backend/CANN.md
index a31ecaabdaf633..7cba113a92ced6 100644
--- a/docs/backend/CANN.md
+++ b/docs/backend/CANN.md
@@ -120,12 +120,11 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi
 
 ## Docker
 
-### Get Images
-You can get a pre-build image at cosdt/cann:8.0.rc2.alpha003-910b-openeuler22.03-py3.8-llama.cpp and use llama-cli directly without building llama.cpp in this image.
+### Build Images
+You can build an image with llama.cpp in one command.
 
 ```sh
-docker pull cosdt/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.8-llama.cpp
+docker build -t llama-cpp-cann -f .devops/llama-cli-cann.Dockerfile .
 ```
-!!!!!! Add content for build image or get pre-build image.
 
 ### Run container
 
@@ -134,8 +133,8 @@ docker pull cosdt/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.8-llama.cpp
 npu-smi info
 # Select the cards that you want to use, make sure these cards are not used by someone.
-# Following using cards of device0 and device1.
-docker run --name llamacpp --device /dev/davinci0 --device /dev/davinci1 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info -v /PATH_TO_YOUR_MODELS/:/app/models -itd cosdt/cann:8.0.rc2.alpha003-910b-ubuntu22.04-py3.8-llama.cpp -m /app/models/MODEL_PATH -ngl 32 -p "Building a website can be done in 10 simple steps:"
+# The following uses device0.
+docker run --name llamacpp --device /dev/davinci0 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info -v /PATH_TO_YOUR_MODELS/:/app/models -it llama-cpp-cann -m /app/models/MODEL_PATH -ngl 32 -p "Building a website can be done in 10 simple steps:"
 ```
 
 *Notes:*
@@ -234,8 +233,6 @@ cmake --build build --config release
   - Single device: Use one device target specified by the user.
   - Multiple devices: Automatically choose the devices with the same backend.
 
-  In two device selection modes, the default SYCL backend is level_zero, you can choose other backend supported by SYCL by setting environment variable ONEAPI_DEVICE_SELECTOR.
-
   | Device selection | Parameter                              |
   |:----------------:|:--------------------------------------:|
   | Single device    | --split-mode none --main-gpu DEVICE_ID |
diff --git a/docs/build.md b/docs/build.md
index 0a815cf4578f67..152d46d6f31af3 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -365,7 +365,7 @@ cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release
 cmake --build build --config release
 ```
 
-`llama.cpp based on CANN` does not currently supprot quantization. You can test with:
+You can test with:
 
 `./build/llama-cli -m PATH_TO_MODEL -p "Building a website can be done in 10 steps:" -ngl 32`
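
The updated docs only show a single-card run. If a container needs more than one NPU, the pattern from the removed two-device example still applies: pass one `--device` flag per card. A minimal sketch, assuming a second card is exposed as `/dev/davinci1` and the image was built as `llama-cpp-cann` with the command from the updated CANN.md:

```sh
# Expose two NPUs (device0 and device1) to the same container;
# the driver/tool mounts are unchanged from the single-device example.
docker run --name llamacpp \
    --device /dev/davinci0 --device /dev/davinci1 \
    --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
    -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
    -v /PATH_TO_YOUR_MODELS/:/app/models \
    -it llama-cpp-cann -m /app/models/MODEL_PATH -ngl 32 \
    -p "Building a website can be done in 10 simple steps:"
```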