diff --git a/docs/backend/CANN.md b/docs/backend/CANN.md index a6d45d249c2f5a..a31ecaabdaf633 100644 --- a/docs/backend/CANN.md +++ b/docs/backend/CANN.md @@ -1,13 +1,16 @@ # llama.cpp for CANN -- [Background](#background) -- [News](#news) -- [OS](#os) -- [Hardware](#hardware) -- [Model Supports](#model-supports) -- [Datatype Supports](#datatype-supports) -- [Linux](#linux) -- [TODO](#todo) + - [Background](#background) + - [News](#news) + - [OS](#os) + - [Hardware](#hardware) + - [Model Supports](#model-supports) + - [DataType Supports](#datatype-supports) + - [Docker](#docker) + - [Linux](#linux) + - [TODO](#todo) + + ## Background @@ -50,24 +53,61 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi ## Model Supports -| Model Name | Status | -|:-----------------------------:|:-------:| -| Baichuan | Support | -| Baichuan 2 | Support | -| Bloom | Support | -| Falcon 2 | Support | -| Gpt 2 | Support | -| InternLM 2 | Support | -| Llama 2 | Support | -| Llama 3 | Support | -| Mamba | Support | -| Mistral 7B | Support | -| OLMo | Support | -| Phi 3 | Support | -| Qwen 2 | Support | -| Refact | Support | -| Starcoder | Support | -| Yi | Support | +| Model Name | FP16 | Q8_0 | Q4_0 | +|:----------------------------|:-----:|:----:|:----:| +| AquilaChat2-7B | √ | √ | √ | +| Baichuan-7b | √ | √ | √ | +| Baichuan2-7B-Chat | √ | √ | √ | +| bitnet_b1_58-large | √ | √ | √ | +| bloom-560m | √ | x | √ | +| bloomz-alpaca-560m | √ | x | √ | +| c4ai-command-r-35B-v01 | x | x | x | +| chatglm3-6B | x | x | x | +| chinese-alpaca-2-1.3b | √ | √ | √ | +| CodeShell-7B | √ | √ | √ | +| deepseek-ai_deepseek-coder-1.3B-base | x | x | x | +| deepseek-ai_DeepSeek-V2-Lite | x | x | x | +| deepseek-coder-6.7B-instruct | x | x | x | +| DeepSeek-V2-Lite-64x1.5B | x | x | x | +| falcon-7b-instruct | √ | √ | √ | +| flan-t5-large | √ | √ | √ | +| gemma-2-9b-it | √ | √ | √ | +| glm-4-9B | x | x | x | +| gpt2 | √ | √ | √ | +| Gpt2-163M | √ | √ | √ | +| granite-3B-code-instruct | √ | √ | √ | +| GritLM-7B | √ | √ | √ | +| internlm2_5-7b-chat | √ | √ | √ | +| koala-7B-HF | √ | √ | √ | +| Llama-2-7b-chat-hf | √ | √ | √ | +| Llama-3-Smaug-8B | √ | √ | √ | +| Llama2-Chinese-7b-Chat | √ | √ | √ | +| Llama3-8B | √ | √ | √ | +| Llama3-8b-chinese | √ | √ | √ | +| mamba-130m-hf | √ | √ | √ | +| Mistral-7B-Instruct-v0.2 | √ | √ | √ | +| Mixtral-8x7B-Instruct-v0.1 | x | √ | √ | +| mpt-7B | √ | √ | √ | +| OLMo-1B-hf | √ | √ | √ | +| OpenELM-3B-Instruct | √ | √ | √ | +| Orion-14b-base | √ | √ | √ | +| phi1 | x | x | x | +| phi2 | x | x | x | +| Phi-3-mini-4k-instruct | √ | √ | √ | +| plamo-13b | √ | √ | √ | +| pythia-70M | x | x | x | +| Qwen-7B | √ | √ | √ | +| Qwen2-1.5B-Instruct | √ | x | √ | +| Refact-1_6B-fim | √ | √ | √ | +| SmolLM-135M | √ | √ | √ | +| stablelm-zephyr | x | x | x | +| stablelm-2-zephyr-1_6b | x | x | x | +| starcoderbase-1b | √ | √ | √ | +| starcoder2-3b | √ | √ | √ | +| vigogne-7b-chat | √ | √ | √ | +| xverse-7b-chat | √ | √ | √ | +| Yi-6b-Chat | √ | √ | √ | + ## DataType Supports